├── .env.example ├── .gitignore ├── .python-version ├── CONTRIBUTING.md ├── LICENCE ├── Makefile ├── README.md ├── app ├── agents │ ├── clients │ │ ├── __init__.py │ │ └── superlinked.py │ ├── config │ │ └── qdrant.py │ ├── core │ │ ├── agent_state.py │ │ └── base_agent.py │ ├── implementations │ │ ├── blog_post │ │ │ ├── __init__.py │ │ │ └── agent.py │ │ ├── main_deprecated.py │ │ ├── search_amazon_products │ │ │ ├── agent_by_json.py │ │ │ └── agent_by_superlinked.py │ │ └── supervisor.py │ ├── langchain │ │ ├── __init__.py │ │ ├── edges │ │ │ ├── __init__.py │ │ │ ├── check_relevance.py │ │ │ ├── edges.py │ │ │ └── grade_documents.py │ │ ├── factory.py │ │ ├── interface │ │ │ ├── __init__.py │ │ │ ├── base_provider.py │ │ │ └── events.py │ │ ├── memory │ │ │ └── long_term.py │ │ ├── nodes │ │ │ ├── __init__.py │ │ │ ├── agent.py │ │ │ ├── generate.py │ │ │ ├── nodes.py │ │ │ ├── rewrite.py │ │ │ └── supervisor.py │ │ ├── template.py │ │ ├── tools │ │ │ ├── __init__.py │ │ │ ├── amazon_products_search.py │ │ │ ├── blog_posts.py │ │ │ └── tools.py │ │ └── vector_store │ │ │ ├── json_retriever.py │ │ │ ├── sl_amazon_products_retriever.py │ │ │ └── url_retriever.py │ └── schema │ │ └── superlinked │ │ ├── __init__.py │ │ ├── index.py │ │ └── query_search.py ├── utils │ ├── __init__.py │ ├── constants.py │ ├── helpers.py │ ├── mapper.py │ ├── prompts.py │ └── types.py └── voice_assistant │ ├── __init__.py │ └── assistant.py ├── assets ├── livekit_playground.png ├── oliva_architecture.png ├── oliva_arquitecture_v2.png └── snapshot.zip ├── langgraph.json ├── pyproject.toml ├── use_cases ├── agent_blog_post_url.py ├── agent_search_by_json.py ├── agent_search_by_superlinked.py └── agent_supervisor.py └── uv.lock /.env.example: -------------------------------------------------------------------------------- 1 | LIVEKIT_URL=http://localhost:7880/ 2 | LIVEKIT_API_KEY=devkey 3 | LIVEKIT_API_SECRET=secret 4 | 5 | # QDRANT 6 | QDRANT_URL=http://localhost:6333 7 | QDRANT_API_KEY=... 8 | QDRANT_VECTOR_DIMENSION=2054 9 | QDRANT_VECTOR_DISTANCE=Dot 10 | QDRANT_VECTOR_NAME=... 11 | # QDRANT long term memory 12 | QDRANT_COLLECTION_NAME=... 13 | 14 | # Deepgram 15 | DEEPGRAM_API_KEY=... 16 | 17 | # Openai 18 | OPENAI_API_KEY=... 19 | OPENAI_MODEL=text-embedding-3-large 20 | 21 | # https://smith.langchain.com 22 | LANGSMITH_TRACING=true 23 | LANGSMITH_API_KEY=... 24 | 25 | # elevenlabs 26 | ELEVENLABS_API_KEY=... -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 110 | .pdm.toml 111 | .pdm-python 112 | .pdm-build/ 113 | 114 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 115 | __pypackages__/ 116 | 117 | # Celery stuff 118 | celerybeat-schedule 119 | celerybeat.pid 120 | 121 | # SageMath parsed files 122 | *.sage.py 123 | 124 | # Environments 125 | .env 126 | .venv 127 | env/ 128 | venv/ 129 | ENV/ 130 | env.bak/ 131 | venv.bak/ 132 | 133 | # Spyder project settings 134 | .spyderproject 135 | .spyproject 136 | 137 | # Rope project settings 138 | .ropeproject 139 | 140 | # mkdocs documentation 141 | /site 142 | 143 | # mypy 144 | .mypy_cache/ 145 | .dmypy.json 146 | dmypy.json 147 | 148 | # Pyre type checker 149 | .pyre/ 150 | 151 | # pytype static type analyzer 152 | .pytype/ 153 | 154 | # Cython debug symbols 155 | cython_debug/ 156 | 157 | # PyCharm 158 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 159 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 160 | # and can be added to the global gitignore or merged into this file. For a more nuclear 161 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
162 | #.idea/ 163 | 164 | # VSCode / Cursor 165 | .vscode 166 | 167 | # Data 168 | data/ 169 | in_memory_vdb/ 170 | .langgraph_api 171 | 172 | # MacOs 173 | .DS_Store -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.12 2 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Welcome to Oliva Contributing Guide! 👋 2 | 3 | We're thrilled that you're interested in contributing to Oliva! As an open-source multi-agent assistant, every contribution helps make AI more accessible to everyone 🚀 4 | 5 | ## Ways to Contribute 6 | 7 | Every contribution matters, no matter how small! Here are some ways you can help: 8 | 9 | - 📝 Fixing typos or improving documentation 10 | - 🐛 Reporting and fixing bugs 11 | - ✨ Adding new features or enhancements 12 | - 🔧 Improving existing code 13 | - 🎨 Enhancing the user interface 14 | - 🌍 Adding support for different languages or platforms 15 | - 🧪 Writing tests 16 | - 📚 Improving examples and tutorials 17 | 18 | ## Quick Start Guide 19 | 20 | 1. **Fork & Clone** 21 | ```bash 22 | git clone https://github.com/Deluxer/oliva.git 23 | cd oliva 24 | git remote add upstream https://github.com/Deluxer/oliva.git 25 | ``` 26 | 27 | 2. **Set Up Environment** 28 | ```bash 29 | # Create virtual environment with uv 30 | uv sync 31 | 32 | # Configure your environment 33 | cp .env.example .env 34 | # Edit .env with your API keys and settings 35 | ``` 36 | 37 | 3. **Start Coding** 38 | ```bash 39 | # Create a new branch 40 | git checkout -b feature/amazing-feature 41 | # or 42 | git checkout -b fix/bug-description 43 | ``` 44 | 45 | ## Development Guidelines 46 | 47 | ### 🔧 Technical Requirements 48 | 49 | - Python 3.12+ 50 | - Docker (for Qdrant) 51 | - Livekit account 52 | - Deepgram account 53 | - OpenAI API key 54 | 55 | ### 📝 Code Style 56 | 57 | - Follow PEP 8 58 | - Use type hints 59 | - Write descriptive docstrings 60 | - Keep functions focused and modular 61 | - Add comments for complex logic 62 | 63 | ### ✅ Before Submitting 64 | 65 | 1. **Test Your Changes** 66 | ```bash 67 | # Run all tests 68 | make test 69 | 70 | # Check code style 71 | make lint 72 | ``` 73 | 74 | 2. **Update Documentation** 75 | - Add docstrings for new functions 76 | - Update README if needed 77 | - Add examples for new features 78 | 79 | 3. **Commit Your Changes** 80 | ```bash 81 | git add . 82 | git commit -m "feat: add amazing feature" 83 | ``` 84 | Use conventional commits: 85 | - `feat:` new features 86 | - `fix:` bug fixes 87 | - `docs:` documentation 88 | - `test:` adding tests 89 | - `refactor:` code improvements 90 | 91 | ## 🐛 Reporting Issues 92 | 93 | Found a bug? Have a suggestion? We'd love to hear from you! When creating an issue: 94 | 95 | 1. Check if the issue already exists 96 | 2. Include: 97 | - Your Python and dependency versions 98 | - Clear steps to reproduce 99 | - Expected vs actual behavior 100 | - Relevant logs or screenshots 101 | - Operating system details 102 | 103 | ## 🚀 Pull Request Process 104 | 105 | 1. Update your fork 106 | ```bash 107 | git fetch upstream 108 | git rebase upstream/main 109 | ``` 110 | 111 | 2. Push your changes 112 | ```bash 113 | git push origin your-branch-name 114 | ``` 115 | 116 | 3. 
Open a PR with: 117 | - Clear description of changes 118 | - Screenshots for UI changes 119 | - Reference to related issues 120 | - List of breaking changes (if any) 121 | 122 | 4. Respond to review comments 123 | 124 | ## 📞 Questions 125 | 126 | Please send your questions to gerardoangeln@gmail.com or creating a public issue. 127 | 128 | ## ✨ Recognition 129 | 130 | Contributors are listed in our [CONTRIBUTORS.md](./CONTRIBUTORS.md) file. Thank you for making Oliva better! 🙏 131 | 132 | Let's build the future of AI assistants together! 🤖✨ -------------------------------------------------------------------------------- /LICENCE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Oliva 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | oliva-start: 2 | uv run python -m app.voice_assistant.assistant start 3 | 4 | agent-search-by-superlinked: 5 | uv run python -m use_cases.agent_search_by_superlinked 6 | 7 | agent-search-by-json: 8 | uv run python -m use_cases.agent_search_by_json 9 | 10 | agent-blog-post: 11 | uv run python -m use_cases.agent_blog_post_url 12 | 13 | agent-supervisor: 14 | uv run python -m use_cases.agent_supervisor -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
# Oliva Multi-Agent Assistant

Oliva is a multi-agent assistant that combines different agents to handle tasks such as semantic search, text generation, question answering, and more. The project is designed to be flexible and extensible, allowing you to add more agents as needed.

*Oliva architecture*

### Requirements:
| Requirement | Description |
| --- | --- |
| Database Population | Follow the setup instructions in the tabular-semantic-search-tutorial or download the snapshot in `assets/snapshot.zip` (a connection sanity check is sketched below this table). |
| Qdrant | Vector database for efficient similarity search and storage of embeddings. |
| Superlinked | Framework for building AI applications with semantic search capabilities. |
| Deepgram Account | Speech-to-text service account required for converting voice input into text. |
| Livekit Account | Real-time communication platform needed for handling voice interactions. |
| Python Knowledge | Understanding of Python programming language (version 3.12+). |
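Once Qdrant is populated (via the tutorial or the snapshot), a short script like the one below can confirm that the data is reachable. This is only a sketch: the collection name `amazon_products` is a hypothetical placeholder — use whatever name your population step created — while the client calls (`QdrantClient`, `get_collections`, `count`) are the same `qdrant-client` APIs the project itself uses.

```python
# Sanity check: confirm Qdrant is reachable and list the collections it holds.
import os

from dotenv import load_dotenv
from qdrant_client import QdrantClient

load_dotenv()  # reads QDRANT_URL / QDRANT_API_KEY from .env

client = QdrantClient(
    url=os.environ["QDRANT_URL"],
    api_key=os.environ.get("QDRANT_API_KEY"),
)

collections = [c.name for c in client.get_collections().collections]
print("Collections:", collections)

# Hypothetical collection name — replace with the one your data load created.
if "amazon_products" in collections:
    print("Points:", client.count(collection_name="amazon_products").count)
```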
46 | 47 | ### Installation 48 | 1. Install project dependencies: 49 | ```bash 50 | uv sync 51 | ``` 52 | 53 | This will create a virtual environment in `.venv` and install all required dependencies. 54 | 55 | 2. Livekit account 56 | 57 | Create a Livekit account in [Livekit Cloud](https://cloud.livekit.io/login) and get `LIVEKIT_URL`, `LIVEKIT_API_KEY` and `LIVEKIT_API_SECRET`. 58 | 59 | 60 | ```bash 61 | LIVEKIT_URL=wss://your-project.livekit.cloud 62 | LIVEKIT_API_KEY=secret 63 | LIVEKIT_API_SECRET=******** 64 | ``` 65 | 66 | 3. Environment variables 67 | 68 | Before running any Python scripts, set the following environment variables: 69 | ```bash 70 | cp .env.example .env 71 | ``` 72 | 73 | 4. Qdrant 74 | 75 | Use docker to run Qdrant, set an API key wherever you want: 76 | ```bash 77 | docker run -p 6333:6333 -p 6334:6334 \ 78 | -e QDRANT__SERVICE__API_KEY=******** \ 79 | -v "$(pwd)/qdrant_storage:/qdrant/storage:z" \ 80 | qdrant/qdrant 81 | ``` 82 | 83 | ## Usage 84 | 85 | ### Voice assistant 86 | ```bash 87 | make oliva-start 88 | ``` 89 | 90 | ### Voice assistant frontend 91 | 92 | Use [Agent playground](https://agents-playground.livekit.io/) and connect with your Livekit project to interact with the voice assistant. 93 | If you prefer run locally, download the repo [Agent playground](https://github.com/livekit/agents-playground) and run `npm run start`. 94 | 95 | 96 |

*Livekit Agent Playground*

99 | 100 | 101 | ## Project Structure 102 | 103 | ``` 104 | oliva/ 105 | ├── app/ 106 | │ ├── agents/ 107 | │ │ ├── implementations/ # Individual agent implementations 108 | │ │ ├── core/ # Base classes and interfaces for agent components 109 | │ │ └── langchain/ 110 | │ │ ├── base/ # Base LangChain integration classes 111 | │ │ ├── config/ # LangChain configuration 112 | │ │ ├── edges/ # Edge conditions for workflow routing 113 | │ │ ├── nodes/ # Node implementations (agent, rewrite, generate) 114 | │ │ └── tools/ # LangChain-specific tools 115 | │ ├── voice_assistant/ 116 | │ └── utils/ # Shared utilities 117 | ``` 118 | 119 | ### Architecture Overview 120 | 121 | The project follows a modular architecture implementing an agentic RAG (Retrieval-Augmented Generation) system: 122 | 123 | 1. **Agent Components** (`app/agents/`) 124 | - `agents/`: Contains specific agent implementations 125 | - `core/`: Defines core interfaces and abstract classes for: 126 | - State management 127 | - Node implementations 128 | - Edge conditions 129 | - Tool interfaces 130 | - Graph workflow definitions 131 | 132 | 2. **LangChain Integration** (`app/agents/integrations/langchain/`) 133 | - Provides LangChain-specific implementations for: 134 | - Document retrieval 135 | - Tool operations 136 | - State management 137 | - Workflow nodes and edges 138 | 139 | 3. **Voice Assistant** (`app/voice_assistant/`) 140 | - LiveKit integration 141 | * Voice interface implementation 142 | * Speech-to-text and text-to-speech capabilities 143 | 144 | 4. **Utilities** (`app/utils/`) 145 | - Shared helper functions 146 | - Common utilities used across modules 147 | 148 | The system implements a graph-based workflow where each agent processes state through a series of nodes (functions) connected by conditional edges, supporting dynamic routing based on the agent's decisions. 149 | 150 | ## Use cases 151 | 152 | Langchain workflow by supervisor agent 153 | ```bash 154 | make agent-supervisor 155 | ``` 156 | 157 | Langchain workflow by superlinked 158 | ```bash 159 | make agent-search-by-superlinked 160 | ``` 161 | 162 | Langchain workflow by json file 163 | ```bash 164 | make agent-search-by-json 165 | ``` 166 | 167 | ## Tech Stack 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 |
| Technology | Version/Type | Role |
| --- | --- | --- |
| Langchain | Latest | LLM application framework |
| Livekit | Cloud/Self-hosted | Real-time voice communication |
| Qdrant | Vector DB | Semantic search storage |
| Superlinked | Framework | Semantic search capabilities |
| Deepgram | API Service | Speech-to-text conversion |
| OpenAI | API Service | LLM provider |
| Python | 3.12+ | Core implementation |
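The Architecture Overview above describes a graph-based workflow in which a supervisor node routes state to specialized agents through conditional edges. The sketch below illustrates that pattern with the same LangGraph primitives the project uses (`StateGraph`, `add_node`, `add_conditional_edges`); the state class, node bodies, and routing rule are simplified placeholders, not the project's actual graph.

```python
# Minimal supervisor-style workflow: a routing node picks which worker runs next.
from typing import Literal

from langgraph.graph import StateGraph, MessagesState, START, END


class State(MessagesState):
    next: str  # name of the node the supervisor wants to run


def supervisor(state: State) -> dict:
    # Toy routing rule; the real project delegates this decision to an LLM.
    text = state["messages"][-1].content.lower()
    return {"next": "amazon_products_agent" if "buy" in text else "blog_post_agent"}


def amazon_products_agent(state: State) -> dict:
    return {"messages": [("assistant", "Searching Amazon products...")]}


def blog_post_agent(state: State) -> dict:
    return {"messages": [("assistant", "Searching blog posts...")]}


def route(state: State) -> Literal["amazon_products_agent", "blog_post_agent"]:
    return state["next"]


workflow = StateGraph(State)
workflow.add_node("supervisor", supervisor)
workflow.add_node("amazon_products_agent", amazon_products_agent)
workflow.add_node("blog_post_agent", blog_post_agent)
workflow.add_edge(START, "supervisor")
workflow.add_conditional_edges(
    "supervisor",
    route,
    {"amazon_products_agent": "amazon_products_agent", "blog_post_agent": "blog_post_agent"},
)
workflow.add_edge("amazon_products_agent", END)
workflow.add_edge("blog_post_agent", END)

graph = workflow.compile()
result = graph.invoke({"messages": [("user", "I want to buy wireless headphones")]})
print(result["messages"][-1].content)
```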
215 | 216 | ## 📚 Contributing 217 | 218 | This project is open-source and welcomes contributions from the community. For more details on how to contribute, please refer to the [Contributing Guide](./CONTRIBUTING.md). -------------------------------------------------------------------------------- /app/agents/clients/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | LangChain integration module for Agentic RAG implementation. 3 | """ 4 | 5 | from .superlinked import SuperlinkedClient 6 | 7 | 8 | __all__ = [ 9 | 'SuperlinkedClient', 10 | ] -------------------------------------------------------------------------------- /app/agents/clients/superlinked.py: -------------------------------------------------------------------------------- 1 | from app.agents.schema.superlinked import index 2 | import superlinked.framework as sl 3 | from app.agents.config.qdrant import qdrant_config 4 | from typing import Optional 5 | import logging 6 | 7 | class QdrantConnectionError(Exception): 8 | pass 9 | 10 | class SuperlinkedClient: 11 | def __init__(self) -> None: 12 | self.app = None 13 | 14 | def setup(self): 15 | self.app: Optional[sl.InteractiveExecutor] = None 16 | try: 17 | product_source: sl.InteractiveSource = sl.InteractiveSource(index.product) 18 | 19 | vector_database = sl.QdrantVectorDatabase( 20 | url=qdrant_config.QDRANT_URL.get_secret_value(), 21 | api_key=qdrant_config.QDRANT_API_KEY.get_secret_value(), 22 | default_query_limit=10, 23 | ) 24 | 25 | executor = sl.InteractiveExecutor( 26 | sources=[product_source], 27 | indices=[index.product_index], 28 | vector_database=vector_database, 29 | ) 30 | self.app = executor.run() 31 | except Exception as e: 32 | logging.error(f"Failed to connect to Qdrant: {str(e)}") 33 | raise QdrantConnectionError("Failed to establish connection with Qdrant vector database. 
Please check your connection and credentials.") from e 34 | 35 | superlinked = SuperlinkedClient() -------------------------------------------------------------------------------- /app/agents/config/qdrant.py: -------------------------------------------------------------------------------- 1 | 2 | from pydantic import SecretStr 3 | from pydantic_settings import BaseSettings 4 | from dotenv import load_dotenv 5 | 6 | load_dotenv() 7 | 8 | class QdrantConfig(BaseSettings): 9 | """Qdrant client settings.""" 10 | # superlinked 11 | QDRANT_URL: SecretStr 12 | QDRANT_API_KEY: SecretStr 13 | QDRANT_VECTOR_DIMENSION: int = 2054 14 | QDRANT_VECTOR_DISTANCE: str = "Dot" 15 | # long term memory 16 | QDRANT_COLLECTION_NAME: str = "oliva_history" 17 | QDRANT_VECTOR_NAME: str = "history_vector" 18 | 19 | qdrant_config = QdrantConfig() -------------------------------------------------------------------------------- /app/agents/core/agent_state.py: -------------------------------------------------------------------------------- 1 | from typing import Annotated, Sequence 2 | from langgraph.graph import MessagesState 3 | from langchain_core.messages import BaseMessage 4 | from langgraph.graph.message import add_messages 5 | from app.utils.types import ToolType 6 | import operator 7 | 8 | def last_value_reducer(current: str | None, new: str) -> str: 9 | """Reducer that keeps only the last value, preserving initial value if new is empty""" 10 | if new == "": 11 | return current if current is not None else "" 12 | return new 13 | 14 | class BaseState(MessagesState): 15 | messages: Annotated[Sequence[BaseMessage], add_messages] 16 | next: Annotated[str, last_value_reducer] 17 | 18 | class AgentState(BaseState): 19 | pass 20 | 21 | class SubGraphAgentState(BaseState): 22 | rewrite_count: Annotated[int, operator.add] 23 | tools: Annotated[list[ToolType], operator.add] 24 | explanation: Annotated[str, operator.add] -------------------------------------------------------------------------------- /app/agents/core/base_agent.py: -------------------------------------------------------------------------------- 1 | from typing import Any, List, Optional, Sequence 2 | 3 | from app.agents.langchain.interface.events import AgentEvents 4 | from app.agents.core.agent_state import AgentState 5 | from app.agents.langchain.tools.tools import ToolProvider 6 | from app.agents.langchain.edges.edges import EdgeProvider 7 | from app.agents.langchain.nodes.nodes import NodeProvider 8 | from app.utils.types import EdgeType, ToolType, NodeType 9 | from langgraph.types import Command 10 | 11 | class BaseAgent(): 12 | _instance = None 13 | """Base class for all agents""" 14 | def __new__(cls): 15 | if cls._instance is None: 16 | cls._instance = super().__new__(cls) 17 | return cls._instance 18 | 19 | def __init__( 20 | self, 21 | tool_types: Optional[Sequence[ToolType]] = None, 22 | edge_types: Optional[Sequence[EdgeType]] = None, 23 | node_types: Optional[Sequence[NodeType]] = None 24 | ): 25 | self.tool_types = tool_types 26 | self.edge_types = edge_types 27 | self.node_types = node_types 28 | self._tool_provider = None 29 | self._edge_provider = None 30 | self._nodes_provider = None 31 | 32 | @property 33 | def tool_provider(self) -> ToolProvider: 34 | if self._tool_provider is None: 35 | self._tool_provider = ToolProvider() 36 | return self._tool_provider 37 | 38 | @property 39 | def edge_provider(self) -> EdgeProvider: 40 | if self._edge_provider is None: 41 | self._edge_provider = EdgeProvider() 42 | return self._edge_provider 43 | 
44 | @property 45 | def nodes_provider(self) -> NodeProvider: 46 | if self._nodes_provider is None: 47 | self._nodes_provider = NodeProvider() 48 | return self._nodes_provider 49 | 50 | # @lru_cache(maxsize=1) 51 | def setup_tools(self) -> List[Any]: 52 | """Get tools based on specified types or all available tools if none specified""" 53 | return self.tool_provider.get_items_by_types(self.tool_types) 54 | 55 | # @lru_cache(maxsize=1) 56 | def setup_edges(self) -> List[Any]: 57 | """Get edges based on specified types or all available edges if none specified""" 58 | return self.edge_provider.get_items_by_types(self.edge_types) 59 | 60 | # @lru_cache(maxsize=1) 61 | def setup_nodes(self) -> List[Any]: 62 | """Get nodes based on specified types or all available nodes if none specified""" 63 | return self.nodes_provider.get_items_by_types(self.node_types) 64 | 65 | def setup_events(self) -> Any: 66 | """Initialize workflow components when needed""" 67 | tools = self.setup_tools() 68 | edges = self.setup_edges() 69 | nodes = self.setup_nodes() 70 | 71 | return AgentEvents.mapper(tools, edges, nodes) 72 | 73 | def inject_tools_and_template(self, tools, target_node, template): 74 | """Create a wrapper node that injects tools into the state before execution. 75 | 76 | Args: 77 | tools: Dictionary of tools to inject 78 | target_node: The original node function to wrap 79 | 80 | Returns: 81 | A wrapped node function that ensures tools are available in state 82 | """ 83 | def wrapped_node(state: AgentState): 84 | command = Command( 85 | goto=None, 86 | update={ 87 | "tools": tools, 88 | "template": template 89 | } 90 | ) 91 | 92 | return target_node(state | command.update) 93 | 94 | return wrapped_node -------------------------------------------------------------------------------- /app/agents/implementations/blog_post/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Deluxer/oliva/fac56e1afb72c2ff151e8e33ecf4896080835775/app/agents/implementations/blog_post/__init__.py -------------------------------------------------------------------------------- /app/agents/implementations/blog_post/agent.py: -------------------------------------------------------------------------------- 1 | from functools import lru_cache 2 | from app.agents.core.agent_state import SubGraphAgentState 3 | from app.agents.core.base_agent import BaseAgent 4 | from app.utils.types import EdgeType, NodeType, ToolType 5 | from langgraph.graph import END, START, StateGraph 6 | from langgraph.prebuilt import ToolNode, tools_condition 7 | from app.agents.langchain.factory import AgentFactory 8 | from typing import Dict, Any 9 | from app.utils.prompts import prompts 10 | 11 | class BlogPostAgent(BaseAgent): 12 | """Agent specialized in searching and analyzing blog posts""" 13 | _instance = None 14 | 15 | def __new__(cls): 16 | if cls._instance is None: 17 | cls._instance = super().__new__(cls) 18 | cls._instance.__init__() 19 | return cls._instance 20 | 21 | def __init__(self): 22 | if not hasattr(self, '_initialized'): 23 | super().__init__( 24 | tool_types=[ToolType.BLOG_SEARCH], 25 | edge_types=[EdgeType.GRADE_DOCUMENTS], 26 | node_types=[NodeType.AGENT, NodeType.GENERATE, NodeType.REWRITE] 27 | ) 28 | self._initialized = True 29 | self._workflow = None 30 | 31 | @lru_cache(maxsize=1) 32 | def prepare(self): 33 | """Prepare the agent workflow only when needed""" 34 | self._workflow = StateGraph(SubGraphAgentState) 35 | events = self.setup_events() 36 | 
tools, _, nodes = events 37 | 38 | # Dynamic injection of tools into the agent node 39 | agent = self.inject_tools_and_template(tools, nodes[NodeType.AGENT], prompts.BLOG_SEARCH_PROMPT) 40 | self._workflow.add_node("agent", agent) 41 | self._workflow.add_edge(START, "agent") 42 | 43 | def process(self, input_state: Dict[str, Any]) -> Dict[str, Any]: 44 | self.prepare() 45 | return AgentFactory.create_agent(self._workflow, input_state) 46 | 47 | def studio(self) -> Dict[str, Any]: 48 | """Compile workflow for LangGraph Studio""" 49 | self.prepare() 50 | return self._workflow.compile() 51 | 52 | agent = BlogPostAgent() 53 | 54 | # Initialize graph for LangGraph Studio 55 | graph = agent.studio() -------------------------------------------------------------------------------- /app/agents/implementations/main_deprecated.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Type 2 | from agents.base.agent import BaseAgent 3 | from agents.blog_post.agent import BlogPostAgent 4 | 5 | class CallAgent: 6 | """Factory for creating different types of agents""" 7 | 8 | _agents: Dict[str, Type[BaseAgent]] = { 9 | "blog_search": BlogPostAgent, 10 | # Add more agents here as they are implemented 11 | } 12 | 13 | @classmethod 14 | def create_agent(cls, agent_type: str) -> BaseAgent: 15 | """Create an agent instance based on the specified type""" 16 | if agent_type not in cls._agents: 17 | raise ValueError(f"Unknown agent type: {agent_type}") 18 | 19 | return cls._agents[agent_type]() 20 | 21 | def process_query(agent_type: str, query: str) -> Dict: 22 | """Process a query using the specified agent type""" 23 | agent = CallAgent.create_agent(agent_type) 24 | return agent.process_input({"query": query}) 25 | 26 | def get_blog_posts(): 27 | return process_query("blog_search", "How Harrison Chase defines an agent?") -------------------------------------------------------------------------------- /app/agents/implementations/search_amazon_products/agent_by_json.py: -------------------------------------------------------------------------------- 1 | from langgraph.graph import END, START, StateGraph 2 | from app.agents.core.agent_state import AgentState 3 | from functools import lru_cache 4 | from langgraph.prebuilt import ToolNode, tools_condition 5 | 6 | from app.agents.core.base_agent import BaseAgent 7 | from app.agents.langchain.factory import AgentFactory 8 | from app.utils.types import ToolType, EdgeType, NodeType 9 | from app.utils.prompts import prompts 10 | 11 | class SearchAmazonProductsAgentByJson(BaseAgent): 12 | """Agent specialized in searching amazon products""" 13 | _instance = None 14 | 15 | def __new__(cls): 16 | if cls._instance is None: 17 | cls._instance = super().__new__(cls) 18 | return cls._instance 19 | 20 | def __init__(self): 21 | if not hasattr(self, '_initialized'): 22 | super().__init__( 23 | tool_types=[ToolType.AMAZON_PRODUCTS_SEARCH_BY_JSON], 24 | edge_types=[EdgeType.GRADE_DOCUMENTS], 25 | node_types=[NodeType.AGENT, NodeType.GENERATE, NodeType.REWRITE] 26 | ) 27 | self._workflow = None 28 | self._initialized = True 29 | 30 | @lru_cache(maxsize=1) 31 | def prepare(self): 32 | """Initialize workflow components and configure the graph structure.""" 33 | self._workflow = StateGraph(AgentState) 34 | 35 | events = self.setup_events() 36 | tools, edges, nodes = events 37 | agent = self.inject_tools_and_template(tools, nodes[NodeType.AGENT], prompts.AGENT_PROMPT_BY_JSON) 38 | 39 | self._workflow.add_node("agent", agent) 40 | 
self._workflow.add_node("retrieve", ToolNode(tools)) 41 | self._workflow.add_node("rewrite", nodes[NodeType.REWRITE]) 42 | self._workflow.add_node("generate", nodes[NodeType.GENERATE]) 43 | self._workflow.add_edge(START, "agent") 44 | self._workflow.add_conditional_edges( 45 | "agent", 46 | tools_condition, 47 | {"tools": "retrieve", END: END} 48 | ) 49 | self._workflow.add_conditional_edges( 50 | "retrieve", 51 | edges[EdgeType.GRADE_DOCUMENTS], 52 | {"generate": "generate", "rewrite": "rewrite"} 53 | ) 54 | self._workflow.add_edge("generate", END) 55 | 56 | def process(self, input_state: dict): 57 | self.prepare() 58 | 59 | result = AgentFactory.create_agent(self._workflow, input_state) 60 | return result 61 | 62 | def studio(self): 63 | """Compile workflow for LangGraph Studio""" 64 | self.prepare() 65 | return self._workflow.compile() 66 | 67 | agent = SearchAmazonProductsAgentByJson() 68 | 69 | # Initialize graph for LangGraph Studio 70 | graph = agent.studio() -------------------------------------------------------------------------------- /app/agents/implementations/search_amazon_products/agent_by_superlinked.py: -------------------------------------------------------------------------------- 1 | from langgraph.graph import END, START, StateGraph 2 | from app.agents.core.agent_state import AgentState 3 | from functools import lru_cache 4 | from typing import Dict, Any 5 | 6 | from app.agents.core.base_agent import BaseAgent 7 | from app.agents.langchain.factory import AgentFactory 8 | from app.utils.types import NodeType, ToolType 9 | from app.utils.prompts import prompts 10 | 11 | class SearchAmazonProductsAgentBySuperlinked(BaseAgent): 12 | """Agent specialized in searching amazon products""" 13 | _instance = None 14 | 15 | def __new__(cls): 16 | if cls._instance is None: 17 | cls._instance = super().__new__(cls) 18 | cls._instance.__init__() 19 | return cls._instance 20 | 21 | def __init__(self): 22 | if not hasattr(self, '_initialized'): 23 | super().__init__( 24 | tool_types=[ToolType.AMAZON_PRODUCTS_SEARCH_BY_SUPERLINKED], 25 | edge_types=[], 26 | node_types=[NodeType.AGENT] 27 | ) 28 | self._workflow = None 29 | self._initialized = True 30 | 31 | @lru_cache(maxsize=1) 32 | def prepare(self): 33 | """Initialize workflow components and configure the graph structure.""" 34 | self._workflow = StateGraph(AgentState) 35 | 36 | events = self.setup_events() 37 | tools, _, nodes = events 38 | # Dynamic injection of tools into the agent node 39 | agent = self.inject_tools_and_template(tools, nodes[NodeType.AGENT], prompts.AGENT_PROMPT_BY_SUPERLINKED) 40 | self._workflow.add_node("agent", agent) 41 | 42 | self._workflow.set_entry_point('agent') 43 | self._workflow.add_edge(START, "agent") 44 | self._workflow.add_edge("agent", END) 45 | 46 | def process(self, input_state: Dict[str, Any]) -> Dict[str, Any]: 47 | self.prepare() 48 | return AgentFactory.create_agent(self._workflow, input_state) 49 | 50 | def studio(self) -> Dict[str, Any]: 51 | """Compile workflow for LangGraph Studio""" 52 | self.prepare() 53 | return self._workflow.compile() 54 | 55 | agent = SearchAmazonProductsAgentBySuperlinked() 56 | 57 | # Initialize graph for LangGraph Studio 58 | graph = agent.studio() -------------------------------------------------------------------------------- /app/agents/implementations/supervisor.py: -------------------------------------------------------------------------------- 1 | from langgraph.graph import END, START, StateGraph 2 | from langgraph.prebuilt import tools_condition 3 | 
from app.agents.core.agent_state import AgentState 4 | from functools import lru_cache 5 | from typing import Dict, Any 6 | 7 | from app.agents.core.base_agent import BaseAgent 8 | from app.agents.implementations.blog_post.agent import graph as graph_blog 9 | from app.agents.implementations.search_amazon_products.agent_by_superlinked import graph as graph_search_by_superlinked 10 | from app.agents.langchain.factory import AgentFactory 11 | from app.utils.types import NodeType 12 | from app.agents.langchain.memory.long_term import long_term_memory 13 | from functools import partial 14 | 15 | class SupervisorAgent(BaseAgent): 16 | """Agent specialized in supervising other agents""" 17 | _instance = None 18 | 19 | def __new__(cls): 20 | if cls._instance is None: 21 | cls._instance = super().__new__(cls) 22 | cls._instance.__init__() 23 | return cls._instance 24 | 25 | def __init__(self): 26 | if not hasattr(self, '_initialized'): 27 | super().__init__( 28 | tool_types=[], 29 | edge_types=[], 30 | node_types=[NodeType.SUPERVISOR] 31 | ) 32 | self._workflow = None 33 | self._initialized = True 34 | 35 | @lru_cache(maxsize=1) 36 | def prepare(self): 37 | """Initialize workflow components and configure the graph structure.""" 38 | self._workflow = StateGraph(AgentState) 39 | events = self.setup_events() 40 | _, _, nodes = events 41 | 42 | self._workflow.add_node("supervisor", partial(nodes[NodeType.SUPERVISOR], store=long_term_memory)) 43 | self._workflow.add_node("amazon_products_agent", graph_search_by_superlinked) 44 | self._workflow.add_node("blog_post_agent", graph_blog) 45 | 46 | self._workflow.add_edge(START, "supervisor") 47 | self._workflow.add_conditional_edges( 48 | "supervisor", 49 | tools_condition, 50 | { 51 | "amazon_products_agent": "amazon_products_agent", 52 | "blog_post_agent": "blog_post_agent", 53 | END: END 54 | } 55 | ) 56 | self._workflow.add_edge("supervisor", END) 57 | self._workflow.add_edge("blog_post_agent", END) 58 | self._workflow.add_edge("amazon_products_agent", END) 59 | 60 | def process(self, input_state: Dict[str, Any]) -> Dict[str, Any]: 61 | """Process input through the workflow""" 62 | self.prepare() 63 | return AgentFactory.create_agent(self._workflow, input_state) 64 | 65 | def studio(self) -> Dict[str, Any]: 66 | """Compile workflow for LangGraph Studio""" 67 | self.prepare() 68 | return self._workflow.compile() 69 | 70 | agent = SupervisorAgent() 71 | 72 | graph = agent.studio() -------------------------------------------------------------------------------- /app/agents/langchain/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | LangChain integration module for Agentic RAG implementation. 3 | """ 4 | 5 | from .nodes.agent import agent 6 | from .nodes.generate import generate 7 | from .nodes.rewrite import rewrite 8 | 9 | __all__ = [ 10 | 'agent', 11 | 'generate', 12 | 'rewrite', 13 | ] -------------------------------------------------------------------------------- /app/agents/langchain/edges/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Node implementations for the Agentic RAG workflow. 
3 | """ 4 | 5 | from .edges import EdgeProvider 6 | 7 | __all__ = ['EdgeProvider'] -------------------------------------------------------------------------------- /app/agents/langchain/edges/check_relevance.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Any, Literal 2 | from langchain_core.messages import AIMessage 3 | 4 | def check_relevance(state) -> Literal["generate", "rewrite"]: 5 | """Check relevance of retrieved documents.""" 6 | print("---CHECK RELEVANCE---") 7 | 8 | messages = state["messages"] 9 | last_message = messages[-1] 10 | 11 | if not isinstance(last_message, AIMessage): 12 | raise ValueError("The 'checkRelevance' node requires the most recent message to be an AIMessage") 13 | 14 | if not hasattr(last_message, "tool_calls"): 15 | raise ValueError("The 'checkRelevance' node requires the most recent message to contain tool calls") 16 | 17 | tool_calls = last_message.tool_calls 18 | if not tool_calls or len(tool_calls) == 0: 19 | raise ValueError("Last message was not a function message") 20 | 21 | if tool_calls[0].args.get("binary_score") == "yes": 22 | print("---DECISION: DOCS RELEVANT---") 23 | return "generate" 24 | 25 | print("---DECISION: DOCS NOT RELEVANT---") 26 | return "rewrite" -------------------------------------------------------------------------------- /app/agents/langchain/edges/edges.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Literal 2 | import logging 3 | from importlib import import_module 4 | from app.utils.types import EdgeType 5 | from app.agents.langchain.interface.base_provider import BaseProvider 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | class EdgeProvider(BaseProvider[EdgeType]): 10 | """Edge provider for blog post related operations""" 11 | 12 | def __init__(self): 13 | self._edge_imports: Dict[EdgeType, tuple[str, str]] = { 14 | EdgeType.GRADE_DOCUMENTS: ("app.agents.langchain.edges.grade_documents", "grade_documents"), 15 | } 16 | super().__init__() 17 | 18 | def _initialize_items(self) -> None: 19 | """Lazy-load edges dynamically to prevent unnecessary imports.""" 20 | self._items = {} 21 | 22 | for edge_type, (module_path, func_name) in self._edge_imports.items(): 23 | try: 24 | module = import_module(module_path) 25 | self._items[edge_type] = getattr(module, func_name) 26 | except (ImportError, AttributeError) as e: 27 | logger.error(f"Failed to import edge {edge_type}: {e}") 28 | 29 | def get_items(self) -> Dict[EdgeType, Any]: 30 | """Get all edges""" 31 | return self._items 32 | 33 | def evaluate(self, state: Any) -> Literal["generate", "rewrite", str]: 34 | """Evaluate the state using the appropriate edge condition""" 35 | edges = self.get_items() 36 | if EdgeType.GRADE_DOCUMENTS in edges: 37 | return edges[EdgeType.GRADE_DOCUMENTS](state) 38 | return "generate" # Fallback 39 | -------------------------------------------------------------------------------- /app/agents/langchain/edges/grade_documents.py: -------------------------------------------------------------------------------- 1 | from typing import Literal 2 | from langchain_core.prompts import PromptTemplate 3 | from langchain_openai import ChatOpenAI 4 | from pydantic import BaseModel, Field 5 | 6 | from app.agents.core.agent_state import AgentState 7 | from app.utils.constants import constants 8 | from app.utils.prompts import prompts 9 | 10 | def grade_documents(state: AgentState) -> Literal["generate", "rewrite"]: 11 | 
"""Determines whether the retrieved documents are relevant to the question.""" 12 | class grade(BaseModel): 13 | """Binary score for relevance check.""" 14 | binary_score: str = Field(description="Relevance score 'yes' or 'no'") 15 | explanation: str = Field(description="Brief explanation of the relevance decision") 16 | 17 | model = ChatOpenAI(temperature=0, model=constants.LLM_MODEL, streaming=True) 18 | llm_with_tool = model.with_structured_output(grade) 19 | 20 | prompt = PromptTemplate( 21 | template=prompts.GRADE_DOCUMENTS_PROMPT_OPT_2, 22 | input_variables=["context", "question"], 23 | ) 24 | 25 | chain = prompt | llm_with_tool 26 | messages = state["messages"] 27 | question = messages[0].content 28 | docs = messages[-1].content 29 | 30 | rewrite_count = state["rewrite_count"] 31 | 32 | # Define a max rewrite limit to avoid infinite loops 33 | MAX_REWRITE_ATTEMPTS = 2 34 | 35 | scored_result = chain.invoke({"question": question, "context": docs}) 36 | 37 | if scored_result.binary_score == "yes": 38 | return "generate" 39 | else: 40 | state["rewrite_count"] = rewrite_count + 1 41 | state["explanation"] = scored_result.explanation 42 | 43 | # Stop rewriting after max attempts 44 | if state["rewrite_count"] >= MAX_REWRITE_ATTEMPTS: 45 | return "generate" 46 | 47 | return "rewrite" 48 | -------------------------------------------------------------------------------- /app/agents/langchain/factory.py: -------------------------------------------------------------------------------- 1 | from langgraph.graph import StateGraph 2 | from langchain_core.messages import HumanMessage 3 | from app.utils.helpers import invoke, stream 4 | 5 | class AgentFactory: 6 | @staticmethod 7 | def create_agent(workflow: StateGraph, input_data: dict): 8 | """Execute the agent workflow without recompiling it""" 9 | current_query = input_data.get("query", "") 10 | 11 | if not isinstance(current_query, str) or not current_query.strip(): 12 | return {"messages": ["No valid query provided."]} 13 | 14 | graph = workflow.compile() 15 | 16 | formatted_input = { 17 | "messages": ["user", current_query], 18 | "tools": input_data.get("tools", []), 19 | "template": input_data.get("template", ""), 20 | "next": "agent" 21 | } 22 | 23 | return invoke(graph, formatted_input) -------------------------------------------------------------------------------- /app/agents/langchain/interface/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | LangChain integration module for Agentic RAG implementation. 
3 | """ 4 | 5 | from .events import AgentEvents 6 | from .base_provider import BaseProvider 7 | 8 | __all__ = [ 9 | 'AgentEvents', 10 | 'BaseProvider', 11 | ] -------------------------------------------------------------------------------- /app/agents/langchain/interface/base_provider.py: -------------------------------------------------------------------------------- 1 | # Add type hints for better code clarity 2 | from abc import abstractmethod 3 | from typing import Dict, List, Optional, Sequence, Any, TypeVar, Generic 4 | 5 | T = TypeVar('T') 6 | class BaseProvider(Generic[T]): 7 | def __init__(self): 8 | self._items: Dict[T, Any] = {} 9 | self._initialize_items() 10 | 11 | @abstractmethod 12 | def _initialize_items(self) -> None: 13 | pass 14 | 15 | @abstractmethod 16 | def get_items(self) -> Dict[T, Any]: 17 | return self._items 18 | 19 | def get_items_by_types(self, types: Optional[Sequence[T]]) -> List[Any]: 20 | """Return items by types""" 21 | items_dict = self.get_items() 22 | # If no types specified, return empty dictionary 23 | if not types: 24 | return {} 25 | 26 | selected_items = {} 27 | for item_type in types: 28 | if item_type in items_dict: 29 | selected_items[item_type] = items_dict[item_type] 30 | return selected_items -------------------------------------------------------------------------------- /app/agents/langchain/interface/events.py: -------------------------------------------------------------------------------- 1 | from typing import Any, List, Type, Union 2 | from app.agents.core.agent_state import AgentState 3 | from app.agents.langchain.interface.base_provider import BaseProvider 4 | 5 | class AgentEvents: 6 | @staticmethod 7 | def mapper( 8 | tools: Union[List[Any], Type[BaseProvider]], 9 | edges: Union[List[Any], Type[BaseProvider]], 10 | nodes: Union[List[Any], Type[BaseProvider]], 11 | ) -> List[Any]: 12 | tools_list = list(tools.values()) 13 | return [tools_list, edges, nodes] -------------------------------------------------------------------------------- /app/agents/langchain/memory/long_term.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List, Optional, Tuple, Union 2 | from datetime import datetime 3 | import uuid 4 | import logging 5 | 6 | from langchain_openai import OpenAIEmbeddings, ChatOpenAI 7 | from qdrant_client import QdrantClient, models 8 | from app.agents.config.qdrant import QdrantConfig 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | class LongTermMemoryStore(): 13 | """Long-term memory store implementation using Qdrant for semantic search via LangChain""" 14 | 15 | def __init__( 16 | self, 17 | collection_name: Optional[str] = None, 18 | embedding_model: str = "text-embedding-3-small", 19 | embedding_dims: int = 1536, 20 | ): 21 | """Initialize the memory store with Qdrant connection and in-memory store""" 22 | config = QdrantConfig() 23 | 24 | try: 25 | self.embeddings = OpenAIEmbeddings(model=embedding_model, dimensions=embedding_dims) 26 | except Exception as e: 27 | logger.error(f"Failed to initialize embeddings: {e}") 28 | raise 29 | 30 | self.collection_name = collection_name or config.QDRANT_COLLECTION_NAME 31 | self.embedding_dims = embedding_dims 32 | 33 | url = config.QDRANT_URL.get_secret_value() 34 | api_key = config.QDRANT_API_KEY.get_secret_value() 35 | 36 | try: 37 | self.client = QdrantClient( 38 | url=url, 39 | api_key=api_key, 40 | timeout=10.0 41 | ) 42 | 43 | # Check if collection exists, if not create it 44 | collections = 
self.client.get_collections().collections 45 | collection_names = [collection.name for collection in collections] 46 | 47 | if self.collection_name not in collection_names: 48 | logger.info(f"Creating new collection: {self.collection_name}") 49 | self.client.create_collection( 50 | collection_name=self.collection_name, 51 | vectors_config=models.VectorParams( 52 | size=self.embedding_dims, 53 | distance=models.Distance.COSINE 54 | ) 55 | ) 56 | logger.info(f"Created new collection: {self.collection_name}") 57 | 58 | except Exception as e: 59 | logger.error(f"Failed to initialize Qdrant store: {e}") 60 | raise 61 | 62 | def put( 63 | self, 64 | namespace: Union[Tuple[str, str], str], 65 | key: str, 66 | value: Dict[str, Any], 67 | *, 68 | index: Optional[Union[List[str], bool]] = None 69 | ) -> None: 70 | """Store a value in both Qdrant and in-memory store with semantic indexing 71 | 72 | Args: 73 | namespace: Either a tuple of (user_id, memory_type) or a string in format "user_id:memory_type" 74 | key: Unique identifier for the memory 75 | value: Dictionary containing the memory data 76 | index: List of fields to embed, or False to skip embedding 77 | """ 78 | # Convert tuple namespace to string if needed 79 | if isinstance(namespace, tuple): 80 | namespace = f"{namespace[0]}:{namespace[1]}" 81 | 82 | if index is False: 83 | return 84 | 85 | try: 86 | fields_to_embed = index if isinstance(index, list) else ["memory"] 87 | 88 | texts_to_embed = [] 89 | for field in fields_to_embed: 90 | if field in value: 91 | field_value = value[field] 92 | if isinstance(field_value, str): 93 | texts_to_embed.append(field_value) 94 | else: 95 | texts_to_embed.append(str(field_value)) 96 | 97 | text_to_embed = " ".join(texts_to_embed) 98 | if not text_to_embed.strip(): 99 | logger.warning(f"No text to embed found in fields: {fields_to_embed}") 100 | return 101 | 102 | vector = self.embeddings.embed_query(text_to_embed) 103 | 104 | point_id = str(uuid.uuid4()) 105 | self.client.upsert( 106 | collection_name=self.collection_name, 107 | points=[ 108 | models.PointStruct( 109 | id=point_id, 110 | payload={ 111 | "namespace": namespace, 112 | "key": key, 113 | "value": value, 114 | "created_at": datetime.now().isoformat(), 115 | "updated_at": datetime.now().isoformat(), 116 | "embedded_fields": fields_to_embed 117 | }, 118 | vector=vector 119 | ) 120 | ] 121 | ) 122 | logger.info(f"Successfully stored vector with id {point_id} in Qdrant") 123 | except Exception as e: 124 | logger.error(f"Failed to store in Qdrant: {e}") 125 | 126 | def search( 127 | self, 128 | namespace: Union[Tuple[str, str], str], 129 | query: str, 130 | limit: int = 5, 131 | *, 132 | score_threshold: float = 0.5 133 | ) -> List[Dict[str, Any]]: 134 | """Search for memories semantically similar to the query 135 | 136 | Args: 137 | namespace: Either a tuple of (user_id, memory_type) or a string in format "user_id:memory_type" 138 | query: Query text to search for 139 | limit: Maximum number of results to return 140 | score_threshold: Minimum similarity score (0-1) for results 141 | 142 | Returns: 143 | List of memories with their similarity scores 144 | """ 145 | # Convert tuple namespace to string if needed 146 | if isinstance(namespace, tuple): 147 | namespace = f"{namespace[0]}:{namespace[1]}" 148 | 149 | try: 150 | query_vector = self.embeddings.embed_query(query) 151 | 152 | search_result = self.client.search( 153 | collection_name=self.collection_name, 154 | query_vector=query_vector, 155 | query_filter=models.Filter( 156 | should=[ 157 
| models.FieldCondition( 158 | key="namespace", 159 | match=models.MatchValue(value=namespace) 160 | ) 161 | ] 162 | ), 163 | limit=limit, 164 | score_threshold=score_threshold 165 | ) 166 | 167 | results = [] 168 | for hit in search_result: 169 | memory = hit.payload["value"] 170 | results.append({ 171 | "value": memory, 172 | "score": hit.score, 173 | "id": hit.id, 174 | "created_at": hit.payload.get("created_at"), 175 | "embedded_fields": hit.payload.get("embedded_fields", []) 176 | }) 177 | 178 | logger.debug(f"Found {len(results)} similar memories") 179 | return results 180 | 181 | except Exception as e: 182 | logger.error(f"Failed to search memories: {e}", exc_info=True) 183 | return [] 184 | 185 | def evaluate(self, query: str, memories: List[Dict[str, Any]]) -> Union[bool, str]: 186 | """Evaluate if the memories are relevant to the current query using LLM. 187 | 188 | Args: 189 | query: Current user query 190 | memories: List of retrieved memories with their metadata 191 | 192 | Returns: 193 | Union[bool, str]: Either a boolean indicating if memories are relevant, 194 | or a string containing the refined response 195 | """ 196 | try: 197 | llm = ChatOpenAI(model="gpt-4o-mini", temperature=0) 198 | 199 | memory = memories[0]["value"] 200 | memory_query = memory["query"] 201 | memory_response = memory["response"] 202 | similarity_score = memories[0]["score"] 203 | 204 | eval_prompt = f"""Evaluate if a previous response can be reused for a new query and refine it if needed. 205 | 206 | Current Query: "{query}" 207 | 208 | Previous Interaction: 209 | Query: "{memory_query}" 210 | Response: "{memory_response}" 211 | Similarity Score: {similarity_score:.2f} (0-1 scale) 212 | 213 | Instructions: 214 | 1. First, evaluate if the queries are asking for similar information (e.g., both about books, products, etc.) 215 | 2. If queries are NOT similar, respond with "false" 216 | 3. If queries ARE similar: 217 | - If ALL items in the previous response satisfy the current query's conditions, respond with "true" 218 | - If SOME items match but others don't, respond with a REFINED version of the response that includes ONLY the matching items 219 | - If NO items match the current query's conditions, respond with "false" 220 | 221 | Format your response as either: 222 | - "false" if responses are completely different or no items match 223 | - "true" if all items in the original response match 224 | - A refined response starting with "REFINED:" that includes only matching items in the same format as the original response 225 | 226 | Example refinements: 227 | REFINED: 228 | Here are some books priced under $20: 229 | 1. **Book Title** 230 | - Price: $15.99 231 | - Rating: 4.5 232 | [Rest of the refined response...] 233 | 234 | REFINED: 235 | Harrison Chase defines an AI agent as... 236 | [Rest of the refined response...] 
237 | """ 238 | 239 | result = llm.invoke(eval_prompt) 240 | 241 | content = result.content.strip() 242 | if content.startswith("REFINED:"): 243 | # Return the refined response 244 | return content[8:].strip() # Remove "REFINED:" prefix 245 | else: 246 | # Return boolean for true/false responses 247 | return content.lower() == "true" 248 | 249 | except Exception as e: 250 | logger.error(f"Failed to evaluate memories: {e}", exc_info=True) 251 | return False 252 | 253 | long_term_memory = LongTermMemoryStore() -------------------------------------------------------------------------------- /app/agents/langchain/nodes/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Node implementations for the Agentic RAG workflow. 3 | """ 4 | 5 | from .nodes import NodeProvider 6 | 7 | __all__ = ['NodeProvider'] -------------------------------------------------------------------------------- /app/agents/langchain/nodes/agent.py: -------------------------------------------------------------------------------- 1 | from langgraph.types import Command 2 | from typing import Literal 3 | from langgraph.checkpoint.memory import MemorySaver 4 | from langchain_openai import ChatOpenAI 5 | from langgraph.prebuilt import create_react_agent 6 | 7 | from app.agents.core.agent_state import SubGraphAgentState 8 | from app.utils.constants import constants 9 | 10 | def agent(state: SubGraphAgentState) -> Command[Literal['supervisor']]: 11 | """ 12 | Agent that decides whether to use tools or not 13 | """ 14 | tools = state["tools"] 15 | template = state['template'] 16 | memory = MemorySaver() 17 | model = ChatOpenAI(temperature=0, model=constants.LLM_MODEL) 18 | messagesState = state["messages"] 19 | agent = create_react_agent( 20 | model, 21 | tools, 22 | prompt=template, 23 | checkpointer=memory 24 | ) 25 | 26 | agent_response = agent.invoke({"messages": messagesState}) 27 | messages = agent_response["messages"] 28 | 29 | response = Command( 30 | goto = 'supervisor', 31 | update={"next": 'FINISH', "messages": messages}, 32 | graph=Command.PARENT 33 | ) 34 | 35 | return response 36 | -------------------------------------------------------------------------------- /app/agents/langchain/nodes/generate.py: -------------------------------------------------------------------------------- 1 | from langchain_core.output_parsers import StrOutputParser 2 | from langchain_openai import ChatOpenAI 3 | from langchain_core.prompts import PromptTemplate 4 | from langchain import hub 5 | 6 | from app.agents.core.agent_state import AgentState 7 | from app.utils.constants import constants 8 | from app.utils.prompts import prompts 9 | 10 | def generate(state: AgentState): 11 | """Generate answer based on retrieved documents""" 12 | messages = state["messages"] 13 | question = messages[0].content 14 | last_message = messages[-1] 15 | docs = last_message.content 16 | rewrite_count = state.get("rewrite_count", 0) 17 | 18 | # If we've tried rewriting and still found no results, generate a "no results" response 19 | if rewrite_count >= 1 and "DOCS NOT RELEVANT" in docs: 20 | no_results_prompt = PromptTemplate( 21 | template=prompts.NO_RESULTS_PROMPT, 22 | input_variables=["question"] 23 | ) 24 | llm = ChatOpenAI(model_name=constants.LLM_MODEL, temperature=0, streaming=True) 25 | chain = no_results_prompt | llm | StrOutputParser() 26 | response = chain.invoke({"question": question}) 27 | else: 28 | # Prompt 29 | prompt = hub.pull("rlm/rag-prompt") 30 | llm = 
ChatOpenAI(model_name=constants.LLM_MODEL, temperature=0.3, streaming=True) 31 | chain = prompt | llm | StrOutputParser() 32 | response = chain.invoke({"context": docs, "question": question}) 33 | 34 | return {"messages": response} -------------------------------------------------------------------------------- /app/agents/langchain/nodes/nodes.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Literal 2 | import logging 3 | from importlib import import_module 4 | from app.utils.types import NodeType 5 | from app.agents.langchain.interface.base_provider import BaseProvider 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | class NodeProvider(BaseProvider[NodeType]): 10 | """Node provider for blog post related operations""" 11 | 12 | def __init__(self): 13 | self._node_imports: Dict[NodeType, tuple[str, str]] = { 14 | NodeType.AGENT: ("app.agents.langchain.nodes.agent", "agent"), 15 | NodeType.GENERATE: ("app.agents.langchain.nodes.generate", "generate"), 16 | NodeType.REWRITE: ("app.agents.langchain.nodes.rewrite", "rewrite"), 17 | NodeType.SUPERVISOR: ("app.agents.langchain.nodes.supervisor", "supervisor"), 18 | } 19 | super().__init__() 20 | 21 | def _initialize_items(self) -> None: 22 | """Lazy-load nodes dynamically to prevent unnecessary imports.""" 23 | self._items = {} 24 | 25 | for node_type, (module_path, func_name) in self._node_imports.items(): 26 | try: 27 | module = import_module(module_path) 28 | self._items[node_type] = getattr(module, func_name) 29 | except (ImportError, AttributeError) as e: 30 | logger.error(f"Failed to import node {node_type}: {e}") 31 | 32 | def get_items(self) -> Dict[NodeType, Any]: 33 | """Get all nodes""" 34 | return self._items 35 | 36 | def evaluate(self, state: Any) -> Literal["generate", "rewrite", str]: 37 | """Evaluate the state using the appropriate node condition""" 38 | nodes = self.get_items() 39 | if NodeType.GENERATE in nodes: 40 | return nodes[NodeType.GENERATE](state) 41 | return "generate" # Fallback 42 | -------------------------------------------------------------------------------- /app/agents/langchain/nodes/rewrite.py: -------------------------------------------------------------------------------- 1 | from langchain_core.messages import HumanMessage 2 | from langchain_openai import ChatOpenAI 3 | from typing import Dict, Any 4 | 5 | from app.agents.core.agent_state import AgentState 6 | from app.utils.constants import constants 7 | 8 | def rewrite(state: AgentState) -> Dict[str, Any]: 9 | messages = state["messages"] 10 | question = messages[0].content 11 | rewrite_count = state.get("rewrite_count", 0) 12 | 13 | # Max rewrite attempts to prevent looping 14 | MAX_REWRITE_ATTEMPTS = 2 15 | if rewrite_count >= MAX_REWRITE_ATTEMPTS: 16 | return {"messages": messages, "rewrite_count": rewrite_count} 17 | 18 | strategy = "Make the question more specific" if rewrite_count == 0 else "Broaden search scope" 19 | 20 | msg = [ 21 | HumanMessage( 22 | content=f"""Transform this question for better results. 
23 | Original question: {question} 24 | Strategy: {strategy} 25 | Provide only the rewritten question, no explanations.""" 26 | ) 27 | ] 28 | 29 | model = ChatOpenAI(temperature=0, model=constants.LLM_MODEL, streaming=True) 30 | response = model.invoke(msg) 31 | 32 | return { 33 | "messages": [response], 34 | "rewrite_count": rewrite_count + 1 35 | } 36 | -------------------------------------------------------------------------------- /app/agents/langchain/nodes/supervisor.py: -------------------------------------------------------------------------------- 1 | from langchain_core.runnables import RunnableConfig 2 | from langchain_openai import ChatOpenAI 3 | from langgraph.graph import END 4 | from langgraph.types import Command 5 | from langchain.schema import BaseStore 6 | from typing import TypedDict, Literal 7 | import uuid 8 | from datetime import datetime 9 | 10 | from app.utils.prompts import prompts 11 | from app.utils.constants import constants 12 | from app.agents.core.agent_state import AgentState 13 | 14 | supervisor_llm = ChatOpenAI(model=constants.LLM_MODEL) 15 | 16 | class SupervisorOutput(TypedDict): 17 | next: Literal["blog_post_agent", "amazon_products_agent", "FINISH"] 18 | task_description_for_agent: str 19 | message_completion_summary: str 20 | 21 | def supervisor( 22 | state: AgentState, 23 | config: RunnableConfig, 24 | *, 25 | store: BaseStore 26 | ) -> Command[Literal["blog_post_agent", "amazon_products_agent", END]]: 27 | """ 28 | This node is responsible for delegating tasks to other agents. 29 | It first checks the semantic memory for similar previous queries and their results. 30 | If a similar query exists, it may reuse the results or decide to refresh them. 31 | Otherwise, it delegates the task to the appropriate agent. 
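    Routing contract (mirrored in the body below): the node always returns a
    Command whose goto is either END or one of the member agents, and whose
    update sets "next" (and, where there is something to surface, "messages").
    When the incoming state already has next == "FINISH", the query/response
    pair is persisted to the store under the "{user_id}:{chat_id}:memories"
    namespace before control returns to END; otherwise stored memories are
    searched and evaluated first, and only unanswered queries are delegated.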
32 | """ 33 | user_id = config["configurable"]["user_id"] 34 | chat_id = config["configurable"]["chat_id"] 35 | 36 | # Get the initial query 37 | initial_query = state["messages"][0].content 38 | 39 | if state.get("next") == "FINISH": 40 | response = state["messages"][-1].content 41 | memory_text = f"User Query: {initial_query}\nAgent Response: {response}" 42 | 43 | store.put( 44 | namespace=f"{user_id}:{chat_id}:memories", 45 | key=str(uuid.uuid4()), 46 | value={ 47 | "query": initial_query, 48 | "response": response, 49 | "memory": memory_text, 50 | "created_at": datetime.now().isoformat(), 51 | "type": "conversation" 52 | }, 53 | index=["memory"] 54 | ) 55 | return Command(goto=END, update={"next": END}) 56 | 57 | # Get the current user query 58 | current_query = state["messages"][-1].content 59 | 60 | memories = store.search( 61 | namespace=f"{user_id}:{chat_id}:memories", 62 | query=current_query, 63 | limit=3 64 | ) 65 | 66 | if(memories): 67 | evaluation_result = store.evaluate(query=current_query, memories=memories) 68 | if isinstance(evaluation_result, str): 69 | return Command(goto=END, update={"next": END, "messages": evaluation_result}) 70 | elif evaluation_result: 71 | memory_response = memories[0]["value"]["response"] 72 | return Command(goto=END, update={"next": END, "messages": memory_response}) 73 | 74 | members = ["blog_post_agent", "amazon_products_agent"] 75 | agent_members_prompt_final = f""" 76 | blog_post_agent: 77 | - Prompt: {prompts.BLOG_SEARCH_PROMPT} 78 | amazon_products_agent: 79 | - Prompt: {prompts.AMAZON_SEARCH_PROMPT} 80 | """ 81 | supervisor_system_prompt = prompts.supervisor_system_prompt(members, agent_members_prompt_final) 82 | messages = [{"role": "system", "content": supervisor_system_prompt}] + state["messages"] 83 | response = supervisor_llm.with_structured_output(SupervisorOutput).invoke(messages) 84 | goto = response["next"] 85 | 86 | if goto == "FINISH": 87 | return Command(goto=END, update={"next": END, "messages": response["message_completion_summary"]}) 88 | 89 | new_messages = [{"role": "assistant", "content": response["task_description_for_agent"]}] 90 | return Command(goto=goto, update={"next": goto, "messages": new_messages}) -------------------------------------------------------------------------------- /app/agents/langchain/template.py: -------------------------------------------------------------------------------- 1 | 2 | from typing import Any, List, Type 3 | from langchain_core.prompts import PromptTemplate 4 | from langchain_openai import ChatOpenAI 5 | 6 | from app.utils.constants import constants 7 | from app.agents.langchain.state import AgentState 8 | 9 | class LangChainTemplate: 10 | """Factory for creating LangChain components""" 11 | 12 | @staticmethod 13 | def create_llm(model_name: str = None, **kwargs) -> ChatOpenAI: 14 | """Create a ChatOpenAI instance with specified parameters""" 15 | return ChatOpenAI( 16 | temperature=kwargs.get('temperature', 0), 17 | model=model_name or constants.LLM_MODEL, 18 | streaming=kwargs.get('streaming', True) 19 | ) 20 | 21 | @staticmethod 22 | def create_prompt(template: str, input_variables: List[str]) -> PromptTemplate: 23 | """Create a prompt template""" 24 | return PromptTemplate( 25 | template=template, 26 | input_variables=input_variables 27 | ) 28 | 29 | @staticmethod 30 | def create_state() -> Type[AgentState]: 31 | """Create the agent state type""" 32 | return AgentState 33 | 34 | @staticmethod 35 | def create_chain(prompt: PromptTemplate, llm: ChatOpenAI, output_parser: Any = 
None): 36 | """Create a chain with the given components""" 37 | if output_parser: 38 | return prompt | llm | output_parser 39 | return prompt | llm -------------------------------------------------------------------------------- /app/agents/langchain/tools/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Node implementations for the Agentic RAG workflow. 3 | """ 4 | 5 | from .tools import ToolProvider 6 | 7 | __all__ = ['ToolProvider'] -------------------------------------------------------------------------------- /app/agents/langchain/tools/amazon_products_search.py: -------------------------------------------------------------------------------- 1 | from langchain_core.tools import tool 2 | from app.agents.langchain.vector_store.json_retriever import json_retriever 3 | from langchain_core.messages import AIMessage 4 | 5 | @tool('search_products_by_json', description="Tool for searching products based on a user's query.") 6 | def by_json(query: str): 7 | """Search for Amazon products using JSON data""" 8 | retriever = json_retriever() 9 | docs = retriever.invoke(query) 10 | doc_txt = docs[1].page_content 11 | return AIMessage(content=doc_txt) 12 | 13 | @tool('search_products_by_superlinked', description="Tool for searching products based on a user's query.") 14 | def by_superlinked(query: str): 15 | """Search for Amazon products using Superlinked""" 16 | from app.agents.langchain.vector_store.sl_amazon_products_retriever import superlinked_amazon_products_retriever 17 | return superlinked_amazon_products_retriever(query) -------------------------------------------------------------------------------- /app/agents/langchain/tools/blog_posts.py: -------------------------------------------------------------------------------- 1 | from langchain_core.tools import tool 2 | from app.agents.langchain.vector_store.url_retriever import url_retriever 3 | from typing import List 4 | from langchain_core.documents import Document 5 | 6 | def format_search_results(docs: List[Document]) -> str: 7 | """Format search results in a clear and structured way""" 8 | if not docs: 9 | return "No relevant information found." 
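    # What follows de-duplicates documents by exact page_content (retriever
    # order is kept; no re-ranking happens despite the "sort by relevance"
    # wording below), then renders each unique document as a
    # "Source / Title / Content" block and joins the blocks with "---"
    # separators so the tool returns one structured string to the agent.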
10 | 11 | # Remove duplicate content and sort by relevance 12 | seen_content = set() 13 | unique_docs = [] 14 | 15 | for doc in docs: 16 | content = doc.page_content.strip() 17 | if content not in seen_content: 18 | seen_content.add(content) 19 | unique_docs.append(doc) 20 | 21 | formatted_results = [] 22 | for doc in unique_docs: 23 | metadata = doc.metadata 24 | title = metadata.get('title', 'No title') 25 | source = metadata.get('source', 'No source') 26 | content = doc.page_content.strip() 27 | 28 | # Clean up content formatting 29 | content = content.replace('\n\t\n', '\n').replace('\n\n\n', '\n\n') 30 | content = content.replace('\t', '').strip() 31 | 32 | if content: # Only include non-empty content 33 | result = f"Source: {source}\nTitle: {title}\nContent:\n{content}\n" 34 | formatted_results.append(result) 35 | 36 | return "\n---\n".join(formatted_results) 37 | 38 | @tool('blog_search', description="Search for specific information in blog posts.") 39 | def search_in_blog_posts_tool(query: str): 40 | """Search for relevant information in blog posts 41 | 42 | Args: 43 | query: The search query string 44 | 45 | Returns: 46 | Formatted string containing relevant blog post content 47 | """ 48 | retriever = url_retriever() 49 | docs = retriever.invoke(query) 50 | return format_search_results(docs) 51 | 52 | @tool('blog_advance_search', description="Advanced search in blog posts with metadata filtering.") 53 | def search_in_blog_posts_tool_advance(query: str): 54 | """Advanced search in blog posts with metadata filtering""" 55 | # TODO: Implement advanced search with metadata filtering 56 | pass 57 | 58 | @tool('blog_summary', description="Generate a concise summary of blog post search results.") 59 | def search_in_blog_posts_tool_summary(query: str): 60 | """Generate a concise summary of blog post search results""" 61 | # TODO: Implement summary generation 62 | pass -------------------------------------------------------------------------------- /app/agents/langchain/tools/tools.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict 2 | import logging 3 | from app.agents.langchain.interface.base_provider import BaseProvider 4 | from app.utils.types import ToolType 5 | 6 | # Explicitly import tool functions instead of using import_module 7 | from app.agents.langchain.tools.blog_posts import ( 8 | search_in_blog_posts_tool, 9 | search_in_blog_posts_tool_advance, 10 | search_in_blog_posts_tool_summary, 11 | ) 12 | from app.agents.langchain.tools.amazon_products_search import ( 13 | by_json, 14 | by_superlinked, 15 | ) 16 | 17 | class ToolProvider(BaseProvider[ToolType]): 18 | """Provider for all available tools""" 19 | _instance = None 20 | 21 | def __new__(cls): 22 | if cls._instance is None: 23 | cls._instance = super().__new__(cls) 24 | return cls._instance 25 | 26 | def __init__(self): 27 | # Store function references directly instead of module paths 28 | self._tool_imports: Dict[ToolType, Any] = { 29 | ToolType.BLOG_SEARCH: search_in_blog_posts_tool, 30 | ToolType.BLOG_ADVANCE_SEARCH: search_in_blog_posts_tool_advance, 31 | ToolType.BLOG_SUMMARY: search_in_blog_posts_tool_summary, 32 | ToolType.AMAZON_PRODUCTS_SEARCH_BY_JSON: by_json, 33 | ToolType.AMAZON_PRODUCTS_SEARCH_BY_SUPERLINKED: by_superlinked, 34 | } 35 | super().__init__() 36 | self._initialized = True 37 | 38 | def _initialize_items(self) -> None: 39 | """Initialize tools lazily by storing function references""" 40 | self._items = self._tool_imports.copy() 41 | 
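    # Note: ToolProvider is a process-wide singleton (__new__ reuses _instance),
    # and _tool_imports maps ToolType members straight to the @tool-decorated
    # callables, so _initialize_items only copies references and nothing is
    # executed at construction time. get_items() below hands those callables
    # back unexecuted for the agents to bind.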
42 | def get_items(self) -> Dict[ToolType, Any]: 43 | """Get all tools (as function references, not executed)""" 44 | return self._items 45 | -------------------------------------------------------------------------------- /app/agents/langchain/vector_store/json_retriever.py: -------------------------------------------------------------------------------- 1 | import json 2 | from pathlib import Path 3 | from langchain_community.vectorstores import Chroma 4 | from langchain_openai import OpenAIEmbeddings 5 | from langchain_text_splitters import RecursiveCharacterTextSplitter 6 | from langchain_core.documents import Document 7 | from dotenv import load_dotenv 8 | 9 | from app.utils.constants import constants 10 | 11 | load_dotenv() 12 | 13 | def json_retriever(): 14 | """Setup and return the document retriever""" 15 | # Create data directory if it doesn't exist 16 | data_dir = Path("data") 17 | data_dir.mkdir(exist_ok=True) 18 | 19 | # Create persistent directory for Chroma 20 | persist_dir = data_dir / "chroma_db" 21 | persist_dir.mkdir(exist_ok=True) 22 | 23 | # Check if dataset exists 24 | if not constants.PROCESSED_DATASET_PATH.exists(): 25 | raise FileNotFoundError(f"Dataset not found at {constants.PROCESSED_DATASET_PATH}. Please ensure the file exists.") 26 | 27 | docs = [] 28 | seen_titles = set() # Track seen titles to avoid duplicates 29 | 30 | with open(constants.PROCESSED_DATASET_PATH, 'r') as f: 31 | for line in f: 32 | if line.strip(): # Skip empty lines 33 | data = json.loads(line) 34 | title = data.get('title', '') 35 | 36 | # Skip if we've seen this title before 37 | if title in seen_titles: 38 | continue 39 | seen_titles.add(title) 40 | 41 | # Handle price that could be string or float 42 | price_raw = data.get('price', 0) 43 | if isinstance(price_raw, str): 44 | price_str = price_raw.replace('$', '').replace(',', '') 45 | try: 46 | price = float(price_str) 47 | except ValueError: 48 | price = 0.0 49 | else: 50 | price = float(price_raw) 51 | 52 | # Convert category list to string if it exists 53 | category = data.get('category', []) 54 | category_str = ', '.join(category) if isinstance(category, list) else str(category) 55 | 56 | # Create a rich page content that includes price for better matching 57 | page_content = f"Title: {title}. 
Price: ${price:.2f} Category: {category_str} rating: {data.get('review_rating', '')}" 58 | 59 | docs.append(Document( 60 | page_content=page_content, 61 | metadata={ 62 | 'title': title, 63 | 'price': price, # Store as float for easy comparison 64 | 'type': data.get('type', ''), 65 | 'category': category_str, 66 | 'rating': data.get('review_rating', ''), 67 | 'reviews': data.get('review_count', '') 68 | } 69 | )) 70 | 71 | text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder( 72 | chunk_size=constants.CHUNK_SIZE, 73 | chunk_overlap=constants.CHUNK_OVERLAP 74 | ) 75 | doc_splits = text_splitter.split_documents(docs) 76 | 77 | # Use persistent storage for Chroma 78 | vectorstore = Chroma.from_documents( 79 | documents=doc_splits, 80 | collection_name="amazon-products", 81 | embedding=OpenAIEmbeddings(model=constants.EMBEDDING_MODEL), 82 | #persist_directory=str(persist_dir) 83 | ) 84 | return vectorstore.as_retriever() -------------------------------------------------------------------------------- /app/agents/langchain/vector_store/sl_amazon_products_retriever.py: -------------------------------------------------------------------------------- 1 | from app.agents.clients.superlinked import superlinked 2 | from app.agents.schema.superlinked import query_search 3 | from langchain_core.messages import AIMessage 4 | 5 | def superlinked_amazon_products_retriever(query: str): 6 | superlinked.setup() 7 | result = superlinked.app.query( 8 | query_search.semantic_query, 9 | natural_query=query, 10 | limit=3 11 | ) 12 | 13 | to_pandas = result.to_pandas() 14 | 15 | # Extract relevant fields 16 | products = [] 17 | for index, row in to_pandas.iterrows(): 18 | title = row["title"] 19 | price = f"${row['price']:.2f}" 20 | rating = f"{row['review_rating']} ({row['review_count']} reviews)" 21 | product_id = row["id"] 22 | 23 | formatted_output = f"{title}\nPrice: {price}\nRating: {rating}\n" 24 | products.append(formatted_output) 25 | 26 | result_string = "\n".join(products) 27 | 28 | return result_string 29 | -------------------------------------------------------------------------------- /app/agents/langchain/vector_store/url_retriever.py: -------------------------------------------------------------------------------- 1 | from langchain_community.document_loaders import WebBaseLoader 2 | from langchain_community.vectorstores import Chroma 3 | from langchain_openai import OpenAIEmbeddings 4 | from dotenv import load_dotenv 5 | from typing import List 6 | from langchain_core.documents import Document 7 | 8 | from app.utils.constants import constants 9 | 10 | load_dotenv() 11 | 12 | _retriever = None 13 | 14 | def url_retriever(): 15 | """Setup and return the document retriever""" 16 | global _retriever 17 | 18 | if _retriever is not None: 19 | return _retriever 20 | 21 | docs_list: List[Document] = [] 22 | for url in constants.URLS: 23 | loader = WebBaseLoader( 24 | url, 25 | header_template={"User-Agent": "Mozilla/5.0"}, 26 | verify_ssl=False 27 | ) 28 | docs = loader.load() 29 | docs_list.extend(docs) 30 | 31 | vectorstore = Chroma.from_documents( 32 | documents=docs_list, 33 | embedding=OpenAIEmbeddings( 34 | model=constants.EMBEDDING_MODEL, 35 | dimensions=1536 36 | ), 37 | ) 38 | 39 | # CONFIGURE RETRIEVER WITH IMPROVED SEARCH PARAMETERS 40 | _retriever = vectorstore.as_retriever( 41 | search_type="mmr", # Use Maximum Marginal Relevance 42 | search_kwargs={ 43 | "k": 2, # Return top 4 most relevant chunks 44 | "fetch_k": 2, # Fetch 4 chunks from the index 45 | } 46 | ) 47 | 48 | 
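    # Note on the MMR settings above: "k" is the number of chunks returned and
    # "fetch_k" is the candidate pool re-ranked by MMR, and both are set to 2,
    # so at most two chunks come back per query (the inline comments mentioning
    # four do not match the configured values). The result is cached in the
    # module-level _retriever so the URLs are loaded and embedded only once.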
return _retriever -------------------------------------------------------------------------------- /app/agents/schema/superlinked/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Deluxer/oliva/fac56e1afb72c2ff151e8e33ecf4896080835775/app/agents/schema/superlinked/__init__.py -------------------------------------------------------------------------------- /app/agents/schema/superlinked/index.py: -------------------------------------------------------------------------------- 1 | from app.utils.constants import constants 2 | import superlinked.framework as sl 3 | 4 | class ProductSchema(sl.Schema): 5 | id: sl.IdField 6 | type: sl.String 7 | category: sl.StringList 8 | title: sl.String 9 | description: sl.String 10 | review_rating: sl.Float 11 | review_count: sl.Integer 12 | price: sl.Float 13 | 14 | 15 | product = ProductSchema() 16 | 17 | category_space = sl.CategoricalSimilaritySpace( 18 | category_input=product.category, 19 | categories=constants.SPLK_CATEGORIES, 20 | uncategorized_as_category=True, 21 | negative_filter=-1, 22 | ) 23 | title_space = sl.TextSimilaritySpace( 24 | text=product.title, model="Alibaba-NLP/gte-large-en-v1.5" 25 | ) 26 | description_space = sl.TextSimilaritySpace( 27 | text=product.description, model="Alibaba-NLP/gte-large-en-v1.5" 28 | ) 29 | review_rating_maximizer_space = sl.NumberSpace( 30 | number=product.review_rating, min_value=-1.0, max_value=5.0, mode=sl.Mode.MAXIMUM 31 | ) 32 | price_minimizer_space = sl.NumberSpace( 33 | number=product.price, min_value=0.0, max_value=1000, mode=sl.Mode.MINIMUM 34 | ) 35 | 36 | product_index = sl.Index( 37 | spaces=[ 38 | title_space, 39 | description_space, 40 | review_rating_maximizer_space, 41 | price_minimizer_space, 42 | ], 43 | fields=[product.type, product.category, product.review_rating, product.price], 44 | ) 45 | -------------------------------------------------------------------------------- /app/agents/schema/superlinked/query_search.py: -------------------------------------------------------------------------------- 1 | from app.agents.schema.superlinked import index 2 | from app.utils.constants import constants 3 | import superlinked.framework as sl 4 | 5 | openai_config = sl.OpenAIClientConfig( 6 | api_key=constants.OPENAI_API_KEY.get_secret_value(), model=constants.LLM_MODEL 7 | ) 8 | 9 | title_similar_param = sl.Param( 10 | "query_title", 11 | description=( 12 | "The text in the user's query that is used to search in the products' title." 13 | "Extract info that does not apply to other spaces or params." 14 | ), 15 | ) 16 | text_similar_param = sl.Param( 17 | "query_description", 18 | description=( 19 | "The text in the user's query that is used to search in the products' description." 20 | " Extract info that does not apply to other spaces or params." 
21 | ), 22 | ) 23 | 24 | base_query = ( 25 | sl.Query( 26 | index.product_index, 27 | weights={ 28 | index.title_space: sl.Param("title_weight"), 29 | index.description_space: sl.Param("description_weight"), 30 | index.review_rating_maximizer_space: sl.Param( 31 | "review_rating_maximizer_weight" 32 | ), 33 | index.price_minimizer_space: sl.Param("price_minimizer_weights"), 34 | }, 35 | ) 36 | .find(index.product) 37 | .limit(sl.Param("limit")) 38 | .with_natural_query(sl.Param("natural_query"), openai_config) 39 | .filter( 40 | index.product.type 41 | == sl.Param( 42 | "filter_by_type", 43 | description="Used to only present items that have a specific type, if not, ignore this filter", 44 | options=constants.SPLK_TYPES, 45 | ) 46 | ) 47 | ) 48 | 49 | semantic_query = ( 50 | base_query.similar( 51 | index.description_space, 52 | text_similar_param, 53 | sl.Param("description_similar_clause_weight"), 54 | ) 55 | .similar( 56 | index.title_space, 57 | title_similar_param, 58 | sl.Param("title_similar_clause_weight"), 59 | ) 60 | .filter( 61 | index.product.category 62 | == sl.Param( 63 | "filter_by_category", 64 | description="Used to only present items that have a specific category, if not, ignore this filter", 65 | options=constants.SPLK_CATEGORIES, 66 | ) 67 | ) 68 | ) -------------------------------------------------------------------------------- /app/utils/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Node implementations for the Agentic RAG workflow. 3 | """ 4 | 5 | from .constants import constants 6 | 7 | __all__ = ['constants'] -------------------------------------------------------------------------------- /app/utils/constants.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from pydantic import SecretStr 3 | import os 4 | 5 | class Constants: 6 | """Constants for LangChain integration""" 7 | CHUNK_SIZE = 750 8 | CHUNK_OVERLAP = 100 9 | EMBEDDING_MODEL = "text-embedding-3-small" 10 | LLM_MODEL = "gpt-4o-mini" 11 | OPENAI_API_KEY = SecretStr(os.getenv("OPENAI_API_KEY")) 12 | 13 | URLS = [ 14 | "https://blog.langchain.dev/what-is-an-agent/", 15 | "https://huggingface.co/blog/Kseniase/mcp", 16 | ] 17 | 18 | PROCESSED_DATASET_PATH: Path = ( 19 | Path("data") / "processed_100_sample.jsonl" 20 | ) 21 | 22 | SPLK_TYPES = ["product", "book"] 23 | 24 | SPLK_CATEGORIES = [ 25 | "Accessories", 26 | "Appliances", 27 | "Arts & Photography", 28 | "Arts, Crafts & Sewing", 29 | "Automotive", 30 | "Baby Care", 31 | "Baby Products", 32 | "Bath", 33 | "Beauty & Personal Care", 34 | "Bedding", 35 | "Beverages", 36 | "Biographies & Memoirs", 37 | "Books", 38 | "CDs & Vinyl", 39 | "Camera & Photo", 40 | "Cell Phones & Accessories", 41 | "Children's Books", 42 | "Christian Books & Bibles", 43 | "Classical", 44 | "Clothing, Shoes & Jewelry", 45 | "Computers & Accessories", 46 | "Costumes & Accessories", 47 | "Dogs", 48 | "Electrical", 49 | "Electronics", 50 | "Event & Party Supplies", 51 | "Exercise & Fitness", 52 | "Exterior Accessories", 53 | "GPS, Finders & Accessories", 54 | "Grocery & Gourmet Food", 55 | "Hair Care", 56 | "Health & Household", 57 | "Home & Kitchen", 58 | "Hunting & Fishing", 59 | "Industrial & Scientific", 60 | "Industrial Electrical", 61 | "Kitchen & Dining", 62 | "Lighting Assemblies & Accessories", 63 | "Lights & Lighting Accessories", 64 | "Luggage & Travel Gear", 65 | "Makeup", 66 | "Medical Supplies & Equipment", 67 | "Men", 68 | "Movies & TV", 69 | 
"Musical Instruments", 70 | "Office & School Supplies", 71 | "Office Products", 72 | "Patio Furniture & Accessories", 73 | "Patio, Lawn & Garden", 74 | "Pet Supplies", 75 | "Pop", 76 | "Portable Audio & Video", 77 | "Power & Hand Tools", 78 | "Raw Materials", 79 | "Replacement Parts", 80 | "Self-Help", 81 | "Sports & Outdoor Play", 82 | "Sports & Outdoors", 83 | "Stuffed Animals & Plush Toys", 84 | "Tires & Wheels", 85 | "Tools & Home Improvement", 86 | "Toy Figures & Playsets", 87 | "Toys & Games", 88 | "Vehicles", 89 | "Video Games", 90 | "Wall Art", 91 | "Women", 92 | ] 93 | 94 | constants = Constants() -------------------------------------------------------------------------------- /app/utils/helpers.py: -------------------------------------------------------------------------------- 1 | import pprint 2 | 3 | def stream(graph, formatted_input): 4 | # Execute workflow and collect results 5 | results = [] 6 | for output in graph.stream(formatted_input): 7 | for key, value in output.items(): 8 | pprint.pprint(f"Output from node '{key}':") 9 | pprint.pprint("---") 10 | pprint.pprint(value, indent=2, width=80, depth=None) 11 | pprint.pprint("\n---\n") 12 | results.append(value['messages']) 13 | 14 | finalMessage = results[0][-1] 15 | return finalMessage.content 16 | 17 | def invoke(graph, formatted_input): 18 | response = graph.invoke(formatted_input) 19 | finalMessage = response["messages"][-1] 20 | return finalMessage.content -------------------------------------------------------------------------------- /app/utils/mapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Deluxer/oliva/fac56e1afb72c2ff151e8e33ecf4896080835775/app/utils/mapper.py -------------------------------------------------------------------------------- /app/utils/prompts.py: -------------------------------------------------------------------------------- 1 | class Prompts: 2 | ASSISTANT_SYSTEM = """You are Oliva, a helpful AI assistant that can engage in natural conversations 3 | and help users with various tasks. You aim to be: 4 | - Helpful and informative 5 | - Direct and concise in responses 6 | - Natural in conversation 7 | - Honest about capabilities and limitations 8 | 9 | When responding: 10 | 1. Keep responses brief but complete 11 | 2. Ask for clarification if needed 12 | 3. Be conversational but professional 13 | 4. Never pretend to have capabilities you don't have""" 14 | 15 | GRADE_DOCUMENTS_PROMPT = """You are a grader assessing relevance of retrieved documents to a user question. 16 | 17 | User question: {question} 18 | 19 | Retrieved documents: {context} 20 | 21 | Task: 22 | 1. Carefully analyze if the documents contain information that could help answer the question 23 | 2. For price-based queries, check if ANY document matches the price criteria 24 | 3. For category-based queries, check if ANY document matches the category 25 | 4. For product searches, consider a document relevant if it contains similar products even if not exact matches 26 | 5. If NO documents match the exact criteria but some are close, consider them relevant and mark as 'yes' 27 | 6. Only mark as 'no' if the documents are completely unrelated or irrelevant 28 | 29 | Provide: 30 | 1. A binary score 'yes' or 'no' to indicate document relevance 31 | 2. 
A brief explanation of your decision, including what relevant information was found or why documents were deemed irrelevant""" 32 | GRADE_DOCUMENTS_PROMPT_OPT_2 = """You are a grader assessing relevance of retrieved docs to a user question. 33 | Here are the retrieved docs: 34 | \n ------- \n 35 | {context} 36 | \n ------- \n 37 | Here is the user question: {question} 38 | If the content of the docs are relevant to the users question, score them as relevant. 39 | Give a binary score 'yes' or 'no' score to indicate whether the docs are relevant to the question. 40 | Yes: The docs are relevant to the question. 41 | No: The docs are not relevant to the question.""" 42 | 43 | BLOG_SEARCH_PROMPT = """You are a helpful blog assistant that helps users find information about blog posts. 44 | When a user asks a question, always use the blog_search tool to find relevant blog posts. 45 | Make sure to include the user's query in the tool call.""" 46 | 47 | AMAZON_SEARCH_PROMPT = """You are a helpful product search assistant that helps users find products on our database. 48 | When a user asks a question, always use the search_products_by_superlinked tool to find relevant products. 49 | Make sure to include the user's query in the tool call. 50 | Avoid mentioning the database.""" 51 | 52 | NO_RESULTS_PROMPT = """You are a helpful assistant responding to a product search query. 53 | Original query: {question} 54 | 55 | Task: Generate a polite response explaining that no exact matches were found. 56 | Suggest broadening the search criteria (e.g. higher price range, different category). 57 | """ 58 | 59 | AGENT_PROMPT_BY_SUPERLINKED = """You are an assistant that helps users find products. 60 | If the user asks about products, always use the 'search_products_by_superlinked' tool. 61 | If no exact matches are found, respond with a polite message explaining that no exact matches were found. 62 | """ 63 | 64 | AGENT_PROMPT_BY_JSON = """You are an assistant that helps users find products. 65 | If the user asks about products, always use the 'search_products_by_json' tool. 66 | """ 67 | 68 | def supervisor_system_prompt(self, members, agent_members_prompt_final): 69 | supervisor_system_prompt = f""" 70 | # Role 71 | You are Oliva's personal assistant supervisor Agent. Your job is to ensure that tasks related with blog posts and search products are executed efficiently by your subagents. 72 | # Context 73 | You have access to the following {len(members)} subagents: {members}. Each subagent has its own specialized prompt and set of tools. Here is a description: 74 | {agent_members_prompt_final} 75 | # Objective 76 | Analyze the user's request, decompose it into sub-tasks, and delegate each sub-task to the most appropriate subagent and ensure the task is completed. 77 | # Instructions 78 | 1. Understand the user's goal. 79 | 2. Decompose the task into ordered sub-tasks. 80 | 3. For each sub-task, determine the best-suited agent. 81 | 4. When receiving messages from the agents assess them thoroughly for completion 82 | 5. When all work is done, respond with next = FINISH. 83 | # Helpful Information 84 | - When asked for Model Context Protocol (MCP) topic - only search in blog_post_agent. 85 | - When asked for Agent definition or related topic - only search in blog_post_agent. 86 | - When asked searching for specific products includes product prices, ratings, or categories - only search in amazon_products_agent. 
87 | - If the query is not related to blog posts or products, respond the user query with a natural conversation and next = FINISH. 88 | # Important 89 | Delegating tasks should be added to the task_description_for_agent field with the original query 90 | Assess each message from sub agents carefully and decide whether the task is complete or not 91 | """ 92 | 93 | return supervisor_system_prompt 94 | 95 | prompts = Prompts() -------------------------------------------------------------------------------- /app/utils/types.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | class EdgeType(Enum): 4 | GRADE_DOCUMENTS = "grade_documents" 5 | CHECK_RELEVANCE = "check_relevance" 6 | 7 | class NodeType(Enum): 8 | AGENT = "agent" 9 | GENERATE = "generate" 10 | REWRITE = "rewrite" 11 | SUPERVISOR = "supervisor" 12 | 13 | class ToolType(Enum): 14 | BLOG_SEARCH = "blog_search" 15 | BLOG_ADVANCE_SEARCH = "blog_advance_search" 16 | BLOG_SUMMARY = "blog_summary" 17 | AMAZON_PRODUCTS_SEARCH_BY_JSON = "amazon_products_search_by_json" 18 | AMAZON_PRODUCTS_SEARCH_BY_SUPERLINKED = "amazon_products_search_by_superlinked" -------------------------------------------------------------------------------- /app/voice_assistant/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Deluxer/oliva/fac56e1afb72c2ff151e8e33ecf4896080835775/app/voice_assistant/__init__.py -------------------------------------------------------------------------------- /app/voice_assistant/assistant.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import logging 3 | from typing import Annotated 4 | from livekit import rtc 5 | from dotenv import load_dotenv 6 | from livekit.agents import ( 7 | AutoSubscribe, 8 | JobContext, 9 | JobProcess, 10 | WorkerOptions, 11 | cli, 12 | llm, 13 | metrics, 14 | ) 15 | from livekit.agents.llm import ( 16 | ChatContext, 17 | ChatMessage, 18 | FunctionContext 19 | ) 20 | from livekit.agents.pipeline import VoicePipelineAgent, AgentCallContext 21 | from livekit.plugins import deepgram, openai, silero, elevenlabs 22 | from app.agents.implementations.supervisor import graph 23 | import os 24 | 25 | from app.utils.prompts import Prompts 26 | 27 | load_dotenv() 28 | logger = logging.getLogger("oliva-voice-assistant") 29 | 30 | 31 | def prewarm(proc: JobProcess): 32 | proc.userdata["vad"] = silero.VAD.load() 33 | 34 | class SearchProducts(FunctionContext): 35 | """The class defines a set of LLM functions that the assistant can execute. 
""" 36 | 37 | @llm.ai_callable(name="search_products", description="Called when asked to search for products in oliva database") 38 | async def search_products( 39 | self, 40 | search_products: Annotated[ 41 | str, 42 | llm.TypeInfo(description="Search for products by title, description, category, price, rating, and review"), 43 | ], 44 | ): 45 | agent = AgentCallContext.get_current().agent 46 | local_participant = agent._room.local_participant 47 | 48 | try: 49 | #TODO: pass configurable options from livekit 50 | config = { 51 | "configurable": { 52 | "user_id": local_participant.identity, 53 | "chat_id": local_participant.identity 54 | } 55 | } 56 | 57 | input_state = { 58 | "messages": [ 59 | { 60 | "role": "user", 61 | "content": search_products 62 | } 63 | ] 64 | } 65 | 66 | result = graph.invoke( 67 | input_state, 68 | config 69 | ) 70 | 71 | if "messages" in result and result["messages"]: 72 | message = result["messages"][-1] 73 | message.pretty_print() 74 | return message.content 75 | else: 76 | logger.warning("No messages in result from graph invocation") 77 | return "I apologize, but I couldn't process your request properly." 78 | 79 | except Exception as e: 80 | logger.error(f"Error during graph invocation: {str(e)}", exc_info=True) 81 | return "I encountered an error while processing your request. Please try again." 82 | 83 | async def entrypoint(ctx: JobContext): 84 | fnc_ctx = SearchProducts() 85 | initial_ctx = ChatContext().append( 86 | role="system", 87 | text=Prompts.ASSISTANT_SYSTEM, 88 | ) 89 | 90 | logger.info(f"connecting to room {ctx.room.name}") 91 | await ctx.connect(auto_subscribe=AutoSubscribe.AUDIO_ONLY) 92 | 93 | # wait for the first participant to connect 94 | participant = await ctx.wait_for_participant() 95 | logger.info(f"starting voice assistant for participant {participant.identity}") 96 | 97 | dg_model = "nova-2-general" 98 | if participant.kind == rtc.ParticipantKind.PARTICIPANT_KIND_SIP: 99 | # use a model optimized for telephony 100 | dg_model = "nova-2-phonecall" 101 | 102 | elevenlabs_voice = elevenlabs.Voice( 103 | id="ErXwobaYiN019PkySvjV", 104 | name="Antoni", 105 | category="premade", 106 | settings=elevenlabs.VoiceSettings( 107 | stability=0.71, 108 | speed=1.0, 109 | similarity_boost=0.5, 110 | style=0.0, 111 | use_speaker_boost=True, 112 | ), 113 | ) 114 | # elevenlabs_tts = elevenlabs.TTS(voice=elevenlabs_voice, model="eleven_flash_v2_5", api_key=os.getenv("ELEVENLABS_API_KEY"), base_url="https://api.elevenlabs.io/v1") 115 | 116 | agent = VoicePipelineAgent( 117 | vad=ctx.proc.userdata["vad"], 118 | stt=deepgram.STT(model=dg_model, endpointing_ms=200, no_delay=True, energy_filter=True, interim_results=True), 119 | llm=openai.LLM(), 120 | tts=openai.TTS(), 121 | chat_ctx=initial_ctx, 122 | fnc_ctx=fnc_ctx 123 | ) 124 | 125 | agent.start(ctx.room, participant) 126 | 127 | usage_collector = metrics.UsageCollector() 128 | 129 | @agent.on("metrics_collected") 130 | def _on_metrics_collected(mtrcs: metrics.AgentMetrics): 131 | # metrics.log_metrics(mtrcs) 132 | usage_collector.collect(mtrcs) 133 | 134 | async def log_usage(): 135 | summary = usage_collector.get_summary() 136 | logger.info(f"Usage: ${summary}") 137 | 138 | ctx.add_shutdown_callback(log_usage) 139 | 140 | # listen to incoming chat messages, only required if you'd like the agent to 141 | # answer incoming messages from Chat 142 | chat = rtc.ChatManager(ctx.room) 143 | 144 | async def answer_from_text(txt: str): 145 | chat_ctx = agent.chat_ctx.copy() 146 | chat_ctx.append(role="user", 
text=txt) 147 | stream = agent.llm.chat(chat_ctx=chat_ctx) 148 | await agent.say(stream) 149 | 150 | @chat.on("message_received") 151 | def on_chat_received(msg: ChatMessage): 152 | if msg.message: 153 | asyncio.create_task(answer_from_text(msg.message)) 154 | 155 | await agent.say("Hey, how can I help you today?", allow_interruptions=True) 156 | 157 | 158 | if __name__ == "__main__": 159 | cli.run_app( 160 | WorkerOptions( 161 | entrypoint_fnc=entrypoint, 162 | prewarm_fnc=prewarm, 163 | job_memory_warn_mb=1500, 164 | ), 165 | ) -------------------------------------------------------------------------------- /assets/livekit_playground.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Deluxer/oliva/fac56e1afb72c2ff151e8e33ecf4896080835775/assets/livekit_playground.png -------------------------------------------------------------------------------- /assets/oliva_architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Deluxer/oliva/fac56e1afb72c2ff151e8e33ecf4896080835775/assets/oliva_architecture.png -------------------------------------------------------------------------------- /assets/oliva_arquitecture_v2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Deluxer/oliva/fac56e1afb72c2ff151e8e33ecf4896080835775/assets/oliva_arquitecture_v2.png -------------------------------------------------------------------------------- /assets/snapshot.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Deluxer/oliva/fac56e1afb72c2ff151e8e33ecf4896080835775/assets/snapshot.zip -------------------------------------------------------------------------------- /langgraph.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Search Products by Superlinked", 3 | "dockerfile_lines": [], 4 | "graphs": { 5 | "agent": "./app/agents/implementations/supervisor.py:graph" 6 | }, 7 | "env": ".env", 8 | "python_version": "3.12", 9 | "dependencies": [ 10 | "." 
11 | ] 12 | } -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "oliva_multi_agent" 3 | version = "0.1.0" 4 | description = "Oliva Multi-Agent Assistant" 5 | readme = "README.md" 6 | requires-python = ">=3.12" 7 | authors = [ 8 | {name = "GeraDeluxer", email = "gerardoangeln@gmail.com"} 9 | ] 10 | dependencies = [ 11 | "aiofile", 12 | "livekit-agents", 13 | "livekit-agents>=0.12.12,<0.13", 14 | "livekit-plugins-deepgram>=0.6.19,<0.7", 15 | "livekit-plugins-openai>=0.10.19,<0.11", 16 | "livekit-plugins-silero>=0.7.4,<0.8", 17 | "livekit-plugins-elevenlabs", 18 | "langgraph>=0.2.71,<0.3", 19 | "langchain-core>=0.3.34,<0.4", 20 | "python-dotenv>=1.0.1,<2", 21 | "python-dotenv", 22 | "loguru>=0.7.3", 23 | "pydantic>=2,<3", 24 | "pydantic-settings>=2.6.1", 25 | "superlinked==17.1.0", 26 | "langchain>=0.3.0", 27 | "langchain-community>=0.0.10", 28 | "langchain-openai>=0.3.10", 29 | "langchainhub>=0.1.14", 30 | "langchain-text-splitters>=0.0.1", 31 | "chromadb", 32 | "tiktoken", 33 | "langchain-qdrant", 34 | "qdrant-client", 35 | "beautifulsoup4", 36 | ] 37 | 38 | [tool.setuptools] 39 | packages = ["app", "data", "assets", "use_cases"] -------------------------------------------------------------------------------- /use_cases/agent_blog_post_url.py: -------------------------------------------------------------------------------- 1 | from app.agents.implementations.blog_post.agent import agent 2 | import logging 3 | import time 4 | 5 | # logging.basicConfig( 6 | # level=logging.DEBUG, 7 | # format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", 8 | # ) 9 | # logger = logging.getLogger(__name__) 10 | 11 | if __name__ == "__main__": 12 | """Search blog posts using the BlogPostAgent""" 13 | start_time = time.time() 14 | try: 15 | result = agent.process({ 16 | "query": "How Harrison Chase defines an agent?" 
17 | }) 18 | print(result) 19 | except Exception as e: 20 | print(f"Error: {str(e)}") 21 | raise 22 | finally: 23 | execution_time = time.time() - start_time 24 | print(f"\nTotal execution time: {execution_time:.2f} seconds") -------------------------------------------------------------------------------- /use_cases/agent_search_by_json.py: -------------------------------------------------------------------------------- 1 | import time 2 | start_time = time.time() 3 | from app.agents.implementations.search_amazon_products.agent_by_json import agent 4 | 5 | def agent_search_in_amazon_products_by_json(): 6 | """Search amazon products using the SearchAmazonProductsAgentByJson""" 7 | try: 8 | result = agent.process({ 9 | "query": "products with a price lower than 100 and a rating bigger than 3" 10 | }) 11 | print(result) 12 | except Exception as e: 13 | print(f"Error: {str(e)}") 14 | raise 15 | 16 | if __name__ == "__main__": 17 | agent_search_in_amazon_products_by_json() 18 | end_time = time.time() 19 | execution_time = end_time - start_time 20 | print(f"\nTotal execution time: {execution_time:.2f} seconds") -------------------------------------------------------------------------------- /use_cases/agent_search_by_superlinked.py: -------------------------------------------------------------------------------- 1 | import time 2 | start_time = time.time() 3 | from app.agents.implementations.search_amazon_products.agent_by_superlinked import agent 4 | 5 | if __name__ == "__main__": 6 | try: 7 | result = agent.process({ 8 | "query": "books with a price lower than 100 and a rating bigger than 4" 9 | }) 10 | print(result) 11 | except Exception as e: 12 | print(f"Error in main: {str(e)}") 13 | raise 14 | finally: 15 | execution_time = time.time() - start_time 16 | print(f"\nTotal execution time: {execution_time:.2f} seconds") -------------------------------------------------------------------------------- /use_cases/agent_supervisor.py: -------------------------------------------------------------------------------- 1 | import time 2 | start_time = time.time() 3 | from app.agents.implementations.supervisor import agent, graph 4 | 5 | def agent_supervisor(): 6 | try: 7 | result = agent.process({ 8 | "query": "products with a price lower than 50 and a rating lower than 2" 9 | # "query": "How Harrison Chase defines an agent?" 10 | # "query": 'prvide information about "Before MCP, How Were AI Systems Handling Context And Tool Access?" topic' 11 | }) 12 | print(result) 13 | except Exception as e: 14 | print(f"Error in main: {str(e)}") 15 | raise 16 | finally: 17 | execution_time = time.time() - start_time 18 | print(f"\nTotal execution time: {execution_time:.2f} seconds") 19 | 20 | def agent_supervisor_graph(): 21 | try: 22 | config = { 23 | "configurable": { 24 | "user_id": "1", 25 | "chat_id": "2" 26 | } 27 | } 28 | result = graph.invoke( 29 | {"messages": [{"role": "user", "content": "3 products with a price lower than 100 and a rating greater than 4"}]}, 30 | config, 31 | ) 32 | if "messages" in result: 33 | message = result["messages"][-1] 34 | message.pretty_print() 35 | else: 36 | print("No messages in result") 37 | 38 | except Exception as e: 39 | print(f"Error in main: {str(e)}") 40 | raise 41 | finally: 42 | execution_time = time.time() - start_time 43 | print(f"\nTotal execution time: {execution_time:.2f} seconds") 44 | 45 | if __name__ == "__main__": 46 | agent_supervisor_graph() --------------------------------------------------------------------------------
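The use cases above drive the supervisor graph end to end. One piece of the wiring that is easy to miss is the long-term memory contract the supervisor relies on: search(...) returns scored payloads, and evaluate(...) returns either a boolean (reuse the stored response as-is, or not at all) or a refined response string. Below is a minimal, illustrative sketch of that contract — not part of the repository — assuming the same keyword arguments the supervisor node passes (namespace, query, limit), a populated Qdrant collection behind LongTermMemoryStore, and made-up user/chat ids and query.

# Illustrative only: the ids and query are invented, and search() is assumed to
# accept the same keyword arguments the supervisor node uses.
from app.agents.langchain.memory.long_term import long_term_memory

namespace = "user-1:chat-1:memories"            # hypothetical user/chat ids
query = "books with a price lower than 20"      # hypothetical query

memories = long_term_memory.search(namespace=namespace, query=query, limit=3)

if not memories:
    print("No stored memories; the supervisor would delegate to an agent.")
else:
    result = long_term_memory.evaluate(query=query, memories=memories)
    if isinstance(result, str):
        # evaluate() stripped the "REFINED:" prefix and kept only matching items
        print("Refined answer reused from memory:\n", result)
    elif result:
        print("Stored answer reused as-is:\n", memories[0]["value"]["response"])
    else:
        print("Memories found but not reusable; delegate to an agent.")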