├── .env.example ├── .gitignore ├── .python-version ├── CONTRIBUTING.md ├── LICENCE ├── Makefile ├── README.md ├── app ├── agents │ ├── clients │ │ ├── __init__.py │ │ └── superlinked.py │ ├── config │ │ └── qdrant.py │ ├── core │ │ ├── agent_state.py │ │ └── base_agent.py │ ├── implementations │ │ ├── blog_post │ │ │ ├── __init__.py │ │ │ └── agent.py │ │ ├── main_deprecated.py │ │ ├── search_amazon_products │ │ │ ├── agent_by_json.py │ │ │ └── agent_by_superlinked.py │ │ └── supervisor.py │ ├── langchain │ │ ├── __init__.py │ │ ├── edges │ │ │ ├── __init__.py │ │ │ ├── check_relevance.py │ │ │ ├── edges.py │ │ │ └── grade_documents.py │ │ ├── factory.py │ │ ├── interface │ │ │ ├── __init__.py │ │ │ ├── base_provider.py │ │ │ └── events.py │ │ ├── memory │ │ │ └── long_term.py │ │ ├── nodes │ │ │ ├── __init__.py │ │ │ ├── agent.py │ │ │ ├── generate.py │ │ │ ├── nodes.py │ │ │ ├── rewrite.py │ │ │ └── supervisor.py │ │ ├── template.py │ │ ├── tools │ │ │ ├── __init__.py │ │ │ ├── amazon_products_search.py │ │ │ ├── blog_posts.py │ │ │ └── tools.py │ │ └── vector_store │ │ │ ├── json_retriever.py │ │ │ ├── sl_amazon_products_retriever.py │ │ │ └── url_retriever.py │ └── schema │ │ └── superlinked │ │ ├── __init__.py │ │ ├── index.py │ │ └── query_search.py ├── utils │ ├── __init__.py │ ├── constants.py │ ├── helpers.py │ ├── mapper.py │ ├── prompts.py │ └── types.py └── voice_assistant │ ├── __init__.py │ └── assistant.py ├── assets ├── livekit_playground.png ├── oliva_architecture.png ├── oliva_arquitecture_v2.png └── snapshot.zip ├── langgraph.json ├── pyproject.toml ├── use_cases ├── agent_blog_post_url.py ├── agent_search_by_json.py ├── agent_search_by_superlinked.py └── agent_supervisor.py └── uv.lock /.env.example: -------------------------------------------------------------------------------- 1 | LIVEKIT_URL=http://localhost:7880/ 2 | LIVEKIT_API_KEY=devkey 3 | LIVEKIT_API_SECRET=secret 4 | 5 | # QDRANT 6 | QDRANT_URL=http://localhost:6333 7 | QDRANT_API_KEY=... 8 | QDRANT_VECTOR_DIMENSION=2054 9 | QDRANT_VECTOR_DISTANCE=Dot 10 | QDRANT_VECTOR_NAME=... 11 | # QDRANT long term memory 12 | QDRANT_COLLECTION_NAME=... 13 | 14 | # Deepgram 15 | DEEPGRAM_API_KEY=... 16 | 17 | # Openai 18 | OPENAI_API_KEY=... 19 | OPENAI_MODEL=text-embedding-3-large 20 | 21 | # https://smith.langchain.com 22 | LANGSMITH_TRACING=true 23 | LANGSMITH_API_KEY=... 24 | 25 | # elevenlabs 26 | ELEVENLABS_API_KEY=... -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 110 | .pdm.toml 111 | .pdm-python 112 | .pdm-build/ 113 | 114 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 115 | __pypackages__/ 116 | 117 | # Celery stuff 118 | celerybeat-schedule 119 | celerybeat.pid 120 | 121 | # SageMath parsed files 122 | *.sage.py 123 | 124 | # Environments 125 | .env 126 | .venv 127 | env/ 128 | venv/ 129 | ENV/ 130 | env.bak/ 131 | venv.bak/ 132 | 133 | # Spyder project settings 134 | .spyderproject 135 | .spyproject 136 | 137 | # Rope project settings 138 | .ropeproject 139 | 140 | # mkdocs documentation 141 | /site 142 | 143 | # mypy 144 | .mypy_cache/ 145 | .dmypy.json 146 | dmypy.json 147 | 148 | # Pyre type checker 149 | .pyre/ 150 | 151 | # pytype static type analyzer 152 | .pytype/ 153 | 154 | # Cython debug symbols 155 | cython_debug/ 156 | 157 | # PyCharm 158 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 159 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 160 | # and can be added to the global gitignore or merged into this file. For a more nuclear 161 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
162 | #.idea/ 163 | 164 | # VSCode / Cursor 165 | .vscode 166 | 167 | # Data 168 | data/ 169 | in_memory_vdb/ 170 | .langgraph_api 171 | 172 | # MacOs 173 | .DS_Store -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.12 2 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Welcome to Oliva Contributing Guide! 👋 2 | 3 | We're thrilled that you're interested in contributing to Oliva! As an open-source multi-agent assistant, every contribution helps make AI more accessible to everyone 🚀 4 | 5 | ## Ways to Contribute 6 | 7 | Every contribution matters, no matter how small! Here are some ways you can help: 8 | 9 | - 📝 Fixing typos or improving documentation 10 | - 🐛 Reporting and fixing bugs 11 | - ✨ Adding new features or enhancements 12 | - 🔧 Improving existing code 13 | - 🎨 Enhancing the user interface 14 | - 🌍 Adding support for different languages or platforms 15 | - 🧪 Writing tests 16 | - 📚 Improving examples and tutorials 17 | 18 | ## Quick Start Guide 19 | 20 | 1. **Fork & Clone** 21 | ```bash 22 | git clone https://github.com/Deluxer/oliva.git 23 | cd oliva 24 | git remote add upstream https://github.com/Deluxer/oliva.git 25 | ``` 26 | 27 | 2. **Set Up Environment** 28 | ```bash 29 | # Create virtual environment with uv 30 | uv sync 31 | 32 | # Configure your environment 33 | cp .env.example .env 34 | # Edit .env with your API keys and settings 35 | ``` 36 | 37 | 3. **Start Coding** 38 | ```bash 39 | # Create a new branch 40 | git checkout -b feature/amazing-feature 41 | # or 42 | git checkout -b fix/bug-description 43 | ``` 44 | 45 | ## Development Guidelines 46 | 47 | ### 🔧 Technical Requirements 48 | 49 | - Python 3.12+ 50 | - Docker (for Qdrant) 51 | - Livekit account 52 | - Deepgram account 53 | - OpenAI API key 54 | 55 | ### 📝 Code Style 56 | 57 | - Follow PEP 8 58 | - Use type hints 59 | - Write descriptive docstrings 60 | - Keep functions focused and modular 61 | - Add comments for complex logic 62 | 63 | ### ✅ Before Submitting 64 | 65 | 1. **Test Your Changes** 66 | ```bash 67 | # Run all tests 68 | make test 69 | 70 | # Check code style 71 | make lint 72 | ``` 73 | 74 | 2. **Update Documentation** 75 | - Add docstrings for new functions 76 | - Update README if needed 77 | - Add examples for new features 78 | 79 | 3. **Commit Your Changes** 80 | ```bash 81 | git add . 82 | git commit -m "feat: add amazing feature" 83 | ``` 84 | Use conventional commits: 85 | - `feat:` new features 86 | - `fix:` bug fixes 87 | - `docs:` documentation 88 | - `test:` adding tests 89 | - `refactor:` code improvements 90 | 91 | ## 🐛 Reporting Issues 92 | 93 | Found a bug? Have a suggestion? We'd love to hear from you! When creating an issue: 94 | 95 | 1. Check if the issue already exists 96 | 2. Include: 97 | - Your Python and dependency versions 98 | - Clear steps to reproduce 99 | - Expected vs actual behavior 100 | - Relevant logs or screenshots 101 | - Operating system details 102 | 103 | ## 🚀 Pull Request Process 104 | 105 | 1. Update your fork 106 | ```bash 107 | git fetch upstream 108 | git rebase upstream/main 109 | ``` 110 | 111 | 2. Push your changes 112 | ```bash 113 | git push origin your-branch-name 114 | ``` 115 | 116 | 3. 
Open a PR with: 117 | - Clear description of changes 118 | - Screenshots for UI changes 119 | - Reference to related issues 120 | - List of breaking changes (if any) 121 | 122 | 4. Respond to review comments 123 | 124 | ## 📞 Questions 125 | 126 | Please send your questions to gerardoangeln@gmail.com or creating a public issue. 127 | 128 | ## ✨ Recognition 129 | 130 | Contributors are listed in our [CONTRIBUTORS.md](./CONTRIBUTORS.md) file. Thank you for making Oliva better! 🙏 131 | 132 | Let's build the future of AI assistants together! 🤖✨ -------------------------------------------------------------------------------- /LICENCE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Oliva 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | oliva-start: 2 | uv run python -m app.voice_assistant.assistant start 3 | 4 | agent-search-by-superlinked: 5 | uv run python -m use_cases.agent_search_by_superlinked 6 | 7 | agent-search-by-json: 8 | uv run python -m use_cases.agent_search_by_json 9 | 10 | agent-blog-post: 11 | uv run python -m use_cases.agent_blog_post_url 12 | 13 | agent-supervisor: 14 | uv run python -m use_cases.agent_supervisor -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
# Oliva Multi-Agent Assistant

Oliva is a multi-agent assistant that combines different agents to handle tasks such as semantic search, text generation, question answering, and more. The project is designed to be flexible and extensible, allowing you to add more agents as needed.

*Oliva architecture*

### Requirements:
| Requirement | Description |
| --- | --- |
| Database Population | Follow the setup instructions in the tabular-semantic-search-tutorial or download the snapshot in `assets/snapshot.zip` (a connection sanity check is sketched below this table). |
| Qdrant | Vector database for efficient similarity search and storage of embeddings. |
| Superlinked | Framework for building AI applications with semantic search capabilities. |
| Deepgram Account | Speech-to-text service account required for converting voice input into text. |
| Livekit Account | Real-time communication platform needed for handling voice interactions. |
| Python Knowledge | Understanding of Python programming language (version 3.12+). |
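Once Qdrant is populated (via the tutorial or the snapshot), a short script like the one below can confirm that the data is reachable. This is only a sketch: the collection name `amazon_products` is a hypothetical placeholder — use whatever name your population step created — while the client calls (`QdrantClient`, `get_collections`, `count`) are the same `qdrant-client` APIs the project itself uses.

```python
# Sanity check: confirm Qdrant is reachable and list the collections it holds.
import os

from dotenv import load_dotenv
from qdrant_client import QdrantClient

load_dotenv()  # reads QDRANT_URL / QDRANT_API_KEY from .env

client = QdrantClient(
    url=os.environ["QDRANT_URL"],
    api_key=os.environ.get("QDRANT_API_KEY"),
)

collections = [c.name for c in client.get_collections().collections]
print("Collections:", collections)

# Hypothetical collection name — replace with the one your data load created.
if "amazon_products" in collections:
    print("Points:", client.count(collection_name="amazon_products").count)
```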
46 | 47 | ### Installation 48 | 1. Install project dependencies: 49 | ```bash 50 | uv sync 51 | ``` 52 | 53 | This will create a virtual environment in `.venv` and install all required dependencies. 54 | 55 | 2. Livekit account 56 | 57 | Create a Livekit account in [Livekit Cloud](https://cloud.livekit.io/login) and get `LIVEKIT_URL`, `LIVEKIT_API_KEY` and `LIVEKIT_API_SECRET`. 58 | 59 | 60 | ```bash 61 | LIVEKIT_URL=wss://your-project.livekit.cloud 62 | LIVEKIT_API_KEY=secret 63 | LIVEKIT_API_SECRET=******** 64 | ``` 65 | 66 | 3. Environment variables 67 | 68 | Before running any Python scripts, set the following environment variables: 69 | ```bash 70 | cp .env.example .env 71 | ``` 72 | 73 | 4. Qdrant 74 | 75 | Use docker to run Qdrant, set an API key wherever you want: 76 | ```bash 77 | docker run -p 6333:6333 -p 6334:6334 \ 78 | -e QDRANT__SERVICE__API_KEY=******** \ 79 | -v "$(pwd)/qdrant_storage:/qdrant/storage:z" \ 80 | qdrant/qdrant 81 | ``` 82 | 83 | ## Usage 84 | 85 | ### Voice assistant 86 | ```bash 87 | make oliva-start 88 | ``` 89 | 90 | ### Voice assistant frontend 91 | 92 | Use [Agent playground](https://agents-playground.livekit.io/) and connect with your Livekit project to interact with the voice assistant. 93 | If you prefer run locally, download the repo [Agent playground](https://github.com/livekit/agents-playground) and run `npm run start`. 94 | 95 | 96 |

*Livekit Agent Playground*

99 | 100 | 101 | ## Project Structure 102 | 103 | ``` 104 | oliva/ 105 | ├── app/ 106 | │ ├── agents/ 107 | │ │ ├── implementations/ # Individual agent implementations 108 | │ │ ├── core/ # Base classes and interfaces for agent components 109 | │ │ └── langchain/ 110 | │ │ ├── base/ # Base LangChain integration classes 111 | │ │ ├── config/ # LangChain configuration 112 | │ │ ├── edges/ # Edge conditions for workflow routing 113 | │ │ ├── nodes/ # Node implementations (agent, rewrite, generate) 114 | │ │ └── tools/ # LangChain-specific tools 115 | │ ├── voice_assistant/ 116 | │ └── utils/ # Shared utilities 117 | ``` 118 | 119 | ### Architecture Overview 120 | 121 | The project follows a modular architecture implementing an agentic RAG (Retrieval-Augmented Generation) system: 122 | 123 | 1. **Agent Components** (`app/agents/`) 124 | - `agents/`: Contains specific agent implementations 125 | - `core/`: Defines core interfaces and abstract classes for: 126 | - State management 127 | - Node implementations 128 | - Edge conditions 129 | - Tool interfaces 130 | - Graph workflow definitions 131 | 132 | 2. **LangChain Integration** (`app/agents/integrations/langchain/`) 133 | - Provides LangChain-specific implementations for: 134 | - Document retrieval 135 | - Tool operations 136 | - State management 137 | - Workflow nodes and edges 138 | 139 | 3. **Voice Assistant** (`app/voice_assistant/`) 140 | - LiveKit integration 141 | * Voice interface implementation 142 | * Speech-to-text and text-to-speech capabilities 143 | 144 | 4. **Utilities** (`app/utils/`) 145 | - Shared helper functions 146 | - Common utilities used across modules 147 | 148 | The system implements a graph-based workflow where each agent processes state through a series of nodes (functions) connected by conditional edges, supporting dynamic routing based on the agent's decisions. 149 | 150 | ## Use cases 151 | 152 | Langchain workflow by supervisor agent 153 | ```bash 154 | make agent-supervisor 155 | ``` 156 | 157 | Langchain workflow by superlinked 158 | ```bash 159 | make agent-search-by-superlinked 160 | ``` 161 | 162 | Langchain workflow by json file 163 | ```bash 164 | make agent-search-by-json 165 | ``` 166 | 167 | ## Tech Stack 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 |
| Technology | Version/Type | Role |
| --- | --- | --- |
| Langchain | Latest | LLM application framework |
| Livekit | Cloud/Self-hosted | Real-time voice communication |
| Qdrant | Vector DB | Semantic search storage |
| Superlinked | Framework | Semantic search capabilities |
| Deepgram | API Service | Speech-to-text conversion |
| OpenAI | API Service | LLM provider |
| Python | 3.12+ | Core implementation |
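The Architecture Overview above describes a graph-based workflow in which a supervisor node routes state to specialized agents through conditional edges. The sketch below illustrates that pattern with the same LangGraph primitives the project uses (`StateGraph`, `add_node`, `add_conditional_edges`); the state class, node bodies, and routing rule are simplified placeholders, not the project's actual graph.

```python
# Minimal supervisor-style workflow: a routing node picks which worker runs next.
from typing import Literal

from langgraph.graph import StateGraph, MessagesState, START, END


class State(MessagesState):
    next: str  # name of the node the supervisor wants to run


def supervisor(state: State) -> dict:
    # Toy routing rule; the real project delegates this decision to an LLM.
    text = state["messages"][-1].content.lower()
    return {"next": "amazon_products_agent" if "buy" in text else "blog_post_agent"}


def amazon_products_agent(state: State) -> dict:
    return {"messages": [("assistant", "Searching Amazon products...")]}


def blog_post_agent(state: State) -> dict:
    return {"messages": [("assistant", "Searching blog posts...")]}


def route(state: State) -> Literal["amazon_products_agent", "blog_post_agent"]:
    return state["next"]


workflow = StateGraph(State)
workflow.add_node("supervisor", supervisor)
workflow.add_node("amazon_products_agent", amazon_products_agent)
workflow.add_node("blog_post_agent", blog_post_agent)
workflow.add_edge(START, "supervisor")
workflow.add_conditional_edges(
    "supervisor",
    route,
    {"amazon_products_agent": "amazon_products_agent", "blog_post_agent": "blog_post_agent"},
)
workflow.add_edge("amazon_products_agent", END)
workflow.add_edge("blog_post_agent", END)

graph = workflow.compile()
result = graph.invoke({"messages": [("user", "I want to buy wireless headphones")]})
print(result["messages"][-1].content)
```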
215 | 216 | ## 📚 Contributing 217 | 218 | This project is open-source and welcomes contributions from the community. For more details on how to contribute, please refer to the [Contributing Guide](./CONTRIBUTING.md). -------------------------------------------------------------------------------- /app/agents/clients/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | LangChain integration module for Agentic RAG implementation. 3 | """ 4 | 5 | from .superlinked import SuperlinkedClient 6 | 7 | 8 | __all__ = [ 9 | 'SuperlinkedClient', 10 | ] -------------------------------------------------------------------------------- /app/agents/clients/superlinked.py: -------------------------------------------------------------------------------- 1 | from app.agents.schema.superlinked import index 2 | import superlinked.framework as sl 3 | from app.agents.config.qdrant import qdrant_config 4 | from typing import Optional 5 | import logging 6 | 7 | class QdrantConnectionError(Exception): 8 | pass 9 | 10 | class SuperlinkedClient: 11 | def __init__(self) -> None: 12 | self.app = None 13 | 14 | def setup(self): 15 | self.app: Optional[sl.InteractiveExecutor] = None 16 | try: 17 | product_source: sl.InteractiveSource = sl.InteractiveSource(index.product) 18 | 19 | vector_database = sl.QdrantVectorDatabase( 20 | url=qdrant_config.QDRANT_URL.get_secret_value(), 21 | api_key=qdrant_config.QDRANT_API_KEY.get_secret_value(), 22 | default_query_limit=10, 23 | ) 24 | 25 | executor = sl.InteractiveExecutor( 26 | sources=[product_source], 27 | indices=[index.product_index], 28 | vector_database=vector_database, 29 | ) 30 | self.app = executor.run() 31 | except Exception as e: 32 | logging.error(f"Failed to connect to Qdrant: {str(e)}") 33 | raise QdrantConnectionError("Failed to establish connection with Qdrant vector database. 
Please check your connection and credentials.") from e 34 | 35 | superlinked = SuperlinkedClient() -------------------------------------------------------------------------------- /app/agents/config/qdrant.py: -------------------------------------------------------------------------------- 1 | 2 | from pydantic import SecretStr 3 | from pydantic_settings import BaseSettings 4 | from dotenv import load_dotenv 5 | 6 | load_dotenv() 7 | 8 | class QdrantConfig(BaseSettings): 9 | """Qdrant client settings.""" 10 | # superlinked 11 | QDRANT_URL: SecretStr 12 | QDRANT_API_KEY: SecretStr 13 | QDRANT_VECTOR_DIMENSION: int = 2054 14 | QDRANT_VECTOR_DISTANCE: str = "Dot" 15 | # long term memory 16 | QDRANT_COLLECTION_NAME: str = "oliva_history" 17 | QDRANT_VECTOR_NAME: str = "history_vector" 18 | 19 | qdrant_config = QdrantConfig() -------------------------------------------------------------------------------- /app/agents/core/agent_state.py: -------------------------------------------------------------------------------- 1 | from typing import Annotated, Sequence 2 | from langgraph.graph import MessagesState 3 | from langchain_core.messages import BaseMessage 4 | from langgraph.graph.message import add_messages 5 | from app.utils.types import ToolType 6 | import operator 7 | 8 | def last_value_reducer(current: str | None, new: str) -> str: 9 | """Reducer that keeps only the last value, preserving initial value if new is empty""" 10 | if new == "": 11 | return current if current is not None else "" 12 | return new 13 | 14 | class BaseState(MessagesState): 15 | messages: Annotated[Sequence[BaseMessage], add_messages] 16 | next: Annotated[str, last_value_reducer] 17 | 18 | class AgentState(BaseState): 19 | pass 20 | 21 | class SubGraphAgentState(BaseState): 22 | rewrite_count: Annotated[int, operator.add] 23 | tools: Annotated[list[ToolType], operator.add] 24 | explanation: Annotated[str, operator.add] -------------------------------------------------------------------------------- /app/agents/core/base_agent.py: -------------------------------------------------------------------------------- 1 | from typing import Any, List, Optional, Sequence 2 | 3 | from app.agents.langchain.interface.events import AgentEvents 4 | from app.agents.core.agent_state import AgentState 5 | from app.agents.langchain.tools.tools import ToolProvider 6 | from app.agents.langchain.edges.edges import EdgeProvider 7 | from app.agents.langchain.nodes.nodes import NodeProvider 8 | from app.utils.types import EdgeType, ToolType, NodeType 9 | from langgraph.types import Command 10 | 11 | class BaseAgent(): 12 | _instance = None 13 | """Base class for all agents""" 14 | def __new__(cls): 15 | if cls._instance is None: 16 | cls._instance = super().__new__(cls) 17 | return cls._instance 18 | 19 | def __init__( 20 | self, 21 | tool_types: Optional[Sequence[ToolType]] = None, 22 | edge_types: Optional[Sequence[EdgeType]] = None, 23 | node_types: Optional[Sequence[NodeType]] = None 24 | ): 25 | self.tool_types = tool_types 26 | self.edge_types = edge_types 27 | self.node_types = node_types 28 | self._tool_provider = None 29 | self._edge_provider = None 30 | self._nodes_provider = None 31 | 32 | @property 33 | def tool_provider(self) -> ToolProvider: 34 | if self._tool_provider is None: 35 | self._tool_provider = ToolProvider() 36 | return self._tool_provider 37 | 38 | @property 39 | def edge_provider(self) -> EdgeProvider: 40 | if self._edge_provider is None: 41 | self._edge_provider = EdgeProvider() 42 | return self._edge_provider 43 | 
44 | @property 45 | def nodes_provider(self) -> NodeProvider: 46 | if self._nodes_provider is None: 47 | self._nodes_provider = NodeProvider() 48 | return self._nodes_provider 49 | 50 | # @lru_cache(maxsize=1) 51 | def setup_tools(self) -> List[Any]: 52 | """Get tools based on specified types or all available tools if none specified""" 53 | return self.tool_provider.get_items_by_types(self.tool_types) 54 | 55 | # @lru_cache(maxsize=1) 56 | def setup_edges(self) -> List[Any]: 57 | """Get edges based on specified types or all available edges if none specified""" 58 | return self.edge_provider.get_items_by_types(self.edge_types) 59 | 60 | # @lru_cache(maxsize=1) 61 | def setup_nodes(self) -> List[Any]: 62 | """Get nodes based on specified types or all available nodes if none specified""" 63 | return self.nodes_provider.get_items_by_types(self.node_types) 64 | 65 | def setup_events(self) -> Any: 66 | """Initialize workflow components when needed""" 67 | tools = self.setup_tools() 68 | edges = self.setup_edges() 69 | nodes = self.setup_nodes() 70 | 71 | return AgentEvents.mapper(tools, edges, nodes) 72 | 73 | def inject_tools_and_template(self, tools, target_node, template): 74 | """Create a wrapper node that injects tools into the state before execution. 75 | 76 | Args: 77 | tools: Dictionary of tools to inject 78 | target_node: The original node function to wrap 79 | 80 | Returns: 81 | A wrapped node function that ensures tools are available in state 82 | """ 83 | def wrapped_node(state: AgentState): 84 | command = Command( 85 | goto=None, 86 | update={ 87 | "tools": tools, 88 | "template": template 89 | } 90 | ) 91 | 92 | return target_node(state | command.update) 93 | 94 | return wrapped_node -------------------------------------------------------------------------------- /app/agents/implementations/blog_post/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Deluxer/oliva/fac56e1afb72c2ff151e8e33ecf4896080835775/app/agents/implementations/blog_post/__init__.py -------------------------------------------------------------------------------- /app/agents/implementations/blog_post/agent.py: -------------------------------------------------------------------------------- 1 | from functools import lru_cache 2 | from app.agents.core.agent_state import SubGraphAgentState 3 | from app.agents.core.base_agent import BaseAgent 4 | from app.utils.types import EdgeType, NodeType, ToolType 5 | from langgraph.graph import END, START, StateGraph 6 | from langgraph.prebuilt import ToolNode, tools_condition 7 | from app.agents.langchain.factory import AgentFactory 8 | from typing import Dict, Any 9 | from app.utils.prompts import prompts 10 | 11 | class BlogPostAgent(BaseAgent): 12 | """Agent specialized in searching and analyzing blog posts""" 13 | _instance = None 14 | 15 | def __new__(cls): 16 | if cls._instance is None: 17 | cls._instance = super().__new__(cls) 18 | cls._instance.__init__() 19 | return cls._instance 20 | 21 | def __init__(self): 22 | if not hasattr(self, '_initialized'): 23 | super().__init__( 24 | tool_types=[ToolType.BLOG_SEARCH], 25 | edge_types=[EdgeType.GRADE_DOCUMENTS], 26 | node_types=[NodeType.AGENT, NodeType.GENERATE, NodeType.REWRITE] 27 | ) 28 | self._initialized = True 29 | self._workflow = None 30 | 31 | @lru_cache(maxsize=1) 32 | def prepare(self): 33 | """Prepare the agent workflow only when needed""" 34 | self._workflow = StateGraph(SubGraphAgentState) 35 | events = self.setup_events() 36 | 
tools, _, nodes = events 37 | 38 | # Dynamic injection of tools into the agent node 39 | agent = self.inject_tools_and_template(tools, nodes[NodeType.AGENT], prompts.BLOG_SEARCH_PROMPT) 40 | self._workflow.add_node("agent", agent) 41 | self._workflow.add_edge(START, "agent") 42 | 43 | def process(self, input_state: Dict[str, Any]) -> Dict[str, Any]: 44 | self.prepare() 45 | return AgentFactory.create_agent(self._workflow, input_state) 46 | 47 | def studio(self) -> Dict[str, Any]: 48 | """Compile workflow for LangGraph Studio""" 49 | self.prepare() 50 | return self._workflow.compile() 51 | 52 | agent = BlogPostAgent() 53 | 54 | # Initialize graph for LangGraph Studio 55 | graph = agent.studio() -------------------------------------------------------------------------------- /app/agents/implementations/main_deprecated.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Type 2 | from agents.base.agent import BaseAgent 3 | from agents.blog_post.agent import BlogPostAgent 4 | 5 | class CallAgent: 6 | """Factory for creating different types of agents""" 7 | 8 | _agents: Dict[str, Type[BaseAgent]] = { 9 | "blog_search": BlogPostAgent, 10 | # Add more agents here as they are implemented 11 | } 12 | 13 | @classmethod 14 | def create_agent(cls, agent_type: str) -> BaseAgent: 15 | """Create an agent instance based on the specified type""" 16 | if agent_type not in cls._agents: 17 | raise ValueError(f"Unknown agent type: {agent_type}") 18 | 19 | return cls._agents[agent_type]() 20 | 21 | def process_query(agent_type: str, query: str) -> Dict: 22 | """Process a query using the specified agent type""" 23 | agent = CallAgent.create_agent(agent_type) 24 | return agent.process_input({"query": query}) 25 | 26 | def get_blog_posts(): 27 | return process_query("blog_search", "How Harrison Chase defines an agent?") -------------------------------------------------------------------------------- /app/agents/implementations/search_amazon_products/agent_by_json.py: -------------------------------------------------------------------------------- 1 | from langgraph.graph import END, START, StateGraph 2 | from app.agents.core.agent_state import AgentState 3 | from functools import lru_cache 4 | from langgraph.prebuilt import ToolNode, tools_condition 5 | 6 | from app.agents.core.base_agent import BaseAgent 7 | from app.agents.langchain.factory import AgentFactory 8 | from app.utils.types import ToolType, EdgeType, NodeType 9 | from app.utils.prompts import prompts 10 | 11 | class SearchAmazonProductsAgentByJson(BaseAgent): 12 | """Agent specialized in searching amazon products""" 13 | _instance = None 14 | 15 | def __new__(cls): 16 | if cls._instance is None: 17 | cls._instance = super().__new__(cls) 18 | return cls._instance 19 | 20 | def __init__(self): 21 | if not hasattr(self, '_initialized'): 22 | super().__init__( 23 | tool_types=[ToolType.AMAZON_PRODUCTS_SEARCH_BY_JSON], 24 | edge_types=[EdgeType.GRADE_DOCUMENTS], 25 | node_types=[NodeType.AGENT, NodeType.GENERATE, NodeType.REWRITE] 26 | ) 27 | self._workflow = None 28 | self._initialized = True 29 | 30 | @lru_cache(maxsize=1) 31 | def prepare(self): 32 | """Initialize workflow components and configure the graph structure.""" 33 | self._workflow = StateGraph(AgentState) 34 | 35 | events = self.setup_events() 36 | tools, edges, nodes = events 37 | agent = self.inject_tools_and_template(tools, nodes[NodeType.AGENT], prompts.AGENT_PROMPT_BY_JSON) 38 | 39 | self._workflow.add_node("agent", agent) 40 | 
self._workflow.add_node("retrieve", ToolNode(tools)) 41 | self._workflow.add_node("rewrite", nodes[NodeType.REWRITE]) 42 | self._workflow.add_node("generate", nodes[NodeType.GENERATE]) 43 | self._workflow.add_edge(START, "agent") 44 | self._workflow.add_conditional_edges( 45 | "agent", 46 | tools_condition, 47 | {"tools": "retrieve", END: END} 48 | ) 49 | self._workflow.add_conditional_edges( 50 | "retrieve", 51 | edges[EdgeType.GRADE_DOCUMENTS], 52 | {"generate": "generate", "rewrite": "rewrite"} 53 | ) 54 | self._workflow.add_edge("generate", END) 55 | 56 | def process(self, input_state: dict): 57 | self.prepare() 58 | 59 | result = AgentFactory.create_agent(self._workflow, input_state) 60 | return result 61 | 62 | def studio(self): 63 | """Compile workflow for LangGraph Studio""" 64 | self.prepare() 65 | return self._workflow.compile() 66 | 67 | agent = SearchAmazonProductsAgentByJson() 68 | 69 | # Initialize graph for LangGraph Studio 70 | graph = agent.studio() -------------------------------------------------------------------------------- /app/agents/implementations/search_amazon_products/agent_by_superlinked.py: -------------------------------------------------------------------------------- 1 | from langgraph.graph import END, START, StateGraph 2 | from app.agents.core.agent_state import AgentState 3 | from functools import lru_cache 4 | from typing import Dict, Any 5 | 6 | from app.agents.core.base_agent import BaseAgent 7 | from app.agents.langchain.factory import AgentFactory 8 | from app.utils.types import NodeType, ToolType 9 | from app.utils.prompts import prompts 10 | 11 | class SearchAmazonProductsAgentBySuperlinked(BaseAgent): 12 | """Agent specialized in searching amazon products""" 13 | _instance = None 14 | 15 | def __new__(cls): 16 | if cls._instance is None: 17 | cls._instance = super().__new__(cls) 18 | cls._instance.__init__() 19 | return cls._instance 20 | 21 | def __init__(self): 22 | if not hasattr(self, '_initialized'): 23 | super().__init__( 24 | tool_types=[ToolType.AMAZON_PRODUCTS_SEARCH_BY_SUPERLINKED], 25 | edge_types=[], 26 | node_types=[NodeType.AGENT] 27 | ) 28 | self._workflow = None 29 | self._initialized = True 30 | 31 | @lru_cache(maxsize=1) 32 | def prepare(self): 33 | """Initialize workflow components and configure the graph structure.""" 34 | self._workflow = StateGraph(AgentState) 35 | 36 | events = self.setup_events() 37 | tools, _, nodes = events 38 | # Dynamic injection of tools into the agent node 39 | agent = self.inject_tools_and_template(tools, nodes[NodeType.AGENT], prompts.AGENT_PROMPT_BY_SUPERLINKED) 40 | self._workflow.add_node("agent", agent) 41 | 42 | self._workflow.set_entry_point('agent') 43 | self._workflow.add_edge(START, "agent") 44 | self._workflow.add_edge("agent", END) 45 | 46 | def process(self, input_state: Dict[str, Any]) -> Dict[str, Any]: 47 | self.prepare() 48 | return AgentFactory.create_agent(self._workflow, input_state) 49 | 50 | def studio(self) -> Dict[str, Any]: 51 | """Compile workflow for LangGraph Studio""" 52 | self.prepare() 53 | return self._workflow.compile() 54 | 55 | agent = SearchAmazonProductsAgentBySuperlinked() 56 | 57 | # Initialize graph for LangGraph Studio 58 | graph = agent.studio() -------------------------------------------------------------------------------- /app/agents/implementations/supervisor.py: -------------------------------------------------------------------------------- 1 | from langgraph.graph import END, START, StateGraph 2 | from langgraph.prebuilt import tools_condition 3 | 
from app.agents.core.agent_state import AgentState 4 | from functools import lru_cache 5 | from typing import Dict, Any 6 | 7 | from app.agents.core.base_agent import BaseAgent 8 | from app.agents.implementations.blog_post.agent import graph as graph_blog 9 | from app.agents.implementations.search_amazon_products.agent_by_superlinked import graph as graph_search_by_superlinked 10 | from app.agents.langchain.factory import AgentFactory 11 | from app.utils.types import NodeType 12 | from app.agents.langchain.memory.long_term import long_term_memory 13 | from functools import partial 14 | 15 | class SupervisorAgent(BaseAgent): 16 | """Agent specialized in supervising other agents""" 17 | _instance = None 18 | 19 | def __new__(cls): 20 | if cls._instance is None: 21 | cls._instance = super().__new__(cls) 22 | cls._instance.__init__() 23 | return cls._instance 24 | 25 | def __init__(self): 26 | if not hasattr(self, '_initialized'): 27 | super().__init__( 28 | tool_types=[], 29 | edge_types=[], 30 | node_types=[NodeType.SUPERVISOR] 31 | ) 32 | self._workflow = None 33 | self._initialized = True 34 | 35 | @lru_cache(maxsize=1) 36 | def prepare(self): 37 | """Initialize workflow components and configure the graph structure.""" 38 | self._workflow = StateGraph(AgentState) 39 | events = self.setup_events() 40 | _, _, nodes = events 41 | 42 | self._workflow.add_node("supervisor", partial(nodes[NodeType.SUPERVISOR], store=long_term_memory)) 43 | self._workflow.add_node("amazon_products_agent", graph_search_by_superlinked) 44 | self._workflow.add_node("blog_post_agent", graph_blog) 45 | 46 | self._workflow.add_edge(START, "supervisor") 47 | self._workflow.add_conditional_edges( 48 | "supervisor", 49 | tools_condition, 50 | { 51 | "amazon_products_agent": "amazon_products_agent", 52 | "blog_post_agent": "blog_post_agent", 53 | END: END 54 | } 55 | ) 56 | self._workflow.add_edge("supervisor", END) 57 | self._workflow.add_edge("blog_post_agent", END) 58 | self._workflow.add_edge("amazon_products_agent", END) 59 | 60 | def process(self, input_state: Dict[str, Any]) -> Dict[str, Any]: 61 | """Process input through the workflow""" 62 | self.prepare() 63 | return AgentFactory.create_agent(self._workflow, input_state) 64 | 65 | def studio(self) -> Dict[str, Any]: 66 | """Compile workflow for LangGraph Studio""" 67 | self.prepare() 68 | return self._workflow.compile() 69 | 70 | agent = SupervisorAgent() 71 | 72 | graph = agent.studio() -------------------------------------------------------------------------------- /app/agents/langchain/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | LangChain integration module for Agentic RAG implementation. 3 | """ 4 | 5 | from .nodes.agent import agent 6 | from .nodes.generate import generate 7 | from .nodes.rewrite import rewrite 8 | 9 | __all__ = [ 10 | 'agent', 11 | 'generate', 12 | 'rewrite', 13 | ] -------------------------------------------------------------------------------- /app/agents/langchain/edges/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Node implementations for the Agentic RAG workflow. 
3 | """ 4 | 5 | from .edges import EdgeProvider 6 | 7 | __all__ = ['EdgeProvider'] -------------------------------------------------------------------------------- /app/agents/langchain/edges/check_relevance.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Any, Literal 2 | from langchain_core.messages import AIMessage 3 | 4 | def check_relevance(state) -> Literal["generate", "rewrite"]: 5 | """Check relevance of retrieved documents.""" 6 | print("---CHECK RELEVANCE---") 7 | 8 | messages = state["messages"] 9 | last_message = messages[-1] 10 | 11 | if not isinstance(last_message, AIMessage): 12 | raise ValueError("The 'checkRelevance' node requires the most recent message to be an AIMessage") 13 | 14 | if not hasattr(last_message, "tool_calls"): 15 | raise ValueError("The 'checkRelevance' node requires the most recent message to contain tool calls") 16 | 17 | tool_calls = last_message.tool_calls 18 | if not tool_calls or len(tool_calls) == 0: 19 | raise ValueError("Last message was not a function message") 20 | 21 | if tool_calls[0].args.get("binary_score") == "yes": 22 | print("---DECISION: DOCS RELEVANT---") 23 | return "generate" 24 | 25 | print("---DECISION: DOCS NOT RELEVANT---") 26 | return "rewrite" -------------------------------------------------------------------------------- /app/agents/langchain/edges/edges.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Literal 2 | import logging 3 | from importlib import import_module 4 | from app.utils.types import EdgeType 5 | from app.agents.langchain.interface.base_provider import BaseProvider 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | class EdgeProvider(BaseProvider[EdgeType]): 10 | """Edge provider for blog post related operations""" 11 | 12 | def __init__(self): 13 | self._edge_imports: Dict[EdgeType, tuple[str, str]] = { 14 | EdgeType.GRADE_DOCUMENTS: ("app.agents.langchain.edges.grade_documents", "grade_documents"), 15 | } 16 | super().__init__() 17 | 18 | def _initialize_items(self) -> None: 19 | """Lazy-load edges dynamically to prevent unnecessary imports.""" 20 | self._items = {} 21 | 22 | for edge_type, (module_path, func_name) in self._edge_imports.items(): 23 | try: 24 | module = import_module(module_path) 25 | self._items[edge_type] = getattr(module, func_name) 26 | except (ImportError, AttributeError) as e: 27 | logger.error(f"Failed to import edge {edge_type}: {e}") 28 | 29 | def get_items(self) -> Dict[EdgeType, Any]: 30 | """Get all edges""" 31 | return self._items 32 | 33 | def evaluate(self, state: Any) -> Literal["generate", "rewrite", str]: 34 | """Evaluate the state using the appropriate edge condition""" 35 | edges = self.get_items() 36 | if EdgeType.GRADE_DOCUMENTS in edges: 37 | return edges[EdgeType.GRADE_DOCUMENTS](state) 38 | return "generate" # Fallback 39 | -------------------------------------------------------------------------------- /app/agents/langchain/edges/grade_documents.py: -------------------------------------------------------------------------------- 1 | from typing import Literal 2 | from langchain_core.prompts import PromptTemplate 3 | from langchain_openai import ChatOpenAI 4 | from pydantic import BaseModel, Field 5 | 6 | from app.agents.core.agent_state import AgentState 7 | from app.utils.constants import constants 8 | from app.utils.prompts import prompts 9 | 10 | def grade_documents(state: AgentState) -> Literal["generate", "rewrite"]: 11 | 
"""Determines whether the retrieved documents are relevant to the question.""" 12 | class grade(BaseModel): 13 | """Binary score for relevance check.""" 14 | binary_score: str = Field(description="Relevance score 'yes' or 'no'") 15 | explanation: str = Field(description="Brief explanation of the relevance decision") 16 | 17 | model = ChatOpenAI(temperature=0, model=constants.LLM_MODEL, streaming=True) 18 | llm_with_tool = model.with_structured_output(grade) 19 | 20 | prompt = PromptTemplate( 21 | template=prompts.GRADE_DOCUMENTS_PROMPT_OPT_2, 22 | input_variables=["context", "question"], 23 | ) 24 | 25 | chain = prompt | llm_with_tool 26 | messages = state["messages"] 27 | question = messages[0].content 28 | docs = messages[-1].content 29 | 30 | rewrite_count = state["rewrite_count"] 31 | 32 | # Define a max rewrite limit to avoid infinite loops 33 | MAX_REWRITE_ATTEMPTS = 2 34 | 35 | scored_result = chain.invoke({"question": question, "context": docs}) 36 | 37 | if scored_result.binary_score == "yes": 38 | return "generate" 39 | else: 40 | state["rewrite_count"] = rewrite_count + 1 41 | state["explanation"] = scored_result.explanation 42 | 43 | # Stop rewriting after max attempts 44 | if state["rewrite_count"] >= MAX_REWRITE_ATTEMPTS: 45 | return "generate" 46 | 47 | return "rewrite" 48 | -------------------------------------------------------------------------------- /app/agents/langchain/factory.py: -------------------------------------------------------------------------------- 1 | from langgraph.graph import StateGraph 2 | from langchain_core.messages import HumanMessage 3 | from app.utils.helpers import invoke, stream 4 | 5 | class AgentFactory: 6 | @staticmethod 7 | def create_agent(workflow: StateGraph, input_data: dict): 8 | """Execute the agent workflow without recompiling it""" 9 | current_query = input_data.get("query", "") 10 | 11 | if not isinstance(current_query, str) or not current_query.strip(): 12 | return {"messages": ["No valid query provided."]} 13 | 14 | graph = workflow.compile() 15 | 16 | formatted_input = { 17 | "messages": ["user", current_query], 18 | "tools": input_data.get("tools", []), 19 | "template": input_data.get("template", ""), 20 | "next": "agent" 21 | } 22 | 23 | return invoke(graph, formatted_input) -------------------------------------------------------------------------------- /app/agents/langchain/interface/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | LangChain integration module for Agentic RAG implementation. 
3 | """ 4 | 5 | from .events import AgentEvents 6 | from .base_provider import BaseProvider 7 | 8 | __all__ = [ 9 | 'AgentEvents', 10 | 'BaseProvider', 11 | ] -------------------------------------------------------------------------------- /app/agents/langchain/interface/base_provider.py: -------------------------------------------------------------------------------- 1 | # Add type hints for better code clarity 2 | from abc import abstractmethod 3 | from typing import Dict, List, Optional, Sequence, Any, TypeVar, Generic 4 | 5 | T = TypeVar('T') 6 | class BaseProvider(Generic[T]): 7 | def __init__(self): 8 | self._items: Dict[T, Any] = {} 9 | self._initialize_items() 10 | 11 | @abstractmethod 12 | def _initialize_items(self) -> None: 13 | pass 14 | 15 | @abstractmethod 16 | def get_items(self) -> Dict[T, Any]: 17 | return self._items 18 | 19 | def get_items_by_types(self, types: Optional[Sequence[T]]) -> List[Any]: 20 | """Return items by types""" 21 | items_dict = self.get_items() 22 | # If no types specified, return empty dictionary 23 | if not types: 24 | return {} 25 | 26 | selected_items = {} 27 | for item_type in types: 28 | if item_type in items_dict: 29 | selected_items[item_type] = items_dict[item_type] 30 | return selected_items -------------------------------------------------------------------------------- /app/agents/langchain/interface/events.py: -------------------------------------------------------------------------------- 1 | from typing import Any, List, Type, Union 2 | from app.agents.core.agent_state import AgentState 3 | from app.agents.langchain.interface.base_provider import BaseProvider 4 | 5 | class AgentEvents: 6 | @staticmethod 7 | def mapper( 8 | tools: Union[List[Any], Type[BaseProvider]], 9 | edges: Union[List[Any], Type[BaseProvider]], 10 | nodes: Union[List[Any], Type[BaseProvider]], 11 | ) -> List[Any]: 12 | tools_list = list(tools.values()) 13 | return [tools_list, edges, nodes] -------------------------------------------------------------------------------- /app/agents/langchain/memory/long_term.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List, Optional, Tuple, Union 2 | from datetime import datetime 3 | import uuid 4 | import logging 5 | 6 | from langchain_openai import OpenAIEmbeddings, ChatOpenAI 7 | from qdrant_client import QdrantClient, models 8 | from app.agents.config.qdrant import QdrantConfig 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | class LongTermMemoryStore(): 13 | """Long-term memory store implementation using Qdrant for semantic search via LangChain""" 14 | 15 | def __init__( 16 | self, 17 | collection_name: Optional[str] = None, 18 | embedding_model: str = "text-embedding-3-small", 19 | embedding_dims: int = 1536, 20 | ): 21 | """Initialize the memory store with Qdrant connection and in-memory store""" 22 | config = QdrantConfig() 23 | 24 | try: 25 | self.embeddings = OpenAIEmbeddings(model=embedding_model, dimensions=embedding_dims) 26 | except Exception as e: 27 | logger.error(f"Failed to initialize embeddings: {e}") 28 | raise 29 | 30 | self.collection_name = collection_name or config.QDRANT_COLLECTION_NAME 31 | self.embedding_dims = embedding_dims 32 | 33 | url = config.QDRANT_URL.get_secret_value() 34 | api_key = config.QDRANT_API_KEY.get_secret_value() 35 | 36 | try: 37 | self.client = QdrantClient( 38 | url=url, 39 | api_key=api_key, 40 | timeout=10.0 41 | ) 42 | 43 | # Check if collection exists, if not create it 44 | collections = 
self.client.get_collections().collections 45 | collection_names = [collection.name for collection in collections] 46 | 47 | if self.collection_name not in collection_names: 48 | logger.info(f"Creating new collection: {self.collection_name}") 49 | self.client.create_collection( 50 | collection_name=self.collection_name, 51 | vectors_config=models.VectorParams( 52 | size=self.embedding_dims, 53 | distance=models.Distance.COSINE 54 | ) 55 | ) 56 | logger.info(f"Created new collection: {self.collection_name}") 57 | 58 | except Exception as e: 59 | logger.error(f"Failed to initialize Qdrant store: {e}") 60 | raise 61 | 62 | def put( 63 | self, 64 | namespace: Union[Tuple[str, str], str], 65 | key: str, 66 | value: Dict[str, Any], 67 | *, 68 | index: Optional[Union[List[str], bool]] = None 69 | ) -> None: 70 | """Store a value in both Qdrant and in-memory store with semantic indexing 71 | 72 | Args: 73 | namespace: Either a tuple of (user_id, memory_type) or a string in format "user_id:memory_type" 74 | key: Unique identifier for the memory 75 | value: Dictionary containing the memory data 76 | index: List of fields to embed, or False to skip embedding 77 | """ 78 | # Convert tuple namespace to string if needed 79 | if isinstance(namespace, tuple): 80 | namespace = f"{namespace[0]}:{namespace[1]}" 81 | 82 | if index is False: 83 | return 84 | 85 | try: 86 | fields_to_embed = index if isinstance(index, list) else ["memory"] 87 | 88 | texts_to_embed = [] 89 | for field in fields_to_embed: 90 | if field in value: 91 | field_value = value[field] 92 | if isinstance(field_value, str): 93 | texts_to_embed.append(field_value) 94 | else: 95 | texts_to_embed.append(str(field_value)) 96 | 97 | text_to_embed = " ".join(texts_to_embed) 98 | if not text_to_embed.strip(): 99 | logger.warning(f"No text to embed found in fields: {fields_to_embed}") 100 | return 101 | 102 | vector = self.embeddings.embed_query(text_to_embed) 103 | 104 | point_id = str(uuid.uuid4()) 105 | self.client.upsert( 106 | collection_name=self.collection_name, 107 | points=[ 108 | models.PointStruct( 109 | id=point_id, 110 | payload={ 111 | "namespace": namespace, 112 | "key": key, 113 | "value": value, 114 | "created_at": datetime.now().isoformat(), 115 | "updated_at": datetime.now().isoformat(), 116 | "embedded_fields": fields_to_embed 117 | }, 118 | vector=vector 119 | ) 120 | ] 121 | ) 122 | logger.info(f"Successfully stored vector with id {point_id} in Qdrant") 123 | except Exception as e: 124 | logger.error(f"Failed to store in Qdrant: {e}") 125 | 126 | def search( 127 | self, 128 | namespace: Union[Tuple[str, str], str], 129 | query: str, 130 | limit: int = 5, 131 | *, 132 | score_threshold: float = 0.5 133 | ) -> List[Dict[str, Any]]: 134 | """Search for memories semantically similar to the query 135 | 136 | Args: 137 | namespace: Either a tuple of (user_id, memory_type) or a string in format "user_id:memory_type" 138 | query: Query text to search for 139 | limit: Maximum number of results to return 140 | score_threshold: Minimum similarity score (0-1) for results 141 | 142 | Returns: 143 | List of memories with their similarity scores 144 | """ 145 | # Convert tuple namespace to string if needed 146 | if isinstance(namespace, tuple): 147 | namespace = f"{namespace[0]}:{namespace[1]}" 148 | 149 | try: 150 | query_vector = self.embeddings.embed_query(query) 151 | 152 | search_result = self.client.search( 153 | collection_name=self.collection_name, 154 | query_vector=query_vector, 155 | query_filter=models.Filter( 156 | should=[ 157 
| models.FieldCondition( 158 | key="namespace", 159 | match=models.MatchValue(value=namespace) 160 | ) 161 | ] 162 | ), 163 | limit=limit, 164 | score_threshold=score_threshold 165 | ) 166 | 167 | results = [] 168 | for hit in search_result: 169 | memory = hit.payload["value"] 170 | results.append({ 171 | "value": memory, 172 | "score": hit.score, 173 | "id": hit.id, 174 | "created_at": hit.payload.get("created_at"), 175 | "embedded_fields": hit.payload.get("embedded_fields", []) 176 | }) 177 | 178 | logger.debug(f"Found {len(results)} similar memories") 179 | return results 180 | 181 | except Exception as e: 182 | logger.error(f"Failed to search memories: {e}", exc_info=True) 183 | return [] 184 | 185 | def evaluate(self, query: str, memories: List[Dict[str, Any]]) -> Union[bool, str]: 186 | """Evaluate if the memories are relevant to the current query using LLM. 187 | 188 | Args: 189 | query: Current user query 190 | memories: List of retrieved memories with their metadata 191 | 192 | Returns: 193 | Union[bool, str]: Either a boolean indicating if memories are relevant, 194 | or a string containing the refined response 195 | """ 196 | try: 197 | llm = ChatOpenAI(model="gpt-4o-mini", temperature=0) 198 | 199 | memory = memories[0]["value"] 200 | memory_query = memory["query"] 201 | memory_response = memory["response"] 202 | similarity_score = memories[0]["score"] 203 | 204 | eval_prompt = f"""Evaluate if a previous response can be reused for a new query and refine it if needed. 205 | 206 | Current Query: "{query}" 207 | 208 | Previous Interaction: 209 | Query: "{memory_query}" 210 | Response: "{memory_response}" 211 | Similarity Score: {similarity_score:.2f} (0-1 scale) 212 | 213 | Instructions: 214 | 1. First, evaluate if the queries are asking for similar information (e.g., both about books, products, etc.) 215 | 2. If queries are NOT similar, respond with "false" 216 | 3. If queries ARE similar: 217 | - If ALL items in the previous response satisfy the current query's conditions, respond with "true" 218 | - If SOME items match but others don't, respond with a REFINED version of the response that includes ONLY the matching items 219 | - If NO items match the current query's conditions, respond with "false" 220 | 221 | Format your response as either: 222 | - "false" if responses are completely different or no items match 223 | - "true" if all items in the original response match 224 | - A refined response starting with "REFINED:" that includes only matching items in the same format as the original response 225 | 226 | Example refinements: 227 | REFINED: 228 | Here are some books priced under $20: 229 | 1. **Book Title** 230 | - Price: $15.99 231 | - Rating: 4.5 232 | [Rest of the refined response...] 233 | 234 | REFINED: 235 | Harrison Chase defines an AI agent as... 236 | [Rest of the refined response...] 
237 | """ 238 | 239 | result = llm.invoke(eval_prompt) 240 | 241 | content = result.content.strip() 242 | if content.startswith("REFINED:"): 243 | # Return the refined response 244 | return content[8:].strip() # Remove "REFINED:" prefix 245 | else: 246 | # Return boolean for true/false responses 247 | return content.lower() == "true" 248 | 249 | except Exception as e: 250 | logger.error(f"Failed to evaluate memories: {e}", exc_info=True) 251 | return False 252 | 253 | long_term_memory = LongTermMemoryStore() -------------------------------------------------------------------------------- /app/agents/langchain/nodes/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Node implementations for the Agentic RAG workflow. 3 | """ 4 | 5 | from .nodes import NodeProvider 6 | 7 | __all__ = ['NodeProvider'] -------------------------------------------------------------------------------- /app/agents/langchain/nodes/agent.py: -------------------------------------------------------------------------------- 1 | from langgraph.types import Command 2 | from typing import Literal 3 | from langgraph.checkpoint.memory import MemorySaver 4 | from langchain_openai import ChatOpenAI 5 | from langgraph.prebuilt import create_react_agent 6 | 7 | from app.agents.core.agent_state import SubGraphAgentState 8 | from app.utils.constants import constants 9 | 10 | def agent(state: SubGraphAgentState) -> Command[Literal['supervisor']]: 11 | """ 12 | Agent that decides whether to use tools or not 13 | """ 14 | tools = state["tools"] 15 | template = state['template'] 16 | memory = MemorySaver() 17 | model = ChatOpenAI(temperature=0, model=constants.LLM_MODEL) 18 | messagesState = state["messages"] 19 | agent = create_react_agent( 20 | model, 21 | tools, 22 | prompt=template, 23 | checkpointer=memory 24 | ) 25 | 26 | agent_response = agent.invoke({"messages": messagesState}) 27 | messages = agent_response["messages"] 28 | 29 | response = Command( 30 | goto = 'supervisor', 31 | update={"next": 'FINISH', "messages": messages}, 32 | graph=Command.PARENT 33 | ) 34 | 35 | return response 36 | -------------------------------------------------------------------------------- /app/agents/langchain/nodes/generate.py: -------------------------------------------------------------------------------- 1 | from langchain_core.output_parsers import StrOutputParser 2 | from langchain_openai import ChatOpenAI 3 | from langchain_core.prompts import PromptTemplate 4 | from langchain import hub 5 | 6 | from app.agents.core.agent_state import AgentState 7 | from app.utils.constants import constants 8 | from app.utils.prompts import prompts 9 | 10 | def generate(state: AgentState): 11 | """Generate answer based on retrieved documents""" 12 | messages = state["messages"] 13 | question = messages[0].content 14 | last_message = messages[-1] 15 | docs = last_message.content 16 | rewrite_count = state.get("rewrite_count", 0) 17 | 18 | # If we've tried rewriting and still found no results, generate a "no results" response 19 | if rewrite_count >= 1 and "DOCS NOT RELEVANT" in docs: 20 | no_results_prompt = PromptTemplate( 21 | template=prompts.NO_RESULTS_PROMPT, 22 | input_variables=["question"] 23 | ) 24 | llm = ChatOpenAI(model_name=constants.LLM_MODEL, temperature=0, streaming=True) 25 | chain = no_results_prompt | llm | StrOutputParser() 26 | response = chain.invoke({"question": question}) 27 | else: 28 | # Prompt 29 | prompt = hub.pull("rlm/rag-prompt") 30 | llm = 
ChatOpenAI(model_name=constants.LLM_MODEL, temperature=0.3, streaming=True) 31 | chain = prompt | llm | StrOutputParser() 32 | response = chain.invoke({"context": docs, "question": question}) 33 | 34 | return {"messages": response} -------------------------------------------------------------------------------- /app/agents/langchain/nodes/nodes.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Literal 2 | import logging 3 | from importlib import import_module 4 | from app.utils.types import NodeType 5 | from app.agents.langchain.interface.base_provider import BaseProvider 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | class NodeProvider(BaseProvider[NodeType]): 10 | """Node provider for blog post related operations""" 11 | 12 | def __init__(self): 13 | self._node_imports: Dict[NodeType, tuple[str, str]] = { 14 | NodeType.AGENT: ("app.agents.langchain.nodes.agent", "agent"), 15 | NodeType.GENERATE: ("app.agents.langchain.nodes.generate", "generate"), 16 | NodeType.REWRITE: ("app.agents.langchain.nodes.rewrite", "rewrite"), 17 | NodeType.SUPERVISOR: ("app.agents.langchain.nodes.supervisor", "supervisor"), 18 | } 19 | super().__init__() 20 | 21 | def _initialize_items(self) -> None: 22 | """Lazy-load nodes dynamically to prevent unnecessary imports.""" 23 | self._items = {} 24 | 25 | for node_type, (module_path, func_name) in self._node_imports.items(): 26 | try: 27 | module = import_module(module_path) 28 | self._items[node_type] = getattr(module, func_name) 29 | except (ImportError, AttributeError) as e: 30 | logger.error(f"Failed to import node {node_type}: {e}") 31 | 32 | def get_items(self) -> Dict[NodeType, Any]: 33 | """Get all nodes""" 34 | return self._items 35 | 36 | def evaluate(self, state: Any) -> Literal["generate", "rewrite", str]: 37 | """Evaluate the state using the appropriate node condition""" 38 | nodes = self.get_items() 39 | if NodeType.GENERATE in nodes: 40 | return nodes[NodeType.GENERATE](state) 41 | return "generate" # Fallback 42 | -------------------------------------------------------------------------------- /app/agents/langchain/nodes/rewrite.py: -------------------------------------------------------------------------------- 1 | from langchain_core.messages import HumanMessage 2 | from langchain_openai import ChatOpenAI 3 | from typing import Dict, Any 4 | 5 | from app.agents.core.agent_state import AgentState 6 | from app.utils.constants import constants 7 | 8 | def rewrite(state: AgentState) -> Dict[str, Any]: 9 | messages = state["messages"] 10 | question = messages[0].content 11 | rewrite_count = state.get("rewrite_count", 0) 12 | 13 | # Max rewrite attempts to prevent looping 14 | MAX_REWRITE_ATTEMPTS = 2 15 | if rewrite_count >= MAX_REWRITE_ATTEMPTS: 16 | return {"messages": messages, "rewrite_count": rewrite_count} 17 | 18 | strategy = "Make the question more specific" if rewrite_count == 0 else "Broaden search scope" 19 | 20 | msg = [ 21 | HumanMessage( 22 | content=f"""Transform this question for better results. 
23 | Original question: {question} 24 | Strategy: {strategy} 25 | Provide only the rewritten question, no explanations.""" 26 | ) 27 | ] 28 | 29 | model = ChatOpenAI(temperature=0, model=constants.LLM_MODEL, streaming=True) 30 | response = model.invoke(msg) 31 | 32 | return { 33 | "messages": [response], 34 | "rewrite_count": rewrite_count + 1 35 | } 36 | -------------------------------------------------------------------------------- /app/agents/langchain/nodes/supervisor.py: -------------------------------------------------------------------------------- 1 | from langchain_core.runnables import RunnableConfig 2 | from langchain_openai import ChatOpenAI 3 | from langgraph.graph import END 4 | from langgraph.types import Command 5 | from langchain.schema import BaseStore 6 | from typing import TypedDict, Literal 7 | import uuid 8 | from datetime import datetime 9 | 10 | from app.utils.prompts import prompts 11 | from app.utils.constants import constants 12 | from app.agents.core.agent_state import AgentState 13 | 14 | supervisor_llm = ChatOpenAI(model=constants.LLM_MODEL) 15 | 16 | class SupervisorOutput(TypedDict): 17 | next: Literal["blog_post_agent", "amazon_products_agent", "FINISH"] 18 | task_description_for_agent: str 19 | message_completion_summary: str 20 | 21 | def supervisor( 22 | state: AgentState, 23 | config: RunnableConfig, 24 | *, 25 | store: BaseStore 26 | ) -> Command[Literal["blog_post_agent", "amazon_products_agent", END]]: 27 | """ 28 | This node is responsible for delegating tasks to other agents. 29 | It first checks the semantic memory for similar previous queries and their results. 30 | If a similar query exists, it may reuse the results or decide to refresh them. 31 | Otherwise, it delegates the task to the appropriate agent. 
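    Routing contract (mirrored in the body below): the node always returns a
    Command whose goto is either END or one of the member agents, and whose
    update sets "next" (and, where there is something to surface, "messages").
    When the incoming state already has next == "FINISH", the query/response
    pair is persisted to the store under the "{user_id}:{chat_id}:memories"
    namespace before control returns to END; otherwise stored memories are
    searched and evaluated first, and only unanswered queries are delegated.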
32 | """ 33 | user_id = config["configurable"]["user_id"] 34 | chat_id = config["configurable"]["chat_id"] 35 | 36 | # Get the initial query 37 | initial_query = state["messages"][0].content 38 | 39 | if state.get("next") == "FINISH": 40 | response = state["messages"][-1].content 41 | memory_text = f"User Query: {initial_query}\nAgent Response: {response}" 42 | 43 | store.put( 44 | namespace=f"{user_id}:{chat_id}:memories", 45 | key=str(uuid.uuid4()), 46 | value={ 47 | "query": initial_query, 48 | "response": response, 49 | "memory": memory_text, 50 | "created_at": datetime.now().isoformat(), 51 | "type": "conversation" 52 | }, 53 | index=["memory"] 54 | ) 55 | return Command(goto=END, update={"next": END}) 56 | 57 | # Get the current user query 58 | current_query = state["messages"][-1].content 59 | 60 | memories = store.search( 61 | namespace=f"{user_id}:{chat_id}:memories", 62 | query=current_query, 63 | limit=3 64 | ) 65 | 66 | if(memories): 67 | evaluation_result = store.evaluate(query=current_query, memories=memories) 68 | if isinstance(evaluation_result, str): 69 | return Command(goto=END, update={"next": END, "messages": evaluation_result}) 70 | elif evaluation_result: 71 | memory_response = memories[0]["value"]["response"] 72 | return Command(goto=END, update={"next": END, "messages": memory_response}) 73 | 74 | members = ["blog_post_agent", "amazon_products_agent"] 75 | agent_members_prompt_final = f""" 76 | blog_post_agent: 77 | - Prompt: {prompts.BLOG_SEARCH_PROMPT} 78 | amazon_products_agent: 79 | - Prompt: {prompts.AMAZON_SEARCH_PROMPT} 80 | """ 81 | supervisor_system_prompt = prompts.supervisor_system_prompt(members, agent_members_prompt_final) 82 | messages = [{"role": "system", "content": supervisor_system_prompt}] + state["messages"] 83 | response = supervisor_llm.with_structured_output(SupervisorOutput).invoke(messages) 84 | goto = response["next"] 85 | 86 | if goto == "FINISH": 87 | return Command(goto=END, update={"next": END, "messages": response["message_completion_summary"]}) 88 | 89 | new_messages = [{"role": "assistant", "content": response["task_description_for_agent"]}] 90 | return Command(goto=goto, update={"next": goto, "messages": new_messages}) -------------------------------------------------------------------------------- /app/agents/langchain/template.py: -------------------------------------------------------------------------------- 1 | 2 | from typing import Any, List, Type 3 | from langchain_core.prompts import PromptTemplate 4 | from langchain_openai import ChatOpenAI 5 | 6 | from app.utils.constants import constants 7 | from app.agents.langchain.state import AgentState 8 | 9 | class LangChainTemplate: 10 | """Factory for creating LangChain components""" 11 | 12 | @staticmethod 13 | def create_llm(model_name: str = None, **kwargs) -> ChatOpenAI: 14 | """Create a ChatOpenAI instance with specified parameters""" 15 | return ChatOpenAI( 16 | temperature=kwargs.get('temperature', 0), 17 | model=model_name or constants.LLM_MODEL, 18 | streaming=kwargs.get('streaming', True) 19 | ) 20 | 21 | @staticmethod 22 | def create_prompt(template: str, input_variables: List[str]) -> PromptTemplate: 23 | """Create a prompt template""" 24 | return PromptTemplate( 25 | template=template, 26 | input_variables=input_variables 27 | ) 28 | 29 | @staticmethod 30 | def create_state() -> Type[AgentState]: 31 | """Create the agent state type""" 32 | return AgentState 33 | 34 | @staticmethod 35 | def create_chain(prompt: PromptTemplate, llm: ChatOpenAI, output_parser: Any = 
None): 36 | """Create a chain with the given components""" 37 | if output_parser: 38 | return prompt | llm | output_parser 39 | return prompt | llm -------------------------------------------------------------------------------- /app/agents/langchain/tools/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Node implementations for the Agentic RAG workflow. 3 | """ 4 | 5 | from .tools import ToolProvider 6 | 7 | __all__ = ['ToolProvider'] -------------------------------------------------------------------------------- /app/agents/langchain/tools/amazon_products_search.py: -------------------------------------------------------------------------------- 1 | from langchain_core.tools import tool 2 | from app.agents.langchain.vector_store.json_retriever import json_retriever 3 | from langchain_core.messages import AIMessage 4 | 5 | @tool('search_products_by_json', description="Tool for searching products based on a user's query.") 6 | def by_json(query: str): 7 | """Search for Amazon products using JSON data""" 8 | retriever = json_retriever() 9 | docs = retriever.invoke(query) 10 | doc_txt = docs[1].page_content 11 | return AIMessage(content=doc_txt) 12 | 13 | @tool('search_products_by_superlinked', description="Tool for searching products based on a user's query.") 14 | def by_superlinked(query: str): 15 | """Search for Amazon products using Superlinked""" 16 | from app.agents.langchain.vector_store.sl_amazon_products_retriever import superlinked_amazon_products_retriever 17 | return superlinked_amazon_products_retriever(query) -------------------------------------------------------------------------------- /app/agents/langchain/tools/blog_posts.py: -------------------------------------------------------------------------------- 1 | from langchain_core.tools import tool 2 | from app.agents.langchain.vector_store.url_retriever import url_retriever 3 | from typing import List 4 | from langchain_core.documents import Document 5 | 6 | def format_search_results(docs: List[Document]) -> str: 7 | """Format search results in a clear and structured way""" 8 | if not docs: 9 | return "No relevant information found." 
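    # What follows de-duplicates documents by exact page_content (retriever
    # order is kept; no re-ranking happens despite the "sort by relevance"
    # wording below), then renders each unique document as a
    # "Source / Title / Content" block and joins the blocks with "---"
    # separators so the tool returns one structured string to the agent.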
10 | 11 | # Remove duplicate content and sort by relevance 12 | seen_content = set() 13 | unique_docs = [] 14 | 15 | for doc in docs: 16 | content = doc.page_content.strip() 17 | if content not in seen_content: 18 | seen_content.add(content) 19 | unique_docs.append(doc) 20 | 21 | formatted_results = [] 22 | for doc in unique_docs: 23 | metadata = doc.metadata 24 | title = metadata.get('title', 'No title') 25 | source = metadata.get('source', 'No source') 26 | content = doc.page_content.strip() 27 | 28 | # Clean up content formatting 29 | content = content.replace('\n\t\n', '\n').replace('\n\n\n', '\n\n') 30 | content = content.replace('\t', '').strip() 31 | 32 | if content: # Only include non-empty content 33 | result = f"Source: {source}\nTitle: {title}\nContent:\n{content}\n" 34 | formatted_results.append(result) 35 | 36 | return "\n---\n".join(formatted_results) 37 | 38 | @tool('blog_search', description="Search for specific information in blog posts.") 39 | def search_in_blog_posts_tool(query: str): 40 | """Search for relevant information in blog posts 41 | 42 | Args: 43 | query: The search query string 44 | 45 | Returns: 46 | Formatted string containing relevant blog post content 47 | """ 48 | retriever = url_retriever() 49 | docs = retriever.invoke(query) 50 | return format_search_results(docs) 51 | 52 | @tool('blog_advance_search', description="Advanced search in blog posts with metadata filtering.") 53 | def search_in_blog_posts_tool_advance(query: str): 54 | """Advanced search in blog posts with metadata filtering""" 55 | # TODO: Implement advanced search with metadata filtering 56 | pass 57 | 58 | @tool('blog_summary', description="Generate a concise summary of blog post search results.") 59 | def search_in_blog_posts_tool_summary(query: str): 60 | """Generate a concise summary of blog post search results""" 61 | # TODO: Implement summary generation 62 | pass -------------------------------------------------------------------------------- /app/agents/langchain/tools/tools.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict 2 | import logging 3 | from app.agents.langchain.interface.base_provider import BaseProvider 4 | from app.utils.types import ToolType 5 | 6 | # Explicitly import tool functions instead of using import_module 7 | from app.agents.langchain.tools.blog_posts import ( 8 | search_in_blog_posts_tool, 9 | search_in_blog_posts_tool_advance, 10 | search_in_blog_posts_tool_summary, 11 | ) 12 | from app.agents.langchain.tools.amazon_products_search import ( 13 | by_json, 14 | by_superlinked, 15 | ) 16 | 17 | class ToolProvider(BaseProvider[ToolType]): 18 | """Provider for all available tools""" 19 | _instance = None 20 | 21 | def __new__(cls): 22 | if cls._instance is None: 23 | cls._instance = super().__new__(cls) 24 | return cls._instance 25 | 26 | def __init__(self): 27 | # Store function references directly instead of module paths 28 | self._tool_imports: Dict[ToolType, Any] = { 29 | ToolType.BLOG_SEARCH: search_in_blog_posts_tool, 30 | ToolType.BLOG_ADVANCE_SEARCH: search_in_blog_posts_tool_advance, 31 | ToolType.BLOG_SUMMARY: search_in_blog_posts_tool_summary, 32 | ToolType.AMAZON_PRODUCTS_SEARCH_BY_JSON: by_json, 33 | ToolType.AMAZON_PRODUCTS_SEARCH_BY_SUPERLINKED: by_superlinked, 34 | } 35 | super().__init__() 36 | self._initialized = True 37 | 38 | def _initialize_items(self) -> None: 39 | """Initialize tools lazily by storing function references""" 40 | self._items = self._tool_imports.copy() 41 | 
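    # Note: ToolProvider is a process-wide singleton (__new__ reuses _instance),
    # and _tool_imports maps ToolType members straight to the @tool-decorated
    # callables, so _initialize_items only copies references and nothing is
    # executed at construction time. get_items() below hands those callables
    # back unexecuted for the agents to bind.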
42 | def get_items(self) -> Dict[ToolType, Any]: 43 | """Get all tools (as function references, not executed)""" 44 | return self._items 45 | -------------------------------------------------------------------------------- /app/agents/langchain/vector_store/json_retriever.py: -------------------------------------------------------------------------------- 1 | import json 2 | from pathlib import Path 3 | from langchain_community.vectorstores import Chroma 4 | from langchain_openai import OpenAIEmbeddings 5 | from langchain_text_splitters import RecursiveCharacterTextSplitter 6 | from langchain_core.documents import Document 7 | from dotenv import load_dotenv 8 | 9 | from app.utils.constants import constants 10 | 11 | load_dotenv() 12 | 13 | def json_retriever(): 14 | """Setup and return the document retriever""" 15 | # Create data directory if it doesn't exist 16 | data_dir = Path("data") 17 | data_dir.mkdir(exist_ok=True) 18 | 19 | # Create persistent directory for Chroma 20 | persist_dir = data_dir / "chroma_db" 21 | persist_dir.mkdir(exist_ok=True) 22 | 23 | # Check if dataset exists 24 | if not constants.PROCESSED_DATASET_PATH.exists(): 25 | raise FileNotFoundError(f"Dataset not found at {constants.PROCESSED_DATASET_PATH}. Please ensure the file exists.") 26 | 27 | docs = [] 28 | seen_titles = set() # Track seen titles to avoid duplicates 29 | 30 | with open(constants.PROCESSED_DATASET_PATH, 'r') as f: 31 | for line in f: 32 | if line.strip(): # Skip empty lines 33 | data = json.loads(line) 34 | title = data.get('title', '') 35 | 36 | # Skip if we've seen this title before 37 | if title in seen_titles: 38 | continue 39 | seen_titles.add(title) 40 | 41 | # Handle price that could be string or float 42 | price_raw = data.get('price', 0) 43 | if isinstance(price_raw, str): 44 | price_str = price_raw.replace('$', '').replace(',', '') 45 | try: 46 | price = float(price_str) 47 | except ValueError: 48 | price = 0.0 49 | else: 50 | price = float(price_raw) 51 | 52 | # Convert category list to string if it exists 53 | category = data.get('category', []) 54 | category_str = ', '.join(category) if isinstance(category, list) else str(category) 55 | 56 | # Create a rich page content that includes price for better matching 57 | page_content = f"Title: {title}. 
Price: ${price:.2f} Category: {category_str} rating: {data.get('review_rating', '')}" 58 | 59 | docs.append(Document( 60 | page_content=page_content, 61 | metadata={ 62 | 'title': title, 63 | 'price': price, # Store as float for easy comparison 64 | 'type': data.get('type', ''), 65 | 'category': category_str, 66 | 'rating': data.get('review_rating', ''), 67 | 'reviews': data.get('review_count', '') 68 | } 69 | )) 70 | 71 | text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder( 72 | chunk_size=constants.CHUNK_SIZE, 73 | chunk_overlap=constants.CHUNK_OVERLAP 74 | ) 75 | doc_splits = text_splitter.split_documents(docs) 76 | 77 | # Use persistent storage for Chroma 78 | vectorstore = Chroma.from_documents( 79 | documents=doc_splits, 80 | collection_name="amazon-products", 81 | embedding=OpenAIEmbeddings(model=constants.EMBEDDING_MODEL), 82 | #persist_directory=str(persist_dir) 83 | ) 84 | return vectorstore.as_retriever() -------------------------------------------------------------------------------- /app/agents/langchain/vector_store/sl_amazon_products_retriever.py: -------------------------------------------------------------------------------- 1 | from app.agents.clients.superlinked import superlinked 2 | from app.agents.schema.superlinked import query_search 3 | from langchain_core.messages import AIMessage 4 | 5 | def superlinked_amazon_products_retriever(query: str): 6 | superlinked.setup() 7 | result = superlinked.app.query( 8 | query_search.semantic_query, 9 | natural_query=query, 10 | limit=3 11 | ) 12 | 13 | to_pandas = result.to_pandas() 14 | 15 | # Extract relevant fields 16 | products = [] 17 | for index, row in to_pandas.iterrows(): 18 | title = row["title"] 19 | price = f"${row['price']:.2f}" 20 | rating = f"{row['review_rating']} ({row['review_count']} reviews)" 21 | product_id = row["id"] 22 | 23 | formatted_output = f"{title}\nPrice: {price}\nRating: {rating}\n" 24 | products.append(formatted_output) 25 | 26 | result_string = "\n".join(products) 27 | 28 | return result_string 29 | -------------------------------------------------------------------------------- /app/agents/langchain/vector_store/url_retriever.py: -------------------------------------------------------------------------------- 1 | from langchain_community.document_loaders import WebBaseLoader 2 | from langchain_community.vectorstores import Chroma 3 | from langchain_openai import OpenAIEmbeddings 4 | from dotenv import load_dotenv 5 | from typing import List 6 | from langchain_core.documents import Document 7 | 8 | from app.utils.constants import constants 9 | 10 | load_dotenv() 11 | 12 | _retriever = None 13 | 14 | def url_retriever(): 15 | """Setup and return the document retriever""" 16 | global _retriever 17 | 18 | if _retriever is not None: 19 | return _retriever 20 | 21 | docs_list: List[Document] = [] 22 | for url in constants.URLS: 23 | loader = WebBaseLoader( 24 | url, 25 | header_template={"User-Agent": "Mozilla/5.0"}, 26 | verify_ssl=False 27 | ) 28 | docs = loader.load() 29 | docs_list.extend(docs) 30 | 31 | vectorstore = Chroma.from_documents( 32 | documents=docs_list, 33 | embedding=OpenAIEmbeddings( 34 | model=constants.EMBEDDING_MODEL, 35 | dimensions=1536 36 | ), 37 | ) 38 | 39 | # CONFIGURE RETRIEVER WITH IMPROVED SEARCH PARAMETERS 40 | _retriever = vectorstore.as_retriever( 41 | search_type="mmr", # Use Maximum Marginal Relevance 42 | search_kwargs={ 43 | "k": 2, # Return top 4 most relevant chunks 44 | "fetch_k": 2, # Fetch 4 chunks from the index 45 | } 46 | ) 47 | 48 | 
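    # Note on the MMR settings above: "k" is the number of chunks returned and
    # "fetch_k" is the candidate pool re-ranked by MMR, and both are set to 2,
    # so at most two chunks come back per query (the inline comments mentioning
    # four do not match the configured values). The result is cached in the
    # module-level _retriever so the URLs are loaded and embedded only once.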
return _retriever -------------------------------------------------------------------------------- /app/agents/schema/superlinked/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Deluxer/oliva/fac56e1afb72c2ff151e8e33ecf4896080835775/app/agents/schema/superlinked/__init__.py -------------------------------------------------------------------------------- /app/agents/schema/superlinked/index.py: -------------------------------------------------------------------------------- 1 | from app.utils.constants import constants 2 | import superlinked.framework as sl 3 | 4 | class ProductSchema(sl.Schema): 5 | id: sl.IdField 6 | type: sl.String 7 | category: sl.StringList 8 | title: sl.String 9 | description: sl.String 10 | review_rating: sl.Float 11 | review_count: sl.Integer 12 | price: sl.Float 13 | 14 | 15 | product = ProductSchema() 16 | 17 | category_space = sl.CategoricalSimilaritySpace( 18 | category_input=product.category, 19 | categories=constants.SPLK_CATEGORIES, 20 | uncategorized_as_category=True, 21 | negative_filter=-1, 22 | ) 23 | title_space = sl.TextSimilaritySpace( 24 | text=product.title, model="Alibaba-NLP/gte-large-en-v1.5" 25 | ) 26 | description_space = sl.TextSimilaritySpace( 27 | text=product.description, model="Alibaba-NLP/gte-large-en-v1.5" 28 | ) 29 | review_rating_maximizer_space = sl.NumberSpace( 30 | number=product.review_rating, min_value=-1.0, max_value=5.0, mode=sl.Mode.MAXIMUM 31 | ) 32 | price_minimizer_space = sl.NumberSpace( 33 | number=product.price, min_value=0.0, max_value=1000, mode=sl.Mode.MINIMUM 34 | ) 35 | 36 | product_index = sl.Index( 37 | spaces=[ 38 | title_space, 39 | description_space, 40 | review_rating_maximizer_space, 41 | price_minimizer_space, 42 | ], 43 | fields=[product.type, product.category, product.review_rating, product.price], 44 | ) 45 | -------------------------------------------------------------------------------- /app/agents/schema/superlinked/query_search.py: -------------------------------------------------------------------------------- 1 | from app.agents.schema.superlinked import index 2 | from app.utils.constants import constants 3 | import superlinked.framework as sl 4 | 5 | openai_config = sl.OpenAIClientConfig( 6 | api_key=constants.OPENAI_API_KEY.get_secret_value(), model=constants.LLM_MODEL 7 | ) 8 | 9 | title_similar_param = sl.Param( 10 | "query_title", 11 | description=( 12 | "The text in the user's query that is used to search in the products' title." 13 | "Extract info that does not apply to other spaces or params." 14 | ), 15 | ) 16 | text_similar_param = sl.Param( 17 | "query_description", 18 | description=( 19 | "The text in the user's query that is used to search in the products' description." 20 | " Extract info that does not apply to other spaces or params." 
21 | ), 22 | ) 23 | 24 | base_query = ( 25 | sl.Query( 26 | index.product_index, 27 | weights={ 28 | index.title_space: sl.Param("title_weight"), 29 | index.description_space: sl.Param("description_weight"), 30 | index.review_rating_maximizer_space: sl.Param( 31 | "review_rating_maximizer_weight" 32 | ), 33 | index.price_minimizer_space: sl.Param("price_minimizer_weights"), 34 | }, 35 | ) 36 | .find(index.product) 37 | .limit(sl.Param("limit")) 38 | .with_natural_query(sl.Param("natural_query"), openai_config) 39 | .filter( 40 | index.product.type 41 | == sl.Param( 42 | "filter_by_type", 43 | description="Used to only present items that have a specific type, if not, ignore this filter", 44 | options=constants.SPLK_TYPES, 45 | ) 46 | ) 47 | ) 48 | 49 | semantic_query = ( 50 | base_query.similar( 51 | index.description_space, 52 | text_similar_param, 53 | sl.Param("description_similar_clause_weight"), 54 | ) 55 | .similar( 56 | index.title_space, 57 | title_similar_param, 58 | sl.Param("title_similar_clause_weight"), 59 | ) 60 | .filter( 61 | index.product.category 62 | == sl.Param( 63 | "filter_by_category", 64 | description="Used to only present items that have a specific category, if not, ignore this filter", 65 | options=constants.SPLK_CATEGORIES, 66 | ) 67 | ) 68 | ) -------------------------------------------------------------------------------- /app/utils/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Node implementations for the Agentic RAG workflow. 3 | """ 4 | 5 | from .constants import constants 6 | 7 | __all__ = ['constants'] -------------------------------------------------------------------------------- /app/utils/constants.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from pydantic import SecretStr 3 | import os 4 | 5 | class Constants: 6 | """Constants for LangChain integration""" 7 | CHUNK_SIZE = 750 8 | CHUNK_OVERLAP = 100 9 | EMBEDDING_MODEL = "text-embedding-3-small" 10 | LLM_MODEL = "gpt-4o-mini" 11 | OPENAI_API_KEY = SecretStr(os.getenv("OPENAI_API_KEY")) 12 | 13 | URLS = [ 14 | "https://blog.langchain.dev/what-is-an-agent/", 15 | "https://huggingface.co/blog/Kseniase/mcp", 16 | ] 17 | 18 | PROCESSED_DATASET_PATH: Path = ( 19 | Path("data") / "processed_100_sample.jsonl" 20 | ) 21 | 22 | SPLK_TYPES = ["product", "book"] 23 | 24 | SPLK_CATEGORIES = [ 25 | "Accessories", 26 | "Appliances", 27 | "Arts & Photography", 28 | "Arts, Crafts & Sewing", 29 | "Automotive", 30 | "Baby Care", 31 | "Baby Products", 32 | "Bath", 33 | "Beauty & Personal Care", 34 | "Bedding", 35 | "Beverages", 36 | "Biographies & Memoirs", 37 | "Books", 38 | "CDs & Vinyl", 39 | "Camera & Photo", 40 | "Cell Phones & Accessories", 41 | "Children's Books", 42 | "Christian Books & Bibles", 43 | "Classical", 44 | "Clothing, Shoes & Jewelry", 45 | "Computers & Accessories", 46 | "Costumes & Accessories", 47 | "Dogs", 48 | "Electrical", 49 | "Electronics", 50 | "Event & Party Supplies", 51 | "Exercise & Fitness", 52 | "Exterior Accessories", 53 | "GPS, Finders & Accessories", 54 | "Grocery & Gourmet Food", 55 | "Hair Care", 56 | "Health & Household", 57 | "Home & Kitchen", 58 | "Hunting & Fishing", 59 | "Industrial & Scientific", 60 | "Industrial Electrical", 61 | "Kitchen & Dining", 62 | "Lighting Assemblies & Accessories", 63 | "Lights & Lighting Accessories", 64 | "Luggage & Travel Gear", 65 | "Makeup", 66 | "Medical Supplies & Equipment", 67 | "Men", 68 | "Movies & TV", 69 | 
"Musical Instruments", 70 | "Office & School Supplies", 71 | "Office Products", 72 | "Patio Furniture & Accessories", 73 | "Patio, Lawn & Garden", 74 | "Pet Supplies", 75 | "Pop", 76 | "Portable Audio & Video", 77 | "Power & Hand Tools", 78 | "Raw Materials", 79 | "Replacement Parts", 80 | "Self-Help", 81 | "Sports & Outdoor Play", 82 | "Sports & Outdoors", 83 | "Stuffed Animals & Plush Toys", 84 | "Tires & Wheels", 85 | "Tools & Home Improvement", 86 | "Toy Figures & Playsets", 87 | "Toys & Games", 88 | "Vehicles", 89 | "Video Games", 90 | "Wall Art", 91 | "Women", 92 | ] 93 | 94 | constants = Constants() -------------------------------------------------------------------------------- /app/utils/helpers.py: -------------------------------------------------------------------------------- 1 | import pprint 2 | 3 | def stream(graph, formatted_input): 4 | # Execute workflow and collect results 5 | results = [] 6 | for output in graph.stream(formatted_input): 7 | for key, value in output.items(): 8 | pprint.pprint(f"Output from node '{key}':") 9 | pprint.pprint("---") 10 | pprint.pprint(value, indent=2, width=80, depth=None) 11 | pprint.pprint("\n---\n") 12 | results.append(value['messages']) 13 | 14 | finalMessage = results[0][-1] 15 | return finalMessage.content 16 | 17 | def invoke(graph, formatted_input): 18 | response = graph.invoke(formatted_input) 19 | finalMessage = response["messages"][-1] 20 | return finalMessage.content -------------------------------------------------------------------------------- /app/utils/mapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Deluxer/oliva/fac56e1afb72c2ff151e8e33ecf4896080835775/app/utils/mapper.py -------------------------------------------------------------------------------- /app/utils/prompts.py: -------------------------------------------------------------------------------- 1 | class Prompts: 2 | ASSISTANT_SYSTEM = """You are Oliva, a helpful AI assistant that can engage in natural conversations 3 | and help users with various tasks. You aim to be: 4 | - Helpful and informative 5 | - Direct and concise in responses 6 | - Natural in conversation 7 | - Honest about capabilities and limitations 8 | 9 | When responding: 10 | 1. Keep responses brief but complete 11 | 2. Ask for clarification if needed 12 | 3. Be conversational but professional 13 | 4. Never pretend to have capabilities you don't have""" 14 | 15 | GRADE_DOCUMENTS_PROMPT = """You are a grader assessing relevance of retrieved documents to a user question. 16 | 17 | User question: {question} 18 | 19 | Retrieved documents: {context} 20 | 21 | Task: 22 | 1. Carefully analyze if the documents contain information that could help answer the question 23 | 2. For price-based queries, check if ANY document matches the price criteria 24 | 3. For category-based queries, check if ANY document matches the category 25 | 4. For product searches, consider a document relevant if it contains similar products even if not exact matches 26 | 5. If NO documents match the exact criteria but some are close, consider them relevant and mark as 'yes' 27 | 6. Only mark as 'no' if the documents are completely unrelated or irrelevant 28 | 29 | Provide: 30 | 1. A binary score 'yes' or 'no' to indicate document relevance 31 | 2. 
A brief explanation of your decision, including what relevant information was found or why documents were deemed irrelevant""" 32 | GRADE_DOCUMENTS_PROMPT_OPT_2 = """You are a grader assessing relevance of retrieved docs to a user question. 33 | Here are the retrieved docs: 34 | \n ------- \n 35 | {context} 36 | \n ------- \n 37 | Here is the user question: {question} 38 | If the content of the docs are relevant to the users question, score them as relevant. 39 | Give a binary score 'yes' or 'no' score to indicate whether the docs are relevant to the question. 40 | Yes: The docs are relevant to the question. 41 | No: The docs are not relevant to the question.""" 42 | 43 | BLOG_SEARCH_PROMPT = """You are a helpful blog assistant that helps users find information about blog posts. 44 | When a user asks a question, always use the blog_search tool to find relevant blog posts. 45 | Make sure to include the user's query in the tool call.""" 46 | 47 | AMAZON_SEARCH_PROMPT = """You are a helpful product search assistant that helps users find products on our database. 48 | When a user asks a question, always use the search_products_by_superlinked tool to find relevant products. 49 | Make sure to include the user's query in the tool call. 50 | Avoid mentioning the database.""" 51 | 52 | NO_RESULTS_PROMPT = """You are a helpful assistant responding to a product search query. 53 | Original query: {question} 54 | 55 | Task: Generate a polite response explaining that no exact matches were found. 56 | Suggest broadening the search criteria (e.g. higher price range, different category). 57 | """ 58 | 59 | AGENT_PROMPT_BY_SUPERLINKED = """You are an assistant that helps users find products. 60 | If the user asks about products, always use the 'search_products_by_superlinked' tool. 61 | If no exact matches are found, respond with a polite message explaining that no exact matches were found. 62 | """ 63 | 64 | AGENT_PROMPT_BY_JSON = """You are an assistant that helps users find products. 65 | If the user asks about products, always use the 'search_products_by_json' tool. 66 | """ 67 | 68 | def supervisor_system_prompt(self, members, agent_members_prompt_final): 69 | supervisor_system_prompt = f""" 70 | # Role 71 | You are Oliva's personal assistant supervisor Agent. Your job is to ensure that tasks related with blog posts and search products are executed efficiently by your subagents. 72 | # Context 73 | You have access to the following {len(members)} subagents: {members}. Each subagent has its own specialized prompt and set of tools. Here is a description: 74 | {agent_members_prompt_final} 75 | # Objective 76 | Analyze the user's request, decompose it into sub-tasks, and delegate each sub-task to the most appropriate subagent and ensure the task is completed. 77 | # Instructions 78 | 1. Understand the user's goal. 79 | 2. Decompose the task into ordered sub-tasks. 80 | 3. For each sub-task, determine the best-suited agent. 81 | 4. When receiving messages from the agents assess them thoroughly for completion 82 | 5. When all work is done, respond with next = FINISH. 83 | # Helpful Information 84 | - When asked for Model Context Protocol (MCP) topic - only search in blog_post_agent. 85 | - When asked for Agent definition or related topic - only search in blog_post_agent. 86 | - When asked searching for specific products includes product prices, ratings, or categories - only search in amazon_products_agent. 
87 | - If the query is not related to blog posts or products, respond the user query with a natural conversation and next = FINISH. 88 | # Important 89 | Delegating tasks should be added to the task_description_for_agent field with the original query 90 | Assess each message from sub agents carefully and decide whether the task is complete or not 91 | """ 92 | 93 | return supervisor_system_prompt 94 | 95 | prompts = Prompts() -------------------------------------------------------------------------------- /app/utils/types.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | class EdgeType(Enum): 4 | GRADE_DOCUMENTS = "grade_documents" 5 | CHECK_RELEVANCE = "check_relevance" 6 | 7 | class NodeType(Enum): 8 | AGENT = "agent" 9 | GENERATE = "generate" 10 | REWRITE = "rewrite" 11 | SUPERVISOR = "supervisor" 12 | 13 | class ToolType(Enum): 14 | BLOG_SEARCH = "blog_search" 15 | BLOG_ADVANCE_SEARCH = "blog_advance_search" 16 | BLOG_SUMMARY = "blog_summary" 17 | AMAZON_PRODUCTS_SEARCH_BY_JSON = "amazon_products_search_by_json" 18 | AMAZON_PRODUCTS_SEARCH_BY_SUPERLINKED = "amazon_products_search_by_superlinked" -------------------------------------------------------------------------------- /app/voice_assistant/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Deluxer/oliva/fac56e1afb72c2ff151e8e33ecf4896080835775/app/voice_assistant/__init__.py -------------------------------------------------------------------------------- /app/voice_assistant/assistant.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import logging 3 | from typing import Annotated 4 | from livekit import rtc 5 | from dotenv import load_dotenv 6 | from livekit.agents import ( 7 | AutoSubscribe, 8 | JobContext, 9 | JobProcess, 10 | WorkerOptions, 11 | cli, 12 | llm, 13 | metrics, 14 | ) 15 | from livekit.agents.llm import ( 16 | ChatContext, 17 | ChatMessage, 18 | FunctionContext 19 | ) 20 | from livekit.agents.pipeline import VoicePipelineAgent, AgentCallContext 21 | from livekit.plugins import deepgram, openai, silero, elevenlabs 22 | from app.agents.implementations.supervisor import graph 23 | import os 24 | 25 | from app.utils.prompts import Prompts 26 | 27 | load_dotenv() 28 | logger = logging.getLogger("oliva-voice-assistant") 29 | 30 | 31 | def prewarm(proc: JobProcess): 32 | proc.userdata["vad"] = silero.VAD.load() 33 | 34 | class SearchProducts(FunctionContext): 35 | """The class defines a set of LLM functions that the assistant can execute. 
""" 36 | 37 | @llm.ai_callable(name="search_products", description="Called when asked to search for products in oliva database") 38 | async def search_products( 39 | self, 40 | search_products: Annotated[ 41 | str, 42 | llm.TypeInfo(description="Search for products by title, description, category, price, rating, and review"), 43 | ], 44 | ): 45 | agent = AgentCallContext.get_current().agent 46 | local_participant = agent._room.local_participant 47 | 48 | try: 49 | #TODO: pass configurable options from livekit 50 | config = { 51 | "configurable": { 52 | "user_id": local_participant.identity, 53 | "chat_id": local_participant.identity 54 | } 55 | } 56 | 57 | input_state = { 58 | "messages": [ 59 | { 60 | "role": "user", 61 | "content": search_products 62 | } 63 | ] 64 | } 65 | 66 | result = graph.invoke( 67 | input_state, 68 | config 69 | ) 70 | 71 | if "messages" in result and result["messages"]: 72 | message = result["messages"][-1] 73 | message.pretty_print() 74 | return message.content 75 | else: 76 | logger.warning("No messages in result from graph invocation") 77 | return "I apologize, but I couldn't process your request properly." 78 | 79 | except Exception as e: 80 | logger.error(f"Error during graph invocation: {str(e)}", exc_info=True) 81 | return "I encountered an error while processing your request. Please try again." 82 | 83 | async def entrypoint(ctx: JobContext): 84 | fnc_ctx = SearchProducts() 85 | initial_ctx = ChatContext().append( 86 | role="system", 87 | text=Prompts.ASSISTANT_SYSTEM, 88 | ) 89 | 90 | logger.info(f"connecting to room {ctx.room.name}") 91 | await ctx.connect(auto_subscribe=AutoSubscribe.AUDIO_ONLY) 92 | 93 | # wait for the first participant to connect 94 | participant = await ctx.wait_for_participant() 95 | logger.info(f"starting voice assistant for participant {participant.identity}") 96 | 97 | dg_model = "nova-2-general" 98 | if participant.kind == rtc.ParticipantKind.PARTICIPANT_KIND_SIP: 99 | # use a model optimized for telephony 100 | dg_model = "nova-2-phonecall" 101 | 102 | elevenlabs_voice = elevenlabs.Voice( 103 | id="ErXwobaYiN019PkySvjV", 104 | name="Antoni", 105 | category="premade", 106 | settings=elevenlabs.VoiceSettings( 107 | stability=0.71, 108 | speed=1.0, 109 | similarity_boost=0.5, 110 | style=0.0, 111 | use_speaker_boost=True, 112 | ), 113 | ) 114 | # elevenlabs_tts = elevenlabs.TTS(voice=elevenlabs_voice, model="eleven_flash_v2_5", api_key=os.getenv("ELEVENLABS_API_KEY"), base_url="https://api.elevenlabs.io/v1") 115 | 116 | agent = VoicePipelineAgent( 117 | vad=ctx.proc.userdata["vad"], 118 | stt=deepgram.STT(model=dg_model, endpointing_ms=200, no_delay=True, energy_filter=True, interim_results=True), 119 | llm=openai.LLM(), 120 | tts=openai.TTS(), 121 | chat_ctx=initial_ctx, 122 | fnc_ctx=fnc_ctx 123 | ) 124 | 125 | agent.start(ctx.room, participant) 126 | 127 | usage_collector = metrics.UsageCollector() 128 | 129 | @agent.on("metrics_collected") 130 | def _on_metrics_collected(mtrcs: metrics.AgentMetrics): 131 | # metrics.log_metrics(mtrcs) 132 | usage_collector.collect(mtrcs) 133 | 134 | async def log_usage(): 135 | summary = usage_collector.get_summary() 136 | logger.info(f"Usage: ${summary}") 137 | 138 | ctx.add_shutdown_callback(log_usage) 139 | 140 | # listen to incoming chat messages, only required if you'd like the agent to 141 | # answer incoming messages from Chat 142 | chat = rtc.ChatManager(ctx.room) 143 | 144 | async def answer_from_text(txt: str): 145 | chat_ctx = agent.chat_ctx.copy() 146 | chat_ctx.append(role="user", 
text=txt) 147 | stream = agent.llm.chat(chat_ctx=chat_ctx) 148 | await agent.say(stream) 149 | 150 | @chat.on("message_received") 151 | def on_chat_received(msg: ChatMessage): 152 | if msg.message: 153 | asyncio.create_task(answer_from_text(msg.message)) 154 | 155 | await agent.say("Hey, how can I help you today?", allow_interruptions=True) 156 | 157 | 158 | if __name__ == "__main__": 159 | cli.run_app( 160 | WorkerOptions( 161 | entrypoint_fnc=entrypoint, 162 | prewarm_fnc=prewarm, 163 | job_memory_warn_mb=1500, 164 | ), 165 | ) -------------------------------------------------------------------------------- /assets/livekit_playground.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Deluxer/oliva/fac56e1afb72c2ff151e8e33ecf4896080835775/assets/livekit_playground.png -------------------------------------------------------------------------------- /assets/oliva_architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Deluxer/oliva/fac56e1afb72c2ff151e8e33ecf4896080835775/assets/oliva_architecture.png -------------------------------------------------------------------------------- /assets/oliva_arquitecture_v2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Deluxer/oliva/fac56e1afb72c2ff151e8e33ecf4896080835775/assets/oliva_arquitecture_v2.png -------------------------------------------------------------------------------- /assets/snapshot.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Deluxer/oliva/fac56e1afb72c2ff151e8e33ecf4896080835775/assets/snapshot.zip -------------------------------------------------------------------------------- /langgraph.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Search Products by Superlinked", 3 | "dockerfile_lines": [], 4 | "graphs": { 5 | "agent": "./app/agents/implementations/supervisor.py:graph" 6 | }, 7 | "env": ".env", 8 | "python_version": "3.12", 9 | "dependencies": [ 10 | "." 
11 | ] 12 | } -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "oliva_multi_agent" 3 | version = "0.1.0" 4 | description = "Oliva Multi-Agent Assistant" 5 | readme = "README.md" 6 | requires-python = ">=3.12" 7 | authors = [ 8 | {name = "GeraDeluxer", email = "gerardoangeln@gmail.com"} 9 | ] 10 | dependencies = [ 11 | "aiofile", 12 | "livekit-agents", 13 | "livekit-agents>=0.12.12,<0.13", 14 | "livekit-plugins-deepgram>=0.6.19,<0.7", 15 | "livekit-plugins-openai>=0.10.19,<0.11", 16 | "livekit-plugins-silero>=0.7.4,<0.8", 17 | "livekit-plugins-elevenlabs", 18 | "langgraph>=0.2.71,<0.3", 19 | "langchain-core>=0.3.34,<0.4", 20 | "python-dotenv>=1.0.1,<2", 21 | "python-dotenv", 22 | "loguru>=0.7.3", 23 | "pydantic>=2,<3", 24 | "pydantic-settings>=2.6.1", 25 | "superlinked==17.1.0", 26 | "langchain>=0.3.0", 27 | "langchain-community>=0.0.10", 28 | "langchain-openai>=0.3.10", 29 | "langchainhub>=0.1.14", 30 | "langchain-text-splitters>=0.0.1", 31 | "chromadb", 32 | "tiktoken", 33 | "langchain-qdrant", 34 | "qdrant-client", 35 | "beautifulsoup4", 36 | ] 37 | 38 | [tool.setuptools] 39 | packages = ["app", "data", "assets", "use_cases"] -------------------------------------------------------------------------------- /use_cases/agent_blog_post_url.py: -------------------------------------------------------------------------------- 1 | from app.agents.implementations.blog_post.agent import agent 2 | import logging 3 | import time 4 | 5 | # logging.basicConfig( 6 | # level=logging.DEBUG, 7 | # format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", 8 | # ) 9 | # logger = logging.getLogger(__name__) 10 | 11 | if __name__ == "__main__": 12 | """Search blog posts using the BlogPostAgent""" 13 | start_time = time.time() 14 | try: 15 | result = agent.process({ 16 | "query": "How Harrison Chase defines an agent?" 
17 | }) 18 | print(result) 19 | except Exception as e: 20 | print(f"Error: {str(e)}") 21 | raise 22 | finally: 23 | execution_time = time.time() - start_time 24 | print(f"\nTotal execution time: {execution_time:.2f} seconds") -------------------------------------------------------------------------------- /use_cases/agent_search_by_json.py: -------------------------------------------------------------------------------- 1 | import time 2 | start_time = time.time() 3 | from app.agents.implementations.search_amazon_products.agent_by_json import agent 4 | 5 | def agent_search_in_amazon_products_by_json(): 6 | """Search amazon products using the SearchAmazonProductsAgentByJson""" 7 | try: 8 | result = agent.process({ 9 | "query": "products with a price lower than 100 and a rating bigger than 3" 10 | }) 11 | print(result) 12 | except Exception as e: 13 | print(f"Error: {str(e)}") 14 | raise 15 | 16 | if __name__ == "__main__": 17 | agent_search_in_amazon_products_by_json() 18 | end_time = time.time() 19 | execution_time = end_time - start_time 20 | print(f"\nTotal execution time: {execution_time:.2f} seconds") -------------------------------------------------------------------------------- /use_cases/agent_search_by_superlinked.py: -------------------------------------------------------------------------------- 1 | import time 2 | start_time = time.time() 3 | from app.agents.implementations.search_amazon_products.agent_by_superlinked import agent 4 | 5 | if __name__ == "__main__": 6 | try: 7 | result = agent.process({ 8 | "query": "books with a price lower than 100 and a rating bigger than 4" 9 | }) 10 | print(result) 11 | except Exception as e: 12 | print(f"Error in main: {str(e)}") 13 | raise 14 | finally: 15 | execution_time = time.time() - start_time 16 | print(f"\nTotal execution time: {execution_time:.2f} seconds") -------------------------------------------------------------------------------- /use_cases/agent_supervisor.py: -------------------------------------------------------------------------------- 1 | import time 2 | start_time = time.time() 3 | from app.agents.implementations.supervisor import agent, graph 4 | 5 | def agent_supervisor(): 6 | try: 7 | result = agent.process({ 8 | "query": "products with a price lower than 50 and a rating lower than 2" 9 | # "query": "How Harrison Chase defines an agent?" 10 | # "query": 'prvide information about "Before MCP, How Were AI Systems Handling Context And Tool Access?" topic' 11 | }) 12 | print(result) 13 | except Exception as e: 14 | print(f"Error in main: {str(e)}") 15 | raise 16 | finally: 17 | execution_time = time.time() - start_time 18 | print(f"\nTotal execution time: {execution_time:.2f} seconds") 19 | 20 | def agent_supervisor_graph(): 21 | try: 22 | config = { 23 | "configurable": { 24 | "user_id": "1", 25 | "chat_id": "2" 26 | } 27 | } 28 | result = graph.invoke( 29 | {"messages": [{"role": "user", "content": "3 products with a price lower than 100 and a rating greater than 4"}]}, 30 | config, 31 | ) 32 | if "messages" in result: 33 | message = result["messages"][-1] 34 | message.pretty_print() 35 | else: 36 | print("No messages in result") 37 | 38 | except Exception as e: 39 | print(f"Error in main: {str(e)}") 40 | raise 41 | finally: 42 | execution_time = time.time() - start_time 43 | print(f"\nTotal execution time: {execution_time:.2f} seconds") 44 | 45 | if __name__ == "__main__": 46 | agent_supervisor_graph() --------------------------------------------------------------------------------
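The use cases above drive the supervisor graph end to end. One piece of the wiring that is easy to miss is the long-term memory contract the supervisor relies on: search(...) returns scored payloads, and evaluate(...) returns either a boolean (reuse the stored response as-is, or not at all) or a refined response string. Below is a minimal, illustrative sketch of that contract — not part of the repository — assuming the same keyword arguments the supervisor node passes (namespace, query, limit), a populated Qdrant collection behind LongTermMemoryStore, and made-up user/chat ids and query.

# Illustrative only: the ids and query are invented, and search() is assumed to
# accept the same keyword arguments the supervisor node uses.
from app.agents.langchain.memory.long_term import long_term_memory

namespace = "user-1:chat-1:memories"            # hypothetical user/chat ids
query = "books with a price lower than 20"      # hypothetical query

memories = long_term_memory.search(namespace=namespace, query=query, limit=3)

if not memories:
    print("No stored memories; the supervisor would delegate to an agent.")
else:
    result = long_term_memory.evaluate(query=query, memories=memories)
    if isinstance(result, str):
        # evaluate() stripped the "REFINED:" prefix and kept only matching items
        print("Refined answer reused from memory:\n", result)
    elif result:
        print("Stored answer reused as-is:\n", memories[0]["value"]["response"])
    else:
        print("Memories found but not reusable; delegate to an agent.")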