├── tests ├── __init__.py ├── test_utils.py ├── conftest.py ├── test_retriever.py └── test_memory_system.py ├── agentic_memory ├── __init__.py ├── llm_controller.py ├── retrievers.py └── memory_system.py ├── Figure ├── framework.jpg ├── intro-a.jpg └── intro-b.jpg ├── requirements.txt ├── .pre-commit-config.yaml ├── LICENSE ├── pyproject.toml ├── examples └── sovereign_memory.py ├── .gitignore └── README.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /agentic_memory/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Figure/framework.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/agiresearch/A-mem/HEAD/Figure/framework.jpg -------------------------------------------------------------------------------- /Figure/intro-a.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/agiresearch/A-mem/HEAD/Figure/intro-a.jpg -------------------------------------------------------------------------------- /Figure/intro-b.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/agiresearch/A-mem/HEAD/Figure/intro-b.jpg -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | sentence-transformers>=2.2.2 2 | chromadb>=0.4.22 3 | rank_bm25>=0.2.2 4 | nltk>=3.8.1 5 | transformers>=4.36.2 6 | litellm>=1.16.11 7 | numpy>=1.24.3 8 | scikit-learn>=1.3.2 9 | openai>=1.3.7 10 | ollama>=0.1.0 11 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v6.0.0 4 | hooks: 5 | - id: end-of-file-fixer 6 | 7 | - repo: https://github.com/astral-sh/ruff-pre-commit 8 | rev: v0.14.0 9 | hooks: 10 | - id: ruff 11 | args: ["--line-length=100"] 12 | # linter. 13 | - id: ruff-check 14 | types_or: [ python, pyi ] 15 | args: [ --fix ] 16 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | """Test utilities for the memory system.""" 2 | from typing import List 3 | from agentic_memory.llm_controller import BaseLLMController 4 | 5 | class MockLLMController(BaseLLMController): 6 | """Mock LLM controller for testing""" 7 | def __init__(self): 8 | self.mock_response = "{}" 9 | 10 | def get_completion(self, prompt: str, response_format: dict = None, temperature: float = 0.7) -> str: 11 | """Mock completion that returns the pre-set response""" 12 | return self.mock_response 13 | 14 | def get_embedding(self, text: str) -> List[float]: 15 | """Mock embedding that returns a zero vector""" 16 | return [0.0] * 384 # Mock embedding vector 17 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 AGI Research 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=61.0"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "agentic-memory" 7 | version = "0.0.1" 8 | description = "A library for implementing agentic memory in LLM applications." 9 | readme = "README.md" 10 | requires-python = ">=3.8" 11 | license = { file = "LICENSE" } 12 | classifiers = [ 13 | "Programming Language :: Python :: 3", 14 | "License :: OSI Approved :: MIT License", 15 | "Operating System :: OS Independent", 16 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 17 | ] 18 | dependencies = [ 19 | "sentence-transformers>=2.2.2", 20 | "chromadb>=0.4.22", 21 | "rank_bm25>=0.2.2", 22 | "nltk>=3.8.1", 23 | "litellm>=1.16.11", 24 | "numpy>=1.24.3", 25 | "scikit-learn>=1.3.2", 26 | "openai>=1.3.7", 27 | "pre-commit>=3.4.0", 28 | ] 29 | 30 | [project.optional-dependencies] 31 | dev = [ 32 | "pytest", 33 | "unittest", 34 | "ruff", 35 | "ipykernel", 36 | ] 37 | 38 | [tool.setuptools.packages.find] 39 | where = ["."] 40 | include = ["agentic_memory*"] 41 | exclude = ["tests*"] 42 | 43 | [project.urls] 44 | "Homepage" = "https://github.com/agiresearch/A-mem" 45 | "Bug Tracker" = "https://github.com/agiresearch/A-mem/issues" 46 | 47 | [tool.pytest.ini_options] 48 | minversion = "8.0" 49 | testpaths = ["tests"] 50 | addopts = "-q" 51 | pythonpath = ["."] 52 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import tempfile 3 | import shutil 4 | from pathlib import Path 5 | 6 | from agentic_memory.retrievers import ChromaRetriever, PersistentChromaRetriever 7 | 8 | 9 | @pytest.fixture 10 | def retriever(): 11 | """Fixture providing a clean ChromaRetriever instance.""" 12 | retriever = ChromaRetriever(collection_name="test_memories") 13 | yield retriever 14 | # Cleanup: reset the collection after each test 15 | retriever.client.reset() 16 | 17 | 18 | @pytest.fixture 19 | def sample_metadata(): 20 | """Fixture providing sample metadata with various types.""" 21 | return { 22 | "timestamp": "2024-01-01T00:00:00", 23 | "tags": ["test", "memory"], 24 | "config": {"key": "value"}, 25 | "count": 42, 26 | "score": 0.95 27 | } 28 | 29 | 30 | @pytest.fixture 31 | def temp_db_dir(): 32 | """Fixture providing a temporary directory for persistent ChromaDB.""" 33 | temp_dir = tempfile.mkdtemp() 34 | yield Path(temp_dir) 35 | # Cleanup: remove the temporary directory after test 36 | shutil.rmtree(temp_dir, ignore_errors=True) 37 | 38 | 39 | @pytest.fixture 40 | def existing_collection(temp_db_dir, sample_metadata): 41 | """Fixture that creates a pre-existing collection with data.""" 42 | retriever = PersistentChromaRetriever( 43 | directory=str(temp_db_dir), 44 | collection_name="existing_collection" 45 | ) 46 | retriever.add_document("Existing document", sample_metadata, "existing_doc") 47 | return temp_db_dir, "existing_collection" 48 | -------------------------------------------------------------------------------- /examples/sovereign_memory.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | # Ensure we can import from source 5 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 6 | 7 | from agentic_memory.memory_system import AgenticMemorySystem 8 | 9 | def main(): 10 | print("🧠 Initializing A-mem Sovereign System (Local)...") 11 | 12 | # Initialize with local backend 13 | # Note: Requires Ollama running with 'llama3' pulled 14 | try: 15 | memory_system = AgenticMemorySystem( 16 | model_name='all-MiniLM-L6-v2', # Local embeddings (via sentence-transformers) 17 | llm_backend="ollama", 18 | llm_model="llama3" 19 | ) 20 | print("✅ System initialized.") 21 | except Exception as e: 22 | print(f"❌ Init failed: {e}") 23 | return 24 | 25 | # Add a memory 26 | print("\n📝 Adding Sovereign Memory...") 27 | content = "The user values data sovereignty and local processing above all else." 28 | try: 29 | # Note: A-mem automatically generates tags/context via LLM here 30 | memory_id = memory_system.add_note( 31 | content=content, 32 | tags=["sovereign", "privacy"], 33 | category="Principles" 34 | ) 35 | print(f" Memory stored with ID: {memory_id}") 36 | except Exception as e: 37 | print(f"❌ Failed to store memory: {e}") 38 | return 39 | 40 | # Retrieve 41 | print("\n🔍 Retrieving Memory...") 42 | try: 43 | results = memory_system.search_agentic("sovereignty", k=1) 44 | for res in results: 45 | print(f" Found: {res['content']}") 46 | print(f" Tags: {res['tags']}") 47 | print(f" Context (LLM Generated): {res.get('context', 'N/A')}") 48 | except Exception as e: 49 | print(f"❌ Retrieval failed: {e}") 50 | 51 | if __name__ == "__main__": 52 | main() 53 | -------------------------------------------------------------------------------- /agentic_memory/llm_controller.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Optional, Literal, Any 2 | import os 3 | import json 4 | from abc import ABC, abstractmethod 5 | from litellm import completion 6 | 7 | class BaseLLMController(ABC): 8 | @abstractmethod 9 | def get_completion(self, prompt: str) -> str: 10 | """Get completion from LLM""" 11 | pass 12 | 13 | class OpenAIController(BaseLLMController): 14 | def __init__(self, model: str = "gpt-4", api_key: Optional[str] = None): 15 | try: 16 | from openai import OpenAI 17 | self.model = model 18 | if api_key is None: 19 | api_key = os.getenv('OPENAI_API_KEY') 20 | if api_key is None: 21 | raise ValueError("OpenAI API key not found. Set OPENAI_API_KEY environment variable.") 22 | self.client = OpenAI(api_key=api_key) 23 | except ImportError: 24 | raise ImportError("OpenAI package not found. Install it with: pip install openai") 25 | 26 | def get_completion(self, prompt: str, response_format: dict, temperature: float = 0.7) -> str: 27 | response = self.client.chat.completions.create( 28 | model=self.model, 29 | messages=[ 30 | {"role": "system", "content": "You must respond with a JSON object."}, 31 | {"role": "user", "content": prompt} 32 | ], 33 | response_format=response_format, 34 | temperature=temperature, 35 | max_tokens=1000 36 | ) 37 | return response.choices[0].message.content 38 | 39 | class OllamaController(BaseLLMController): 40 | def __init__(self, model: str = "llama2"): 41 | from ollama import chat 42 | self.model = model 43 | 44 | def _generate_empty_value(self, schema_type: str, schema_items: dict = None) -> Any: 45 | if schema_type == "array": 46 | return [] 47 | elif schema_type == "string": 48 | return "" 49 | elif schema_type == "object": 50 | return {} 51 | elif schema_type == "number": 52 | return 0 53 | elif schema_type == "boolean": 54 | return False 55 | return None 56 | 57 | def _generate_empty_response(self, response_format: dict) -> dict: 58 | if "json_schema" not in response_format: 59 | return {} 60 | 61 | schema = response_format["json_schema"]["schema"] 62 | result = {} 63 | 64 | if "properties" in schema: 65 | for prop_name, prop_schema in schema["properties"].items(): 66 | result[prop_name] = self._generate_empty_value(prop_schema["type"], 67 | prop_schema.get("items")) 68 | 69 | return result 70 | 71 | def get_completion(self, prompt: str, response_format: dict, temperature: float = 0.7) -> str: 72 | # Allow exceptions (like ConnectionError) to bubble up for better debugging 73 | response = completion( 74 | model="ollama_chat/{}".format(self.model), 75 | messages=[ 76 | {"role": "system", "content": "You must respond with a JSON object."}, 77 | {"role": "user", "content": prompt} 78 | ], 79 | response_format=response_format, 80 | ) 81 | return response.choices[0].message.content 82 | 83 | class LLMController: 84 | """LLM-based controller for memory metadata generation""" 85 | def __init__(self, 86 | backend: Literal["openai", "ollama"] = "openai", 87 | model: str = "gpt-4", 88 | api_key: Optional[str] = None): 89 | if backend == "openai": 90 | self.llm = OpenAIController(model, api_key) 91 | elif backend == "ollama": 92 | self.llm = OllamaController(model) 93 | else: 94 | raise ValueError("Backend must be one of: 'openai', 'ollama'") 95 | 96 | def get_completion(self, prompt: str, response_format: dict = None, temperature: float = 0.7) -> str: 97 | return self.llm.get_completion(prompt, response_format, temperature) 98 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[codz] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py.cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # UV 98 | # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | #uv.lock 102 | 103 | # poetry 104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 105 | # This is especially recommended for binary packages to ensure reproducibility, and is more 106 | # commonly ignored for libraries. 107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 108 | #poetry.lock 109 | #poetry.toml 110 | 111 | # pdm 112 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 113 | # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python. 114 | # https://pdm-project.org/en/latest/usage/project/#working-with-version-control 115 | #pdm.lock 116 | #pdm.toml 117 | .pdm-python 118 | .pdm-build/ 119 | 120 | # pixi 121 | # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control. 122 | #pixi.lock 123 | # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one 124 | # in the .venv directory. It is recommended not to include this directory in version control. 125 | .pixi 126 | 127 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 128 | __pypackages__/ 129 | 130 | # Celery stuff 131 | celerybeat-schedule 132 | celerybeat.pid 133 | 134 | # SageMath parsed files 135 | *.sage.py 136 | 137 | # Environments 138 | .env 139 | .envrc 140 | .venv 141 | env/ 142 | venv/ 143 | ENV/ 144 | env.bak/ 145 | venv.bak/ 146 | 147 | # Spyder project settings 148 | .spyderproject 149 | .spyproject 150 | 151 | # Rope project settings 152 | .ropeproject 153 | 154 | # mkdocs documentation 155 | /site 156 | 157 | # mypy 158 | .mypy_cache/ 159 | .dmypy.json 160 | dmypy.json 161 | 162 | # Pyre type checker 163 | .pyre/ 164 | 165 | # pytype static type analyzer 166 | .pytype/ 167 | 168 | # Cython debug symbols 169 | cython_debug/ 170 | 171 | # PyCharm 172 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 173 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 174 | # and can be added to the global gitignore or merged into this file. For a more nuclear 175 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 176 | #.idea/ 177 | 178 | # Abstra 179 | # Abstra is an AI-powered process automation framework. 180 | # Ignore directories containing user credentials, local state, and settings. 181 | # Learn more at https://abstra.io/docs 182 | .abstra/ 183 | 184 | # Visual Studio Code 185 | # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore 186 | # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore 187 | # and can be added to the global gitignore or merged into this file. However, if you prefer, 188 | # you could uncomment the following to ignore the entire vscode folder 189 | # .vscode/ 190 | 191 | # Ruff stuff: 192 | .ruff_cache/ 193 | 194 | # PyPI configuration file 195 | .pypirc 196 | 197 | # Cursor 198 | # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to 199 | # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data 200 | # refer to https://docs.cursor.com/context/ignore-files 201 | .cursorignore 202 | .cursorindexingignore 203 | 204 | # Marimo 205 | marimo/_static/ 206 | marimo/_lsp/ 207 | __marimo__/ 208 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Agentic Memory 🧠 2 | 3 | A novel agentic memory system for LLM agents that can dynamically organize memories in an agentic way. 4 | 5 | ## Introduction 🌟 6 | 7 | Large Language Model (LLM) agents have demonstrated remarkable capabilities in handling complex real-world tasks through external tool usage. However, to effectively leverage historical experiences, they require sophisticated memory systems. Traditional memory systems, while providing basic storage and retrieval functionality, often lack advanced memory organization capabilities. 8 | 9 | Our project introduces an innovative **Agentic Memory** system that revolutionizes how LLM agents manage and utilize their memories: 10 | 11 |
12 | Traditional Memory System 13 | Our Proposed Agentic Memory 14 |
15 | Comparison between traditional memory system (top) and our proposed agentic memory (bottom). Our system enables dynamic memory operations and flexible agent-memory interactions. 16 |
17 | 18 | > **Note:** This repository provides a memory system to facilitate agent construction. If you want to reproduce the results presented in our paper, please refer to: [https://github.com/WujiangXu/AgenticMemory](https://github.com/WujiangXu/AgenticMemory) 19 | 20 | For more details, please refer to our paper: [A-MEM: Agentic Memory for LLM Agents](https://arxiv.org/pdf/2502.12110) 21 | 22 | 23 | ## Key Features ✨ 24 | 25 | - 🔄 Dynamic memory organization based on Zettelkasten principles 26 | - 🔍 Intelligent indexing and linking of memories via ChromaDB 27 | - 📝 Comprehensive note generation with structured attributes 28 | - 🌐 Interconnected knowledge networks 29 | - 🧬 Continuous memory evolution and refinement 30 | - 🤖 Agent-driven decision making for adaptive memory management 31 | 32 | ## Framework 🏗️ 33 | 34 |
35 | Agentic Memory Framework 36 |
37 | The framework of our Agentic Memory system showing the dynamic interaction between LLM agents and memory components. 38 |
39 | 40 | ## How It Works 🛠️ 41 | 42 | When a new memory is added to the system: 43 | 1. Generates comprehensive notes with structured attributes 44 | 2. Creates contextual descriptions and tags 45 | 3. Analyzes historical memories for relevant connections 46 | 4. Establishes meaningful links based on similarities 47 | 5. Enables dynamic memory evolution and updates 48 | 49 | ## Results 📊 50 | 51 | Empirical experiments conducted on six foundation models demonstrate superior performance compared to existing SOTA baselines. 52 | 53 | ## Getting Started 🚀 54 | 55 | 1. Clone the repository: 56 | ```bash 57 | git clone https://github.com/agiresearch/A-mem.git 58 | cd A-mem 59 | ``` 60 | 61 | 2. Install dependencies: 62 | Create and activate a virtual environment (recommended): 63 | ```bash 64 | python -m venv .venv 65 | source .venv/bin/activate # On Windows, use: .venv\Scripts\activate 66 | ``` 67 | 68 | Install the package: 69 | ```bash 70 | pip install . 71 | ``` 72 | For development, you can install it in editable mode: 73 | ```bash 74 | pip install -e . 75 | ``` 76 | 77 | 3. Usage Examples 💡 78 | 79 | Here's how to use the Agentic Memory system for basic operations: 80 | 81 | ```python 82 | from agentic_memory.memory_system import AgenticMemorySystem 83 | 84 | # Initialize the memory system 🚀 85 | memory_system = AgenticMemorySystem( 86 | model_name='all-MiniLM-L6-v2', # Embedding model for ChromaDB 87 | llm_backend="openai", # LLM backend (openai/ollama) 88 | llm_model="gpt-4o-mini" # LLM model name 89 | ) 90 | 91 | # Add Memories ➕ 92 | # Simple addition 93 | memory_id = memory_system.add_note("Deep learning neural networks") 94 | 95 | # Addition with metadata 96 | memory_id = memory_system.add_note( 97 | content="Machine learning project notes", 98 | tags=["ml", "project"], 99 | category="Research", 100 | timestamp="202503021500" # YYYYMMDDHHmm format 101 | ) 102 | 103 | # Read (Retrieve) Memories 📖 104 | # Get memory by ID 105 | memory = memory_system.read(memory_id) 106 | print(f"Content: {memory.content}") 107 | print(f"Tags: {memory.tags}") 108 | print(f"Context: {memory.context}") 109 | print(f"Keywords: {memory.keywords}") 110 | 111 | # Search memories 112 | results = memory_system.search_agentic("neural networks", k=5) 113 | for result in results: 114 | print(f"ID: {result['id']}") 115 | print(f"Content: {result['content']}") 116 | print(f"Tags: {result['tags']}") 117 | print("---") 118 | 119 | # Update Memories 🔄 120 | memory_system.update(memory_id, content="Updated content about deep learning") 121 | 122 | # Delete Memories ❌ 123 | memory_system.delete(memory_id) 124 | 125 | # Memory Evolution 🧬 126 | # The system automatically evolves memories by: 127 | # 1. Finding semantic relationships using ChromaDB 128 | # 2. Updating metadata and context 129 | # 3. Creating connections between related memories 130 | # This happens automatically when adding or updating memories! 131 | ``` 132 | 133 | ### Advanced Features 🌟 134 | 135 | 1. **ChromaDB Vector Storage** 📦 136 | - Efficient vector embedding storage and retrieval 137 | - Fast semantic similarity search 138 | - Automatic metadata handling 139 | - Persistent memory storage 140 | 141 | 2. **Memory Evolution** 🧬 142 | - Automatically analyzes content relationships 143 | - Updates tags and context based on related memories 144 | - Creates semantic connections between memories 145 | 146 | 3. **Flexible Metadata** 📋 147 | - Custom tags and categories 148 | - Automatic keyword extraction 149 | - Context generation 150 | - Timestamp tracking 151 | 152 | 4. **Multiple LLM Backends** 🤖 153 | - OpenAI (GPT-4, GPT-3.5) 154 | - Ollama (for local deployment) 155 | 156 | ### Best Practices 💪 157 | 158 | 1. **Memory Creation** ✨: 159 | - Provide clear, specific content 160 | - Add relevant tags for better organization 161 | - Let the system handle context and keyword generation 162 | 163 | 2. **Memory Retrieval** 🔍: 164 | - Use specific search queries 165 | - Adjust 'k' parameter based on needed results 166 | - Consider both exact and semantic matches 167 | 168 | 3. **Memory Evolution** 🧬: 169 | - Allow automatic evolution to organize memories 170 | - Review generated connections periodically 171 | - Use consistent tagging conventions 172 | 173 | 4. **Error Handling** ⚠️: 174 | - Always check return values 175 | - Handle potential KeyError for non-existent memories 176 | - Use try-except blocks for LLM operations 177 | 178 | ## Citation 📚 179 | 180 | If you use this code in your research, please cite our work: 181 | 182 | ```bibtex 183 | @article{xu2025mem, 184 | title={A-mem: Agentic memory for llm agents}, 185 | author={Xu, Wujiang and Liang, Zujie and Mei, Kai and Gao, Hang and Tan, Juntao and Zhang, Yongfeng}, 186 | journal={arXiv preprint arXiv:2502.12110}, 187 | year={2025} 188 | } 189 | ``` 190 | 191 | ## License 📄 192 | 193 | This project is licensed under the MIT License. See LICENSE for details. 194 | -------------------------------------------------------------------------------- /tests/test_retriever.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from agentic_memory.retrievers import PersistentChromaRetriever 4 | 5 | 6 | def test_initialization(retriever): 7 | """Test ChromaRetriever initializes correctly.""" 8 | assert retriever.collection is not None 9 | assert retriever.embedding_function is not None 10 | 11 | 12 | def test_add_document(retriever, sample_metadata): 13 | """Test adding a document with metadata.""" 14 | doc_id = "test_doc_1" 15 | document = "This is a test document." 16 | 17 | retriever.add_document(document, sample_metadata, doc_id) 18 | 19 | results = retriever.collection.get(ids=[doc_id]) 20 | assert len(results["ids"]) == 1 21 | assert results["ids"][0] == doc_id 22 | 23 | 24 | def test_delete_document(retriever, sample_metadata): 25 | """Test deleting a document.""" 26 | doc_id = "test_doc_2" 27 | retriever.add_document("Test document", sample_metadata, doc_id) 28 | 29 | retriever.delete_document(doc_id) 30 | 31 | results = retriever.collection.get(ids=[doc_id]) 32 | assert len(results["ids"]) == 0 33 | 34 | 35 | def test_search(retriever, sample_metadata): 36 | """Test searching for similar documents.""" 37 | retriever.add_document( 38 | "Machine learning is fascinating", sample_metadata, "doc1") 39 | retriever.add_document( 40 | "Deep learning uses neural networks", sample_metadata, "doc2") 41 | retriever.add_document( 42 | "Cats are fluffy animals", sample_metadata, "doc3") 43 | 44 | results = retriever.search("artificial intelligence", k=2) 45 | 46 | assert len(results["ids"][0]) == 2 47 | assert len(results["documents"][0]) == 2 48 | 49 | 50 | def test_metadata_list_conversion(retriever): 51 | """Test that list metadata is properly converted.""" 52 | metadata = {"tags": ["tag1", "tag2", "tag3"]} 53 | retriever.add_document("Test doc", metadata, "doc_list") 54 | 55 | results = retriever.search("Test", k=1) 56 | 57 | retrieved_tags = results["metadatas"][0][0]["tags"] 58 | assert isinstance(retrieved_tags, list) 59 | assert retrieved_tags == ["tag1", "tag2", "tag3"] 60 | 61 | 62 | def test_metadata_dict_conversion(retriever): 63 | """Test that dict metadata is properly converted.""" 64 | metadata = {"config": {"nested": "value", "number": 123}} 65 | retriever.add_document("Test doc", metadata, "doc_dict") 66 | 67 | results = retriever.search("Test", k=1) 68 | 69 | retrieved_config = results["metadatas"][0][0]["config"] 70 | assert isinstance(retrieved_config, dict) 71 | assert retrieved_config["nested"] == "value" 72 | 73 | 74 | @pytest.mark.parametrize("value,expected_type", [ 75 | ("42", int), 76 | ("3.14", float), 77 | ("-10", int), 78 | ("hello", str), 79 | ]) 80 | def test_numeric_string_conversion(retriever, value, expected_type): 81 | """Test numeric string conversion in metadata.""" 82 | metadata = {"value": value} 83 | retriever.add_document("Test doc", metadata, f"doc_{value}") 84 | 85 | results = retriever.search("Test", k=1) 86 | 87 | retrieved_value = results["metadatas"][0][0]["value"] 88 | assert isinstance(retrieved_value, expected_type) 89 | 90 | 91 | def test_search_returns_top_k_results(retriever, sample_metadata): 92 | """Test that search respects the k parameter.""" 93 | for i in range(10): 94 | retriever.add_document( 95 | f"Document number {i}", sample_metadata, f"doc_{i}") 96 | 97 | results = retriever.search("Document", k=3) 98 | 99 | assert len(results["ids"][0]) == 3 100 | 101 | 102 | class TestPersistentChromaRetriever: 103 | """Test suite for PersistentChromaRetriever.""" 104 | 105 | def test_creates_new_collection(self, temp_db_dir): 106 | """Test creating a new persistent collection.""" 107 | retriever = PersistentChromaRetriever( 108 | directory=str(temp_db_dir), 109 | collection_name="new_collection" 110 | ) 111 | 112 | assert retriever.collection is not None 113 | assert retriever.collection_name == "new_collection" 114 | assert temp_db_dir.exists() 115 | 116 | @pytest.mark.parametrize("collection_name,extend,should_raise", [ 117 | ("existing_collection", False, True), # Existing collection, no extend -> error 118 | ("existing_collection", True, False), # Existing collection, extend -> success 119 | ("new_collection", False, False), # New collection, no extend -> success 120 | ("new_collection", True, False), # New collection, extend -> success 121 | ]) 122 | def test_collection_access_control( 123 | self, existing_collection, collection_name, extend, should_raise 124 | ): 125 | """Test collection access with different combinations of name and extend flag.""" 126 | temp_db_dir, existing_name = existing_collection 127 | 128 | if should_raise: 129 | with pytest.raises(ValueError, match="already exists"): 130 | PersistentChromaRetriever( 131 | directory=str(temp_db_dir), 132 | collection_name=collection_name, 133 | extend=extend 134 | ) 135 | else: 136 | retriever = PersistentChromaRetriever( 137 | directory=str(temp_db_dir), 138 | collection_name=collection_name, 139 | extend=extend 140 | ) 141 | assert retriever.collection is not None 142 | 143 | # If accessing existing collection, verify data is accessible 144 | if collection_name == existing_name: 145 | results = retriever.collection.get(ids=["existing_doc"]) 146 | assert len(results["ids"]) == 1 147 | 148 | def test_persistence_across_sessions(self, temp_db_dir, sample_metadata): 149 | """Test that data persists across different retriever instances.""" 150 | collection_name = "persistent_collection" 151 | 152 | # Session 1: Create and add document 153 | retriever1 = PersistentChromaRetriever( 154 | directory=str(temp_db_dir), 155 | collection_name=collection_name 156 | ) 157 | retriever1.add_document("Persistent data", sample_metadata, "persist_doc") 158 | del retriever1 159 | 160 | # Session 2: Reconnect and verify data exists 161 | retriever2 = PersistentChromaRetriever( 162 | directory=str(temp_db_dir), 163 | collection_name=collection_name, 164 | extend=True 165 | ) 166 | 167 | results = retriever2.collection.get(ids=["persist_doc"]) 168 | assert len(results["ids"]) == 1 169 | assert results["documents"][0] == "Persistent data" 170 | 171 | def test_uses_default_directory_when_none(self): 172 | """Test that default directory is used when none provided.""" 173 | retriever = PersistentChromaRetriever( 174 | collection_name="default_dir_collection" 175 | ) 176 | 177 | # Should use ~/.chromadb as default 178 | from pathlib import Path 179 | default_path = Path.home() / '.chromadb' 180 | assert default_path.exists() 181 | 182 | # Cleanup 183 | retriever.client.delete_collection("default_dir_collection") 184 | -------------------------------------------------------------------------------- /tests/test_memory_system.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from agentic_memory.memory_system import AgenticMemorySystem, MemoryNote 3 | from datetime import datetime 4 | 5 | class TestAgenticMemorySystem(unittest.TestCase): 6 | def setUp(self): 7 | """Set up test environment before each test.""" 8 | self.memory_system = AgenticMemorySystem( 9 | model_name='all-MiniLM-L6-v2', 10 | llm_backend="openai", 11 | llm_model="gpt-4o-mini" 12 | ) 13 | 14 | def test_create_memory(self): 15 | """Test creating a new memory with complete metadata.""" 16 | content = "Test memory content" 17 | tags = ["test", "memory"] 18 | keywords = ["test", "content"] 19 | links = ["link1", "link2"] 20 | context = "Test context" 21 | category = "Test category" 22 | timestamp = datetime.now().strftime("%Y%m%d%H%M") 23 | 24 | memory_id = self.memory_system.add_note( 25 | content=content, 26 | tags=tags, 27 | keywords=keywords, 28 | links=links, 29 | context=context, 30 | category=category, 31 | timestamp=timestamp 32 | ) 33 | 34 | # Verify memory was created 35 | self.assertIsNotNone(memory_id) 36 | memory = self.memory_system.read(memory_id) 37 | self.assertIsNotNone(memory) 38 | self.assertEqual(memory.content, content) 39 | self.assertEqual(memory.tags, tags) 40 | self.assertEqual(memory.keywords, keywords) 41 | self.assertEqual(memory.links, links) 42 | self.assertEqual(memory.context, context) 43 | self.assertEqual(memory.category, category) 44 | self.assertEqual(memory.timestamp, timestamp) 45 | 46 | def test_memory_metadata_persistence(self): 47 | """Test that memory metadata persists through ChromaDB storage and retrieval.""" 48 | # Create a memory with complex metadata 49 | content = "Complex test memory" 50 | tags = ["test", "complex", "metadata"] 51 | keywords = ["test", "complex", "keywords"] 52 | links = ["link1", "link2", "link3"] 53 | context = "Complex test context" 54 | category = "Complex test category" 55 | timestamp = datetime.now().strftime("%Y%m%d%H%M") 56 | evolution_history = ["evolution1", "evolution2"] 57 | 58 | memory_id = self.memory_system.add_note( 59 | content=content, 60 | tags=tags, 61 | keywords=keywords, 62 | links=links, 63 | context=context, 64 | category=category, 65 | timestamp=timestamp, 66 | evolution_history=evolution_history 67 | ) 68 | 69 | # Search for the memory using ChromaDB 70 | results = self.memory_system.search_agentic(content, k=1) 71 | self.assertGreater(len(results), 0) 72 | 73 | # Verify metadata in search results 74 | result = results[0] 75 | self.assertEqual(result['content'], content) 76 | self.assertEqual(result['tags'], tags) 77 | self.assertEqual(result['keywords'], keywords) 78 | self.assertEqual(result['context'], context) 79 | self.assertEqual(result['category'], category) 80 | 81 | def test_memory_update(self): 82 | """Test updating memory metadata through ChromaDB.""" 83 | # Create initial memory 84 | content = "Initial content" 85 | memory_id = self.memory_system.add_note(content=content) 86 | 87 | # Update memory with new metadata 88 | new_content = "Updated content" 89 | new_tags = ["updated", "tags"] 90 | new_keywords = ["updated", "keywords"] 91 | new_context = "Updated context" 92 | 93 | success = self.memory_system.update( 94 | memory_id, 95 | content=new_content, 96 | tags=new_tags, 97 | keywords=new_keywords, 98 | context=new_context 99 | ) 100 | 101 | self.assertTrue(success) 102 | 103 | # Verify updates in ChromaDB 104 | results = self.memory_system.search_agentic(new_content, k=1) 105 | self.assertGreater(len(results), 0) 106 | result = results[0] 107 | self.assertEqual(result['content'], new_content) 108 | self.assertEqual(result['tags'], new_tags) 109 | self.assertEqual(result['keywords'], new_keywords) 110 | self.assertEqual(result['context'], new_context) 111 | 112 | def test_memory_relationships(self): 113 | """Test memory relationships and linked memories.""" 114 | # Create related memories 115 | content1 = "First memory" 116 | content2 = "Second memory" 117 | content3 = "Third memory" 118 | 119 | id1 = self.memory_system.add_note(content1) 120 | id2 = self.memory_system.add_note(content2) 121 | id3 = self.memory_system.add_note(content3) 122 | 123 | # Add relationships 124 | memory1 = self.memory_system.read(id1) 125 | memory2 = self.memory_system.read(id2) 126 | memory3 = self.memory_system.read(id3) 127 | 128 | memory1.links.append(id2) 129 | memory2.links.append(id1) 130 | memory2.links.append(id3) 131 | memory3.links.append(id2) 132 | 133 | # Update memories with relationships 134 | self.memory_system.update(id1, links=memory1.links) 135 | self.memory_system.update(id2, links=memory2.links) 136 | self.memory_system.update(id3, links=memory3.links) 137 | 138 | # Test relationship retrieval 139 | results = self.memory_system.search_agentic(content1, k=3) 140 | self.assertGreater(len(results), 0) 141 | 142 | # Verify relationships are maintained 143 | memory1_updated = self.memory_system.read(id1) 144 | self.assertIn(id2, memory1_updated.links) 145 | 146 | def test_memory_evolution(self): 147 | """Test memory evolution system with ChromaDB.""" 148 | # Create related memories 149 | contents = [ 150 | "Deep learning neural networks", 151 | "Neural network architectures", 152 | "Training deep neural networks" 153 | ] 154 | 155 | memory_ids = [] 156 | for content in contents: 157 | memory_id = self.memory_system.add_note(content) 158 | memory_ids.append(memory_id) 159 | 160 | # Verify that memories have been properly evolved 161 | for memory_id in memory_ids: 162 | memory = self.memory_system.read(memory_id) 163 | self.assertIsNotNone(memory.tags) 164 | self.assertIsNotNone(memory.context) 165 | self.assertIsNotNone(memory.keywords) 166 | 167 | # Test evolution through search 168 | results = self.memory_system.search_agentic("neural networks", k=3) 169 | self.assertGreater(len(results), 0) 170 | 171 | # Verify evolution metadata 172 | for result in results: 173 | self.assertIsNotNone(result['tags']) 174 | self.assertIsNotNone(result['context']) 175 | self.assertIsNotNone(result['keywords']) 176 | 177 | def test_memory_deletion(self): 178 | """Test memory deletion from ChromaDB.""" 179 | # Create and delete a memory 180 | content = "Memory to delete" 181 | memory_id = self.memory_system.add_note(content) 182 | 183 | # Verify memory exists 184 | memory = self.memory_system.read(memory_id) 185 | self.assertIsNotNone(memory) 186 | 187 | # Delete memory 188 | success = self.memory_system.delete(memory_id) 189 | self.assertTrue(success) 190 | 191 | # Verify deletion 192 | memory = self.memory_system.read(memory_id) 193 | self.assertIsNone(memory) 194 | 195 | # Verify memory is removed from ChromaDB 196 | results = self.memory_system.search_agentic(content, k=1) 197 | self.assertEqual(len(results), 0) 198 | 199 | def test_memory_consolidation(self): 200 | """Test memory consolidation with ChromaDB.""" 201 | # Create multiple memories 202 | contents = [ 203 | "Memory 1", 204 | "Memory 2", 205 | "Memory 3" 206 | ] 207 | 208 | for content in contents: 209 | self.memory_system.add_note(content) 210 | 211 | # Force consolidation 212 | self.memory_system.consolidate_memories() 213 | 214 | # Verify memories are still accessible 215 | for content in contents: 216 | results = self.memory_system.search_agentic(content, k=1) 217 | self.assertGreater(len(results), 0) 218 | self.assertEqual(results[0]['content'], content) 219 | 220 | def test_find_related_memories(self): 221 | """Test finding related memories.""" 222 | # Create test memories 223 | contents = [ 224 | "Python programming language", 225 | "Python data science", 226 | "Machine learning with Python", 227 | "Web development with JavaScript" 228 | ] 229 | 230 | for content in contents: 231 | self.memory_system.add_note(content) 232 | 233 | # Test finding related memories 234 | results = self.memory_system.find_related_memories("Python", k=2) 235 | self.assertGreater(len(results), 0) 236 | 237 | def test_find_related_memories_raw(self): 238 | """Test finding related memories with raw format.""" 239 | # Create test memories 240 | contents = [ 241 | "Python programming language", 242 | "Python data science", 243 | "Machine learning with Python" 244 | ] 245 | 246 | for content in contents: 247 | self.memory_system.add_note(content) 248 | 249 | # Test finding related memories in raw format 250 | results = self.memory_system.find_related_memories_raw("Python", k=2) 251 | self.assertIsNotNone(results) 252 | 253 | def test_process_memory(self): 254 | """Test memory processing and evolution.""" 255 | # Create a test memory 256 | content = "Test memory for processing" 257 | memory_id = self.memory_system.add_note(content) 258 | 259 | # Get the memory 260 | memory = self.memory_system.read(memory_id) 261 | 262 | # Process the memory 263 | should_evolve, processed_memory = self.memory_system.process_memory(memory) 264 | 265 | # Verify processing results 266 | self.assertIsInstance(should_evolve, bool) 267 | self.assertIsInstance(processed_memory, MemoryNote) 268 | self.assertIsNotNone(processed_memory.tags) 269 | self.assertIsNotNone(processed_memory.context) 270 | self.assertIsNotNone(processed_memory.keywords) 271 | 272 | if __name__ == '__main__': 273 | unittest.main() 274 | -------------------------------------------------------------------------------- /agentic_memory/retrievers.py: -------------------------------------------------------------------------------- 1 | import json 2 | from pathlib import Path 3 | from typing import Dict, List, Optional 4 | import ast 5 | import tempfile 6 | import atexit 7 | 8 | import chromadb 9 | from chromadb.config import Settings 10 | from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction 11 | from nltk.tokenize import word_tokenize 12 | 13 | 14 | def simple_tokenize(text): 15 | return word_tokenize(text) 16 | 17 | 18 | def _clone_collection( 19 | src: chromadb.Collection, 20 | dest: chromadb.Collection, 21 | batch_size: int = 10 22 | ): 23 | """ 24 | Copies one ChromaDB collection to another. 25 | Enables duplicating of collections. 26 | This seemed to be the only (best) way to do this as the official ChromaDB 27 | docs also suggest this method: 28 | """ 29 | existing_count = src.count() 30 | for i in range(0, existing_count, batch_size): 31 | batch = src.get( 32 | include=["metadatas", "documents", "embeddings"], 33 | limit=batch_size, 34 | offset=i) 35 | dest.add( 36 | ids=batch["ids"], 37 | documents=batch["documents"], 38 | metadatas=batch["metadatas"], 39 | embeddings=batch["embeddings"]) 40 | 41 | 42 | class ChromaRetriever: 43 | """Vector database retrieval using ChromaDB""" 44 | 45 | def __init__( 46 | self, 47 | collection_name: str = "memories", 48 | model_name: str = "all-MiniLM-L6-v2" 49 | ): 50 | """Initialize ChromaDB retriever. 51 | 52 | Args: 53 | collection_name: Name of the ChromaDB collection 54 | """ 55 | self.client = chromadb.Client(Settings(allow_reset=True)) 56 | self.embedding_function = SentenceTransformerEmbeddingFunction( 57 | model_name=model_name 58 | ) 59 | self.collection = self.client.get_or_create_collection( 60 | name=collection_name, embedding_function=self.embedding_function 61 | ) 62 | 63 | def add_document(self, document: str, metadata: Dict, doc_id: str): 64 | """Add a document to ChromaDB. 65 | 66 | Args: 67 | document: Text content to add 68 | metadata: Dictionary of metadata 69 | doc_id: Unique identifier for the document 70 | """ 71 | # Convert MemoryNote object to serializable format 72 | processed_metadata = {} 73 | for key, value in metadata.items(): 74 | if isinstance(value, list): 75 | processed_metadata[key] = json.dumps(value) 76 | elif isinstance(value, dict): 77 | processed_metadata[key] = json.dumps(value) 78 | else: 79 | processed_metadata[key] = str(value) 80 | 81 | self.collection.add( 82 | documents=[document], metadatas=[processed_metadata], ids=[doc_id] 83 | ) 84 | 85 | def delete_document(self, doc_id: str): 86 | """Delete a document from ChromaDB. 87 | 88 | Args: 89 | doc_id: ID of document to delete 90 | """ 91 | self.collection.delete(ids=[doc_id]) 92 | 93 | def search(self, query: str, k: int = 5): 94 | """Search for similar documents. 95 | 96 | Args: 97 | query: Query text 98 | k: Number of results to return 99 | 100 | Returns: 101 | Dict with documents, metadatas, ids, and distances 102 | """ 103 | results = self.collection.query(query_texts=[query], n_results=k) 104 | 105 | if (results is not None) and (results.get("metadatas", [])): 106 | results["metadatas"] = self._convert_metadata_types( 107 | results["metadatas"]) 108 | 109 | return results 110 | 111 | def _convert_metadata_types( 112 | self, 113 | metadatas: List[List[Dict]] 114 | ) -> List[List[Dict]]: 115 | """Convert string metadata back to original types. 116 | 117 | Args: 118 | metadatas: List of metadata lists from query results 119 | 120 | Returns: 121 | Converted metadata structure 122 | """ 123 | for query_metadatas in metadatas: 124 | if isinstance(query_metadatas, List): 125 | for metadata_dict in query_metadatas: 126 | if isinstance(metadata_dict, Dict): 127 | self._convert_metadata_dict(metadata_dict) 128 | return metadatas 129 | 130 | def _convert_metadata_dict(self, metadata: Dict) -> None: 131 | """Convert metadata values from strings to appropriate types in-place. 132 | 133 | Args: 134 | metadata: Single metadata dictionary to convert 135 | """ 136 | for key, value in metadata.items(): 137 | # only attempt to convert strings 138 | if not isinstance(value, str): 139 | continue 140 | else: 141 | try: 142 | metadata[key] = ast.literal_eval(value) 143 | except Exception: 144 | pass 145 | 146 | 147 | class PersistentChromaRetriever(ChromaRetriever): 148 | """ 149 | Persistent ChromaDB client/retriever to facilitate sharing of memory from 150 | multiple agents across sessions. 151 | Simply changes how the client and collection are initialized. Other 152 | functionality is inherited from ChromaRetriever. 153 | """ 154 | 155 | def __init__( 156 | self, 157 | directory: Optional[str] = None, 158 | collection_name: str = "memories", 159 | model_name: str = "all-MiniLM-L6-v2", 160 | extend: bool = False 161 | ): 162 | """ 163 | Initialize persistent ChromaDB retriever. 164 | 165 | :param directory: Directory path for ChromaDB storage. Defaults to 166 | '~/.chromadb' if None. 167 | :collection_name: Name of the ChromaDB collection. 168 | :model_name: SentenceTransformer model name for embeddings. 169 | :extend: If True, allows initializes client and retriever from 170 | collection if it exists. Raises error if False and collection 171 | already exists. This prevents accidental overwriting of 172 | existing collections. 173 | """ 174 | if directory is None: 175 | directory = Path.home() / '.chromadb' 176 | directory.mkdir(parents=True, exist_ok=True) 177 | elif isinstance(directory, str): 178 | directory = Path(directory) 179 | 180 | try: 181 | directory.resolve(strict=True) 182 | except FileNotFoundError: 183 | directory.mkdir(parents=True, exist_ok=True) 184 | except Exception as e: 185 | raise ValueError(f'Error accessing directory: {e}') 186 | 187 | # Use PersistentClient instead of regular Client 188 | self.client = chromadb.PersistentClient(path=str(directory)) 189 | self.embedding_function = SentenceTransformerEmbeddingFunction( 190 | model_name=model_name) 191 | 192 | existing_collections = [col.name for col in self.client.list_collections()] 193 | 194 | if collection_name in existing_collections: 195 | if extend: 196 | self.collection = self.client.get_collection(name=collection_name) 197 | else: 198 | raise ValueError( 199 | f"Collection '{collection_name}' already exists. " 200 | "Use extend=True to add to it." 201 | ) 202 | else: 203 | self.collection = self.client.get_or_create_collection( 204 | name=collection_name, 205 | embedding_function=self.embedding_function 206 | ) 207 | self.collection_name = collection_name 208 | 209 | 210 | class CopiedChromaRetriever(PersistentChromaRetriever): 211 | """ 212 | ChromaDB retriever that creates a copy of an existing collection 213 | under to a temporary ChromaDB instance. 214 | Useful for creating isolated copies of shared starting memory collections. 215 | """ 216 | 217 | def __init__( 218 | self, 219 | directory: Optional[str] = None, 220 | collection_name: str = "memories", 221 | model_name: str = "all-MiniLM-L6-v2", 222 | _dest_collection_name: Optional[str] = None, 223 | _copy_batch_size: int = 10, 224 | ): 225 | """ 226 | Initialize the CopiedChromaDB retriever. 227 | 228 | :param directory: Directory path for source ChromaDB storage. If None, 229 | defaults to '~/.chromadb'. 230 | :param collection_name: Name of the source ChromaDB collection to copy. 231 | :param model_name: SentenceTransformer model name for embeddings. 232 | :param _dest_collection_name: Optional name for the destination 233 | collection. If None, defaults to '{collection_name}__clone'. 234 | This parameter is marked as private as the class itself is meant 235 | for single use and discard db that exists in a temporary so naming 236 | the copied collection is most likely not needed. 237 | :param _copy_batch_size: Number of documents to copy per batch. 238 | Shouldn't need to be changed normally. 239 | """ 240 | 241 | self.embedding_function = SentenceTransformerEmbeddingFunction( 242 | model_name=model_name) 243 | 244 | # ensure source is valid 245 | if directory is None: 246 | directory = Path.home() / '.chromadb' 247 | directory.mkdir(parents=True, exist_ok=True) 248 | elif isinstance(directory, str): 249 | directory = Path(directory) 250 | self._src_client = chromadb.PersistentClient(path=str(directory)) 251 | 252 | self._src = self._src_client.get_collection(name=collection_name) 253 | existing_collections = [ 254 | col.name for col in self._src_client.list_collections()] 255 | if collection_name not in existing_collections: 256 | raise ValueError( 257 | f"Collection '{collection_name}' to be copied does not exist." 258 | ) 259 | 260 | # use temp directory for destination collection 261 | try: 262 | self._tmpdir = tempfile.TemporaryDirectory( 263 | prefix='chromadb_ephemeral_') 264 | self._tmp_path = Path(self._tmpdir.name) 265 | self._dst_client = chromadb.PersistentClient( 266 | path=str(self._tmp_path) 267 | ) 268 | self.collection_name = ( 269 | _dest_collection_name 270 | or f"{collection_name}__clone" 271 | ) 272 | self.collection = self._dst_client.get_or_create_collection( 273 | name=self.collection_name, 274 | embedding_function=self.embedding_function, 275 | metadata=self._src.metadata 276 | ) 277 | except Exception as e: 278 | raise ValueError(f"Error creating temporary ChromaDB: {e}") 279 | 280 | try: 281 | _clone_collection( 282 | src=self._src, 283 | dest=self.collection, 284 | batch_size=_copy_batch_size, 285 | ) 286 | except Exception as e: 287 | raise ValueError(f"Error cloning ChromaDB collection: {e}") 288 | 289 | atexit.register(self.close) 290 | 291 | def close(self): 292 | """Cleanup temporary directory.""" 293 | try: 294 | self._dst_client.delete_collection(self.collection_name) 295 | except Exception: 296 | pass 297 | try: 298 | self._tmpdir.cleanup() 299 | except Exception: 300 | pass 301 | 302 | def __exit__(self, exc_type, exc_value, traceback): 303 | self.close() 304 | -------------------------------------------------------------------------------- /agentic_memory/memory_system.py: -------------------------------------------------------------------------------- 1 | import keyword 2 | from typing import List, Dict, Optional, Any, Tuple 3 | import uuid 4 | from datetime import datetime 5 | from .llm_controller import LLMController 6 | from .retrievers import ChromaRetriever 7 | import json 8 | import logging 9 | from rank_bm25 import BM25Okapi 10 | from sentence_transformers import SentenceTransformer 11 | import numpy as np 12 | from sklearn.metrics.pairwise import cosine_similarity 13 | import os 14 | from abc import ABC, abstractmethod 15 | from transformers import AutoModel, AutoTokenizer 16 | from nltk.tokenize import word_tokenize 17 | import pickle 18 | from pathlib import Path 19 | from litellm import completion 20 | import time 21 | 22 | logger = logging.getLogger(__name__) 23 | 24 | class MemoryNote: 25 | """A memory note that represents a single unit of information in the memory system. 26 | 27 | This class encapsulates all metadata associated with a memory, including: 28 | - Core content and identifiers 29 | - Temporal information (creation and access times) 30 | - Semantic metadata (keywords, context, tags) 31 | - Relationship data (links to other memories) 32 | - Usage statistics (retrieval count) 33 | - Evolution tracking (history of changes) 34 | """ 35 | 36 | def __init__(self, 37 | content: str, 38 | id: Optional[str] = None, 39 | keywords: Optional[List[str]] = None, 40 | links: Optional[Dict] = None, 41 | retrieval_count: Optional[int] = None, 42 | timestamp: Optional[str] = None, 43 | last_accessed: Optional[str] = None, 44 | context: Optional[str] = None, 45 | evolution_history: Optional[List] = None, 46 | category: Optional[str] = None, 47 | tags: Optional[List[str]] = None): 48 | """Initialize a new memory note with its associated metadata. 49 | 50 | Args: 51 | content (str): The main text content of the memory 52 | id (Optional[str]): Unique identifier for the memory. If None, a UUID will be generated 53 | keywords (Optional[List[str]]): Key terms extracted from the content 54 | links (Optional[Dict]): References to related memories 55 | retrieval_count (Optional[int]): Number of times this memory has been accessed 56 | timestamp (Optional[str]): Creation time in format YYYYMMDDHHMM 57 | last_accessed (Optional[str]): Last access time in format YYYYMMDDHHMM 58 | context (Optional[str]): The broader context or domain of the memory 59 | evolution_history (Optional[List]): Record of how the memory has evolved 60 | category (Optional[str]): Classification category 61 | tags (Optional[List[str]]): Additional classification tags 62 | """ 63 | # Core content and ID 64 | self.content = content 65 | self.id = id or str(uuid.uuid4()) 66 | 67 | # Semantic metadata 68 | self.keywords = keywords or [] 69 | self.links = links or [] 70 | self.context = context or "General" 71 | self.category = category or "Uncategorized" 72 | self.tags = tags or [] 73 | 74 | # Temporal information 75 | current_time = datetime.now().strftime("%Y%m%d%H%M") 76 | self.timestamp = timestamp or current_time 77 | self.last_accessed = last_accessed or current_time 78 | 79 | # Usage and evolution data 80 | self.retrieval_count = retrieval_count or 0 81 | self.evolution_history = evolution_history or [] 82 | 83 | class AgenticMemorySystem: 84 | """Core memory system that manages memory notes and their evolution. 85 | 86 | This system provides: 87 | - Memory creation, retrieval, update, and deletion 88 | - Content analysis and metadata extraction 89 | - Memory evolution and relationship management 90 | - Hybrid search capabilities 91 | """ 92 | 93 | def __init__(self, 94 | model_name: str = 'all-MiniLM-L6-v2', 95 | llm_backend: str = "openai", 96 | llm_model: str = "gpt-4o-mini", 97 | evo_threshold: int = 100, 98 | api_key: Optional[str] = None): 99 | """Initialize the memory system. 100 | 101 | Args: 102 | model_name: Name of the sentence transformer model 103 | llm_backend: LLM backend to use (openai/ollama) 104 | llm_model: Name of the LLM model 105 | evo_threshold: Number of memories before triggering evolution 106 | api_key: API key for the LLM service 107 | """ 108 | self.memories = {} 109 | self.model_name = model_name 110 | # Initialize ChromaDB retriever with empty collection 111 | try: 112 | # First try to reset the collection if it exists 113 | temp_retriever = ChromaRetriever(collection_name="memories",model_name=self.model_name) 114 | temp_retriever.client.reset() 115 | except Exception as e: 116 | logger.warning(f"Could not reset ChromaDB collection: {e}") 117 | 118 | # Create a fresh retriever instance 119 | self.retriever = ChromaRetriever(collection_name="memories",model_name=self.model_name) 120 | 121 | # Initialize LLM controller 122 | self.llm_controller = LLMController(llm_backend, llm_model, api_key) 123 | self.evo_cnt = 0 124 | self.evo_threshold = evo_threshold 125 | 126 | # Evolution system prompt 127 | self._evolution_system_prompt = ''' 128 | You are an AI memory evolution agent responsible for managing and evolving a knowledge base. 129 | Analyze the the new memory note according to keywords and context, also with their several nearest neighbors memory. 130 | Make decisions about its evolution. 131 | 132 | The new memory context: 133 | {context} 134 | content: {content} 135 | keywords: {keywords} 136 | 137 | The nearest neighbors memories: 138 | {nearest_neighbors_memories} 139 | 140 | Based on this information, determine: 141 | 1. Should this memory be evolved? Consider its relationships with other memories. 142 | 2. What specific actions should be taken (strengthen, update_neighbor)? 143 | 2.1 If choose to strengthen the connection, which memory should it be connected to? Can you give the updated tags of this memory? 144 | 2.2 If choose to update_neighbor, you can update the context and tags of these memories based on the understanding of these memories. If the context and the tags are not updated, the new context and tags should be the same as the original ones. Generate the new context and tags in the sequential order of the input neighbors. 145 | Tags should be determined by the content of these characteristic of these memories, which can be used to retrieve them later and categorize them. 146 | Note that the length of new_tags_neighborhood must equal the number of input neighbors, and the length of new_context_neighborhood must equal the number of input neighbors. 147 | The number of neighbors is {neighbor_number}. 148 | Return your decision in JSON format with the following structure: 149 | {{ 150 | "should_evolve": True or False, 151 | "actions": ["strengthen", "update_neighbor"], 152 | "suggested_connections": ["neighbor_memory_ids"], 153 | "tags_to_update": ["tag_1",..."tag_n"], 154 | "new_context_neighborhood": ["new context",...,"new context"], 155 | "new_tags_neighborhood": [["tag_1",...,"tag_n"],...["tag_1",...,"tag_n"]], 156 | }} 157 | ''' 158 | 159 | def analyze_content(self, content: str) -> Dict: 160 | """Analyze content using LLM to extract semantic metadata. 161 | 162 | Uses a language model to understand the content and extract: 163 | - Keywords: Important terms and concepts 164 | - Context: Overall domain or theme 165 | - Tags: Classification categories 166 | 167 | Args: 168 | content (str): The text content to analyze 169 | 170 | Returns: 171 | Dict: Contains extracted metadata with keys: 172 | - keywords: List[str] 173 | - context: str 174 | - tags: List[str] 175 | """ 176 | prompt = """Generate a structured analysis of the following content by: 177 | 1. Identifying the most salient keywords (focus on nouns, verbs, and key concepts) 178 | 2. Extracting core themes and contextual elements 179 | 3. Creating relevant categorical tags 180 | 181 | Format the response as a JSON object: 182 | { 183 | "keywords": [ 184 | // several specific, distinct keywords that capture key concepts and terminology 185 | // Order from most to least important 186 | // Don't include keywords that are the name of the speaker or time 187 | // At least three keywords, but don't be too redundant. 188 | ], 189 | "context": 190 | // one sentence summarizing: 191 | // - Main topic/domain 192 | // - Key arguments/points 193 | // - Intended audience/purpose 194 | , 195 | "tags": [ 196 | // several broad categories/themes for classification 197 | // Include domain, format, and type tags 198 | // At least three tags, but don't be too redundant. 199 | ] 200 | } 201 | 202 | Content for analysis: 203 | """ + content 204 | try: 205 | response = self.llm_controller.llm.get_completion(prompt, response_format={"type": "json_schema", "json_schema": { 206 | "name": "response", 207 | "schema": { 208 | "type": "object", 209 | "properties": { 210 | "keywords": { 211 | "type": "array", 212 | "items": { 213 | "type": "string" 214 | } 215 | }, 216 | "context": { 217 | "type": "string", 218 | }, 219 | "tags": { 220 | "type": "array", 221 | "items": { 222 | "type": "string" 223 | } 224 | } 225 | } 226 | } 227 | }}) 228 | return json.loads(response) 229 | except Exception as e: 230 | print(f"Error analyzing content: {e}") 231 | return {"keywords": [], "context": "General", "tags": []} 232 | 233 | def add_note(self, content: str, time: str = None, **kwargs) -> str: 234 | """Add a new memory note""" 235 | # Create MemoryNote without llm_controller 236 | if time is not None: 237 | kwargs['timestamp'] = time 238 | note = MemoryNote(content=content, **kwargs) 239 | 240 | # Update retriever with all documents 241 | evo_label, note = self.process_memory(note) 242 | self.memories[note.id] = note 243 | 244 | # Add to ChromaDB with complete metadata 245 | metadata = { 246 | "id": note.id, 247 | "content": note.content, 248 | "keywords": note.keywords, 249 | "links": note.links, 250 | "retrieval_count": note.retrieval_count, 251 | "timestamp": note.timestamp, 252 | "last_accessed": note.last_accessed, 253 | "context": note.context, 254 | "evolution_history": note.evolution_history, 255 | "category": note.category, 256 | "tags": note.tags 257 | } 258 | self.retriever.add_document(note.content, metadata, note.id) 259 | 260 | if evo_label == True: 261 | self.evo_cnt += 1 262 | if self.evo_cnt % self.evo_threshold == 0: 263 | self.consolidate_memories() 264 | return note.id 265 | 266 | def consolidate_memories(self): 267 | """Consolidate memories: update retriever with new documents""" 268 | # Reset ChromaDB collection 269 | self.retriever = ChromaRetriever(collection_name="memories",model_name=self.model_name) 270 | 271 | # Re-add all memory documents with their complete metadata 272 | for memory in self.memories.values(): 273 | metadata = { 274 | "id": memory.id, 275 | "content": memory.content, 276 | "keywords": memory.keywords, 277 | "links": memory.links, 278 | "retrieval_count": memory.retrieval_count, 279 | "timestamp": memory.timestamp, 280 | "last_accessed": memory.last_accessed, 281 | "context": memory.context, 282 | "evolution_history": memory.evolution_history, 283 | "category": memory.category, 284 | "tags": memory.tags 285 | } 286 | self.retriever.add_document(memory.content, metadata, memory.id) 287 | 288 | def find_related_memories(self, query: str, k: int = 5) -> Tuple[str, List[int]]: 289 | """Find related memories using ChromaDB retrieval""" 290 | if not self.memories: 291 | return "", [] 292 | 293 | try: 294 | # Get results from ChromaDB 295 | results = self.retriever.search(query, k) 296 | 297 | # Convert to list of memories 298 | memory_str = "" 299 | indices = [] 300 | 301 | if 'ids' in results and results['ids'] and len(results['ids']) > 0 and len(results['ids'][0]) > 0: 302 | for i, doc_id in enumerate(results['ids'][0]): 303 | # Get metadata from ChromaDB results 304 | if i < len(results['metadatas'][0]): 305 | metadata = results['metadatas'][0][i] 306 | # Format memory string 307 | memory_str += f"memory index:{i}\ttalk start time:{metadata.get('timestamp', '')}\tmemory content: {metadata.get('content', '')}\tmemory context: {metadata.get('context', '')}\tmemory keywords: {str(metadata.get('keywords', []))}\tmemory tags: {str(metadata.get('tags', []))}\n" 308 | indices.append(i) 309 | 310 | return memory_str, indices 311 | except Exception as e: 312 | logger.error(f"Error in find_related_memories: {str(e)}") 313 | return "", [] 314 | 315 | def find_related_memories_raw(self, query: str, k: int = 5) -> str: 316 | """Find related memories using ChromaDB retrieval in raw format""" 317 | if not self.memories: 318 | return "" 319 | 320 | # Get results from ChromaDB 321 | results = self.retriever.search(query, k) 322 | 323 | # Convert to list of memories 324 | memory_str = "" 325 | 326 | if 'ids' in results and results['ids'] and len(results['ids']) > 0: 327 | for i, doc_id in enumerate(results['ids'][0][:k]): 328 | if i < len(results['metadatas'][0]): 329 | # Get metadata from ChromaDB results 330 | metadata = results['metadatas'][0][i] 331 | 332 | # Add main memory info 333 | memory_str += f"talk start time:{metadata.get('timestamp', '')}\tmemory content: {metadata.get('content', '')}\tmemory context: {metadata.get('context', '')}\tmemory keywords: {str(metadata.get('keywords', []))}\tmemory tags: {str(metadata.get('tags', []))}\n" 334 | 335 | # Add linked memories if available 336 | links = metadata.get('links', []) 337 | j = 0 338 | for link_id in links: 339 | if link_id in self.memories and j < k: 340 | neighbor = self.memories[link_id] 341 | memory_str += f"talk start time:{neighbor.timestamp}\tmemory content: {neighbor.content}\tmemory context: {neighbor.context}\tmemory keywords: {str(neighbor.keywords)}\tmemory tags: {str(neighbor.tags)}\n" 342 | j += 1 343 | 344 | return memory_str 345 | 346 | def read(self, memory_id: str) -> Optional[MemoryNote]: 347 | """Retrieve a memory note by its ID. 348 | 349 | Args: 350 | memory_id (str): ID of the memory to retrieve 351 | 352 | Returns: 353 | MemoryNote if found, None otherwise 354 | """ 355 | return self.memories.get(memory_id) 356 | 357 | def update(self, memory_id: str, **kwargs) -> bool: 358 | """Update a memory note. 359 | 360 | Args: 361 | memory_id: ID of memory to update 362 | **kwargs: Fields to update 363 | 364 | Returns: 365 | bool: True if update successful 366 | """ 367 | if memory_id not in self.memories: 368 | return False 369 | 370 | note = self.memories[memory_id] 371 | 372 | # Update fields 373 | for key, value in kwargs.items(): 374 | if hasattr(note, key): 375 | setattr(note, key, value) 376 | 377 | # Update in ChromaDB 378 | metadata = { 379 | "id": note.id, 380 | "content": note.content, 381 | "keywords": note.keywords, 382 | "links": note.links, 383 | "retrieval_count": note.retrieval_count, 384 | "timestamp": note.timestamp, 385 | "last_accessed": note.last_accessed, 386 | "context": note.context, 387 | "evolution_history": note.evolution_history, 388 | "category": note.category, 389 | "tags": note.tags 390 | } 391 | 392 | # Delete and re-add to update 393 | self.retriever.delete_document(memory_id) 394 | self.retriever.add_document(document=note.content, metadata=metadata, doc_id=memory_id) 395 | 396 | return True 397 | 398 | def delete(self, memory_id: str) -> bool: 399 | """Delete a memory note by its ID. 400 | 401 | Args: 402 | memory_id (str): ID of the memory to delete 403 | 404 | Returns: 405 | bool: True if memory was deleted, False if not found 406 | """ 407 | if memory_id in self.memories: 408 | # Delete from ChromaDB 409 | self.retriever.delete_document(memory_id) 410 | # Delete from local storage 411 | del self.memories[memory_id] 412 | return True 413 | return False 414 | 415 | def _search_raw(self, query: str, k: int = 5) -> List[Dict[str, Any]]: 416 | """Internal search method that returns raw results from ChromaDB. 417 | 418 | This is used internally by the memory evolution system to find 419 | related memories for potential evolution. 420 | 421 | Args: 422 | query (str): The search query text 423 | k (int): Maximum number of results to return 424 | 425 | Returns: 426 | List[Dict[str, Any]]: Raw search results from ChromaDB 427 | """ 428 | results = self.retriever.search(query, k) 429 | return [{'id': doc_id, 'score': score} 430 | for doc_id, score in zip(results['ids'][0], results['distances'][0])] 431 | 432 | def search(self, query: str, k: int = 5) -> List[Dict[str, Any]]: 433 | """Search for memories using a hybrid retrieval approach.""" 434 | # Get results from ChromaDB (only do this once) 435 | search_results = self.retriever.search(query, k) 436 | memories = [] 437 | 438 | # Process ChromaDB results 439 | for i, doc_id in enumerate(search_results['ids'][0]): 440 | memory = self.memories.get(doc_id) 441 | if memory: 442 | memories.append({ 443 | 'id': doc_id, 444 | 'content': memory.content, 445 | 'context': memory.context, 446 | 'keywords': memory.keywords, 447 | 'score': search_results['distances'][0][i] 448 | }) 449 | 450 | return memories[:k] 451 | 452 | def _search(self, query: str, k: int = 5) -> List[Dict[str, Any]]: 453 | """Search for memories using a hybrid retrieval approach. 454 | 455 | This method combines results from both: 456 | 1. ChromaDB vector store (semantic similarity) 457 | 2. Embedding-based retrieval (dense vectors) 458 | 459 | The results are deduplicated and ranked by relevance. 460 | 461 | Args: 462 | query (str): The search query text 463 | k (int): Maximum number of results to return 464 | 465 | Returns: 466 | List[Dict[str, Any]]: List of search results, each containing: 467 | - id: Memory ID 468 | - content: Memory content 469 | - score: Similarity score 470 | - metadata: Additional memory metadata 471 | """ 472 | # Get results from ChromaDB 473 | chroma_results = self.retriever.search(query, k) 474 | memories = [] 475 | 476 | # Process ChromaDB results 477 | for i, doc_id in enumerate(chroma_results['ids'][0]): 478 | memory = self.memories.get(doc_id) 479 | if memory: 480 | memories.append({ 481 | 'id': doc_id, 482 | 'content': memory.content, 483 | 'context': memory.context, 484 | 'keywords': memory.keywords, 485 | 'score': chroma_results['distances'][0][i] 486 | }) 487 | 488 | # Get results from embedding retriever 489 | embedding_results = self.retriever.search(query, k) 490 | 491 | # Combine results with deduplication 492 | seen_ids = set(m['id'] for m in memories) 493 | for result in embedding_results: 494 | memory_id = result.get('id') 495 | if memory_id and memory_id not in seen_ids: 496 | memory = self.memories.get(memory_id) 497 | if memory: 498 | memories.append({ 499 | 'id': memory_id, 500 | 'content': memory.content, 501 | 'context': memory.context, 502 | 'keywords': memory.keywords, 503 | 'score': result.get('score', 0.0) 504 | }) 505 | seen_ids.add(memory_id) 506 | 507 | return memories[:k] 508 | 509 | def search_agentic(self, query: str, k: int = 5) -> List[Dict[str, Any]]: 510 | """Search for memories using ChromaDB retrieval.""" 511 | if not self.memories: 512 | return [] 513 | 514 | try: 515 | # Get results from ChromaDB 516 | results = self.retriever.search(query, k) 517 | 518 | # Process results 519 | memories = [] 520 | seen_ids = set() 521 | 522 | # Check if we have valid results 523 | if ('ids' not in results or not results['ids'] or 524 | len(results['ids']) == 0 or len(results['ids'][0]) == 0): 525 | return [] 526 | 527 | # Process ChromaDB results 528 | for i, doc_id in enumerate(results['ids'][0][:k]): 529 | if doc_id in seen_ids: 530 | continue 531 | 532 | if i < len(results['metadatas'][0]): 533 | metadata = results['metadatas'][0][i] 534 | 535 | # Create result dictionary with all metadata fields 536 | memory_dict = { 537 | 'id': doc_id, 538 | 'content': metadata.get('content', ''), 539 | 'context': metadata.get('context', ''), 540 | 'keywords': metadata.get('keywords', []), 541 | 'tags': metadata.get('tags', []), 542 | 'timestamp': metadata.get('timestamp', ''), 543 | 'category': metadata.get('category', 'Uncategorized'), 544 | 'is_neighbor': False 545 | } 546 | 547 | # Add score if available 548 | if 'distances' in results and len(results['distances']) > 0 and i < len(results['distances'][0]): 549 | memory_dict['score'] = results['distances'][0][i] 550 | 551 | memories.append(memory_dict) 552 | seen_ids.add(doc_id) 553 | 554 | # Add linked memories (neighbors) 555 | neighbor_count = 0 556 | for memory in list(memories): # Use a copy to avoid modification during iteration 557 | if neighbor_count >= k: 558 | break 559 | 560 | # Get links from metadata 561 | links = memory.get('links', []) 562 | if not links and 'id' in memory: 563 | # Try to get links from memory object 564 | mem_obj = self.memories.get(memory['id']) 565 | if mem_obj: 566 | links = mem_obj.links 567 | 568 | for link_id in links: 569 | if link_id not in seen_ids and neighbor_count < k: 570 | neighbor = self.memories.get(link_id) 571 | if neighbor: 572 | memories.append({ 573 | 'id': link_id, 574 | 'content': neighbor.content, 575 | 'context': neighbor.context, 576 | 'keywords': neighbor.keywords, 577 | 'tags': neighbor.tags, 578 | 'timestamp': neighbor.timestamp, 579 | 'category': neighbor.category, 580 | 'is_neighbor': True 581 | }) 582 | seen_ids.add(link_id) 583 | neighbor_count += 1 584 | 585 | return memories[:k] 586 | except Exception as e: 587 | logger.error(f"Error in search_agentic: {str(e)}") 588 | return [] 589 | 590 | def process_memory(self, note: MemoryNote) -> Tuple[bool, MemoryNote]: 591 | """Process a memory note and determine if it should evolve. 592 | 593 | Args: 594 | note: The memory note to process 595 | 596 | Returns: 597 | Tuple[bool, MemoryNote]: (should_evolve, processed_note) 598 | """ 599 | # For first memory or testing, just return the note without evolution 600 | if not self.memories: 601 | return False, note 602 | 603 | try: 604 | # Get nearest neighbors 605 | neighbors_text, indices = self.find_related_memories(note.content, k=5) 606 | if not neighbors_text or not indices: 607 | return False, note 608 | 609 | # Format neighbors for LLM - in this case, neighbors_text is already formatted 610 | 611 | # Query LLM for evolution decision 612 | prompt = self._evolution_system_prompt.format( 613 | content=note.content, 614 | context=note.context, 615 | keywords=note.keywords, 616 | nearest_neighbors_memories=neighbors_text, 617 | neighbor_number=len(indices) 618 | ) 619 | 620 | try: 621 | response = self.llm_controller.llm.get_completion( 622 | prompt, 623 | response_format={"type": "json_schema", "json_schema": { 624 | "name": "response", 625 | "schema": { 626 | "type": "object", 627 | "properties": { 628 | "should_evolve": { 629 | "type": "boolean" 630 | }, 631 | "actions": { 632 | "type": "array", 633 | "items": { 634 | "type": "string" 635 | } 636 | }, 637 | "suggested_connections": { 638 | "type": "array", 639 | "items": { 640 | "type": "string" 641 | } 642 | }, 643 | "new_context_neighborhood": { 644 | "type": "array", 645 | "items": { 646 | "type": "string" 647 | } 648 | }, 649 | "tags_to_update": { 650 | "type": "array", 651 | "items": { 652 | "type": "string" 653 | } 654 | }, 655 | "new_tags_neighborhood": { 656 | "type": "array", 657 | "items": { 658 | "type": "array", 659 | "items": { 660 | "type": "string" 661 | } 662 | } 663 | } 664 | }, 665 | "required": ["should_evolve", "actions", "suggested_connections", 666 | "tags_to_update", "new_context_neighborhood", "new_tags_neighborhood"], 667 | "additionalProperties": False 668 | }, 669 | "strict": True 670 | }} 671 | ) 672 | 673 | response_json = json.loads(response) 674 | should_evolve = response_json["should_evolve"] 675 | 676 | if should_evolve: 677 | actions = response_json["actions"] 678 | for action in actions: 679 | if action == "strengthen": 680 | suggest_connections = response_json["suggested_connections"] 681 | new_tags = response_json["tags_to_update"] 682 | note.links.extend(suggest_connections) 683 | note.tags = new_tags 684 | elif action == "update_neighbor": 685 | new_context_neighborhood = response_json["new_context_neighborhood"] 686 | new_tags_neighborhood = response_json["new_tags_neighborhood"] 687 | noteslist = list(self.memories.values()) 688 | notes_id = list(self.memories.keys()) 689 | 690 | for i in range(min(len(indices), len(new_tags_neighborhood))): 691 | # Skip if we don't have enough neighbors 692 | if i >= len(indices): 693 | continue 694 | 695 | tag = new_tags_neighborhood[i] 696 | if i < len(new_context_neighborhood): 697 | context = new_context_neighborhood[i] 698 | else: 699 | # Since indices are just numbers now, we need to find the memory 700 | # In memory list using its index number 701 | if i < len(noteslist): 702 | context = noteslist[i].context 703 | else: 704 | continue 705 | 706 | # Get index from the indices list 707 | if i < len(indices): 708 | memorytmp_idx = indices[i] 709 | # Make sure the index is valid 710 | if memorytmp_idx < len(noteslist): 711 | notetmp = noteslist[memorytmp_idx] 712 | notetmp.tags = tag 713 | notetmp.context = context 714 | # Make sure the index is valid 715 | if memorytmp_idx < len(notes_id): 716 | self.memories[notes_id[memorytmp_idx]] = notetmp 717 | 718 | return should_evolve, note 719 | 720 | except (json.JSONDecodeError, KeyError, Exception) as e: 721 | logger.error(f"Error in memory evolution: {str(e)}") 722 | return False, note 723 | 724 | except Exception as e: 725 | # For testing purposes, catch all exceptions and return the original note 726 | logger.error(f"Error in process_memory: {str(e)}") 727 | return False, note 728 | --------------------------------------------------------------------------------