├── tests
    ├── __init__.py
    ├── test_utils.py
    ├── conftest.py
    ├── test_retriever.py
    └── test_memory_system.py
├── agentic_memory
    ├── __init__.py
    ├── llm_controller.py
    ├── retrievers.py
    └── memory_system.py
├── Figure
    ├── framework.jpg
    ├── intro-a.jpg
    └── intro-b.jpg
├── requirements.txt
├── .pre-commit-config.yaml
├── LICENSE
├── pyproject.toml
├── examples
    └── sovereign_memory.py
├── .gitignore
└── README.md


/tests/__init__.py:
--------------------------------------------------------------------------------
1 |  


--------------------------------------------------------------------------------
/agentic_memory/__init__.py:
--------------------------------------------------------------------------------
1 |  


--------------------------------------------------------------------------------
/Figure/framework.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/agiresearch/A-mem/HEAD/Figure/framework.jpg


--------------------------------------------------------------------------------
/Figure/intro-a.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/agiresearch/A-mem/HEAD/Figure/intro-a.jpg


--------------------------------------------------------------------------------
/Figure/intro-b.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/agiresearch/A-mem/HEAD/Figure/intro-b.jpg


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | sentence-transformers>=2.2.2
 2 | chromadb>=0.4.22
 3 | rank_bm25>=0.2.2
 4 | nltk>=3.8.1
 5 | transformers>=4.36.2
 6 | litellm>=1.16.11
 7 | numpy>=1.24.3
 8 | scikit-learn>=1.3.2
 9 | openai>=1.3.7
10 | ollama>=0.1.0
11 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | repos:
 2 | -   repo: https://github.com/pre-commit/pre-commit-hooks
 3 |     rev: v6.0.0
 4 |     hooks:
 5 |       - id: end-of-file-fixer
 6 | 
 7 | -   repo: https://github.com/astral-sh/ruff-pre-commit
 8 |     rev: v0.14.0
 9 |     hooks:
10 |       - id: ruff
11 |         args: ["--line-length=100"]
12 |       # linter.
13 |       - id: ruff-check
14 |         types_or: [ python, pyi ]
15 |         args: [ --fix ]
16 | 


--------------------------------------------------------------------------------
/tests/test_utils.py:
--------------------------------------------------------------------------------
 1 | """Test utilities for the memory system."""
 2 | from typing import List
 3 | from agentic_memory.llm_controller import BaseLLMController
 4 | 
 5 | class MockLLMController(BaseLLMController):
 6 |     """Mock LLM controller for testing"""
 7 |     def __init__(self):
 8 |         self.mock_response = "{}"
 9 |         
10 |     def get_completion(self, prompt: str, response_format: dict = None, temperature: float = 0.7) -> str:
11 |         """Mock completion that returns the pre-set response"""
12 |         return self.mock_response
13 |         
14 |     def get_embedding(self, text: str) -> List[float]:
15 |         """Mock embedding that returns a zero vector"""
16 |         return [0.0] * 384  # Mock embedding vector
17 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2025 AGI Research
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | requires = ["setuptools>=61.0"]
 3 | build-backend = "setuptools.build_meta"
 4 | 
 5 | [project]
 6 | name = "agentic-memory"
 7 | version = "0.0.1"
 8 | description = "A library for implementing agentic memory in LLM applications."
 9 | readme = "README.md"
10 | requires-python = ">=3.8"
11 | license = { file = "LICENSE" }
12 | classifiers = [
13 |     "Programming Language :: Python :: 3",
14 |     "License :: OSI Approved :: MIT License",
15 |     "Operating System :: OS Independent",
16 |     "Topic :: Scientific/Engineering :: Artificial Intelligence",
17 | ]
18 | dependencies = [
19 |     "sentence-transformers>=2.2.2",
20 |     "chromadb>=0.4.22",
21 |     "rank_bm25>=0.2.2",
22 |     "nltk>=3.8.1",
23 |     "litellm>=1.16.11",
24 |     "numpy>=1.24.3",
25 |     "scikit-learn>=1.3.2",
26 |     "openai>=1.3.7",
27 |     "pre-commit>=3.4.0",
28 | ]
29 | 
30 | [project.optional-dependencies]
31 | dev = [
32 |     "pytest",
33 |     "unittest",
34 |     "ruff",
35 |     "ipykernel",
36 | ]
37 | 
38 | [tool.setuptools.packages.find]
39 | where = ["."]
40 | include = ["agentic_memory*"]
41 | exclude = ["tests*"]
42 | 
43 | [project.urls]
44 | "Homepage" = "https://github.com/agiresearch/A-mem"
45 | "Bug Tracker" = "https://github.com/agiresearch/A-mem/issues"
46 | 
47 | [tool.pytest.ini_options]
48 | minversion = "8.0"
49 | testpaths = ["tests"]
50 | addopts = "-q"
51 | pythonpath = ["."]
52 | 


--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import tempfile
 3 | import shutil
 4 | from pathlib import Path
 5 | 
 6 | from agentic_memory.retrievers import ChromaRetriever, PersistentChromaRetriever
 7 | 
 8 | 
 9 | @pytest.fixture
10 | def retriever():
11 |     """Fixture providing a clean ChromaRetriever instance."""
12 |     retriever = ChromaRetriever(collection_name="test_memories")
13 |     yield retriever
14 |     # Cleanup: reset the collection after each test
15 |     retriever.client.reset()
16 | 
17 | 
18 | @pytest.fixture
19 | def sample_metadata():
20 |     """Fixture providing sample metadata with various types."""
21 |     return {
22 |         "timestamp": "2024-01-01T00:00:00",
23 |         "tags": ["test", "memory"],
24 |         "config": {"key": "value"},
25 |         "count": 42,
26 |         "score": 0.95
27 |     }
28 | 
29 | 
30 | @pytest.fixture
31 | def temp_db_dir():
32 |     """Fixture providing a temporary directory for persistent ChromaDB."""
33 |     temp_dir = tempfile.mkdtemp()
34 |     yield Path(temp_dir)
35 |     # Cleanup: remove the temporary directory after test
36 |     shutil.rmtree(temp_dir, ignore_errors=True)
37 | 
38 |     
39 | @pytest.fixture
40 | def existing_collection(temp_db_dir, sample_metadata):
41 |     """Fixture that creates a pre-existing collection with data."""
42 |     retriever = PersistentChromaRetriever(
43 |         directory=str(temp_db_dir),
44 |         collection_name="existing_collection"
45 |     )
46 |     retriever.add_document("Existing document", sample_metadata, "existing_doc")
47 |     return temp_db_dir, "existing_collection"
48 | 


--------------------------------------------------------------------------------
/examples/sovereign_memory.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import sys
 3 | 
 4 | # Ensure we can import from source
 5 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 6 | 
 7 | from agentic_memory.memory_system import AgenticMemorySystem
 8 | 
 9 | def main():
10 |     print("🧠 Initializing A-mem Sovereign System (Local)...")
11 |     
12 |     # Initialize with local backend
13 |     # Note: Requires Ollama running with 'llama3' pulled
14 |     try:
15 |         memory_system = AgenticMemorySystem(
16 |             model_name='all-MiniLM-L6-v2',  # Local embeddings (via sentence-transformers)
17 |             llm_backend="ollama",
18 |             llm_model="llama3" 
19 |         )
20 |         print("✅ System initialized.")
21 |     except Exception as e:
22 |         print(f"❌ Init failed: {e}")
23 |         return
24 | 
25 |     # Add a memory
26 |     print("\n📝 Adding Sovereign Memory...")
27 |     content = "The user values data sovereignty and local processing above all else."
28 |     try:
29 |         # Note: A-mem automatically generates tags/context via LLM here
30 |         memory_id = memory_system.add_note(
31 |             content=content,
32 |             tags=["sovereign", "privacy"],
33 |             category="Principles"
34 |         )
35 |         print(f"   Memory stored with ID: {memory_id}")
36 |     except Exception as e:
37 |         print(f"❌ Failed to store memory: {e}")
38 |         return
39 | 
40 |     # Retrieve
41 |     print("\n🔍 Retrieving Memory...")
42 |     try:
43 |         results = memory_system.search_agentic("sovereignty", k=1)
44 |         for res in results:
45 |             print(f"   Found: {res['content']}")
46 |             print(f"   Tags: {res['tags']}")
47 |             print(f"   Context (LLM Generated): {res.get('context', 'N/A')}")
48 |     except Exception as e:
49 |         print(f"❌ Retrieval failed: {e}")
50 | 
51 | if __name__ == "__main__":
52 |     main()
53 | 


--------------------------------------------------------------------------------
/agentic_memory/llm_controller.py:
--------------------------------------------------------------------------------
 1 | from typing import Dict, Optional, Literal, Any
 2 | import os
 3 | import json
 4 | from abc import ABC, abstractmethod
 5 | from litellm import completion
 6 | 
 7 | class BaseLLMController(ABC):
 8 |     @abstractmethod
 9 |     def get_completion(self, prompt: str) -> str:
10 |         """Get completion from LLM"""
11 |         pass
12 | 
13 | class OpenAIController(BaseLLMController):
14 |     def __init__(self, model: str = "gpt-4", api_key: Optional[str] = None):
15 |         try:
16 |             from openai import OpenAI
17 |             self.model = model
18 |             if api_key is None:
19 |                 api_key = os.getenv('OPENAI_API_KEY')
20 |             if api_key is None:
21 |                 raise ValueError("OpenAI API key not found. Set OPENAI_API_KEY environment variable.")
22 |             self.client = OpenAI(api_key=api_key)
23 |         except ImportError:
24 |             raise ImportError("OpenAI package not found. Install it with: pip install openai")
25 |     
26 |     def get_completion(self, prompt: str, response_format: dict, temperature: float = 0.7) -> str:
27 |         response = self.client.chat.completions.create(
28 |             model=self.model,
29 |             messages=[
30 |                 {"role": "system", "content": "You must respond with a JSON object."},
31 |                 {"role": "user", "content": prompt}
32 |             ],
33 |             response_format=response_format,
34 |             temperature=temperature,
35 |             max_tokens=1000
36 |         )
37 |         return response.choices[0].message.content
38 | 
39 | class OllamaController(BaseLLMController):
40 |     def __init__(self, model: str = "llama2"):
41 |         from ollama import chat
42 |         self.model = model
43 |     
44 |     def _generate_empty_value(self, schema_type: str, schema_items: dict = None) -> Any:
45 |         if schema_type == "array":
46 |             return []
47 |         elif schema_type == "string":
48 |             return ""
49 |         elif schema_type == "object":
50 |             return {}
51 |         elif schema_type == "number":
52 |             return 0
53 |         elif schema_type == "boolean":
54 |             return False
55 |         return None
56 | 
57 |     def _generate_empty_response(self, response_format: dict) -> dict:
58 |         if "json_schema" not in response_format:
59 |             return {}
60 |             
61 |         schema = response_format["json_schema"]["schema"]
62 |         result = {}
63 |         
64 |         if "properties" in schema:
65 |             for prop_name, prop_schema in schema["properties"].items():
66 |                 result[prop_name] = self._generate_empty_value(prop_schema["type"], 
67 |                                                             prop_schema.get("items"))
68 |         
69 |         return result
70 | 
71 |     def get_completion(self, prompt: str, response_format: dict, temperature: float = 0.7) -> str:
72 |         # Allow exceptions (like ConnectionError) to bubble up for better debugging
73 |         response = completion(
74 |             model="ollama_chat/{}".format(self.model),
75 |             messages=[
76 |                 {"role": "system", "content": "You must respond with a JSON object."},
77 |                 {"role": "user", "content": prompt}
78 |             ],
79 |             response_format=response_format,
80 |         )
81 |         return response.choices[0].message.content
82 | 
83 | class LLMController:
84 |     """LLM-based controller for memory metadata generation"""
85 |     def __init__(self, 
86 |                  backend: Literal["openai", "ollama"] = "openai",
87 |                  model: str = "gpt-4", 
88 |                  api_key: Optional[str] = None):
89 |         if backend == "openai":
90 |             self.llm = OpenAIController(model, api_key)
91 |         elif backend == "ollama":
92 |             self.llm = OllamaController(model)
93 |         else:
94 |             raise ValueError("Backend must be one of: 'openai', 'ollama'")
95 |             
96 |     def get_completion(self, prompt: str, response_format: dict = None, temperature: float = 0.7) -> str:
97 |         return self.llm.get_completion(prompt, response_format, temperature)
98 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[codz]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | share/python-wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | MANIFEST
 28 | 
 29 | # PyInstaller
 30 | #  Usually these files are written by a python script from a template
 31 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 32 | *.manifest
 33 | *.spec
 34 | 
 35 | # Installer logs
 36 | pip-log.txt
 37 | pip-delete-this-directory.txt
 38 | 
 39 | # Unit test / coverage reports
 40 | htmlcov/
 41 | .tox/
 42 | .nox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *.cover
 49 | *.py.cover
 50 | .hypothesis/
 51 | .pytest_cache/
 52 | cover/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | .pybuilder/
 76 | target/
 77 | 
 78 | # Jupyter Notebook
 79 | .ipynb_checkpoints
 80 | 
 81 | # IPython
 82 | profile_default/
 83 | ipython_config.py
 84 | 
 85 | # pyenv
 86 | #   For a library or package, you might want to ignore these files since the code is
 87 | #   intended to run in multiple environments; otherwise, check them in:
 88 | # .python-version
 89 | 
 90 | # pipenv
 91 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 92 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 93 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 94 | #   install all needed dependencies.
 95 | #Pipfile.lock
 96 | 
 97 | # UV
 98 | #   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
 99 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
100 | #   commonly ignored for libraries.
101 | #uv.lock
102 | 
103 | # poetry
104 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
106 | #   commonly ignored for libraries.
107 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108 | #poetry.lock
109 | #poetry.toml
110 | 
111 | # pdm
112 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
113 | #   pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
114 | #   https://pdm-project.org/en/latest/usage/project/#working-with-version-control
115 | #pdm.lock
116 | #pdm.toml
117 | .pdm-python
118 | .pdm-build/
119 | 
120 | # pixi
121 | #   Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
122 | #pixi.lock
123 | #   Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
124 | #   in the .venv directory. It is recommended not to include this directory in version control.
125 | .pixi
126 | 
127 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
128 | __pypackages__/
129 | 
130 | # Celery stuff
131 | celerybeat-schedule
132 | celerybeat.pid
133 | 
134 | # SageMath parsed files
135 | *.sage.py
136 | 
137 | # Environments
138 | .env
139 | .envrc
140 | .venv
141 | env/
142 | venv/
143 | ENV/
144 | env.bak/
145 | venv.bak/
146 | 
147 | # Spyder project settings
148 | .spyderproject
149 | .spyproject
150 | 
151 | # Rope project settings
152 | .ropeproject
153 | 
154 | # mkdocs documentation
155 | /site
156 | 
157 | # mypy
158 | .mypy_cache/
159 | .dmypy.json
160 | dmypy.json
161 | 
162 | # Pyre type checker
163 | .pyre/
164 | 
165 | # pytype static type analyzer
166 | .pytype/
167 | 
168 | # Cython debug symbols
169 | cython_debug/
170 | 
171 | # PyCharm
172 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
173 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
174 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
175 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
176 | #.idea/
177 | 
178 | # Abstra
179 | # Abstra is an AI-powered process automation framework.
180 | # Ignore directories containing user credentials, local state, and settings.
181 | # Learn more at https://abstra.io/docs
182 | .abstra/
183 | 
184 | # Visual Studio Code
185 | #  Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore 
186 | #  that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
187 | #  and can be added to the global gitignore or merged into this file. However, if you prefer, 
188 | #  you could uncomment the following to ignore the entire vscode folder
189 | # .vscode/
190 | 
191 | # Ruff stuff:
192 | .ruff_cache/
193 | 
194 | # PyPI configuration file
195 | .pypirc
196 | 
197 | # Cursor
198 | #  Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
199 | #  exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
200 | #  refer to https://docs.cursor.com/context/ignore-files
201 | .cursorignore
202 | .cursorindexingignore
203 | 
204 | # Marimo
205 | marimo/_static/
206 | marimo/_lsp/
207 | __marimo__/
208 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Agentic Memory 🧠
  2 | 
  3 | A novel agentic memory system for LLM agents that can dynamically organize memories in an agentic way.
  4 | 
  5 | ## Introduction 🌟
  6 | 
  7 | Large Language Model (LLM) agents have demonstrated remarkable capabilities in handling complex real-world tasks through external tool usage. However, to effectively leverage historical experiences, they require sophisticated memory systems. Traditional memory systems, while providing basic storage and retrieval functionality, often lack advanced memory organization capabilities.
  8 | 
  9 | Our project introduces an innovative **Agentic Memory** system that revolutionizes how LLM agents manage and utilize their memories:
 10 | 
 11 | <div align="center">
 12 |   <img src="Figure/intro-a.jpg" alt="Traditional Memory System" width="600"/>
 13 |   <img src="Figure/intro-b.jpg" alt="Our Proposed Agentic Memory" width="600"/>
 14 |   <br>
 15 |   <em>Comparison between traditional memory system (top) and our proposed agentic memory (bottom). Our system enables dynamic memory operations and flexible agent-memory interactions.</em>
 16 | </div>
 17 | 
 18 | > **Note:** This repository provides a memory system to facilitate agent construction. If you want to reproduce the results presented in our paper, please refer to: [https://github.com/WujiangXu/AgenticMemory](https://github.com/WujiangXu/AgenticMemory)
 19 | 
 20 | For more details, please refer to our paper: [A-MEM: Agentic Memory for LLM Agents](https://arxiv.org/pdf/2502.12110)
 21 | 
 22 | 
 23 | ## Key Features ✨
 24 | 
 25 | - 🔄 Dynamic memory organization based on Zettelkasten principles
 26 | - 🔍 Intelligent indexing and linking of memories via ChromaDB
 27 | - 📝 Comprehensive note generation with structured attributes
 28 | - 🌐 Interconnected knowledge networks
 29 | - 🧬 Continuous memory evolution and refinement
 30 | - 🤖 Agent-driven decision making for adaptive memory management
 31 | 
 32 | ## Framework 🏗️
 33 | 
 34 | <div align="center">
 35 |   <img src="Figure/framework.jpg" alt="Agentic Memory Framework" width="800"/>
 36 |   <br>
 37 |   <em>The framework of our Agentic Memory system showing the dynamic interaction between LLM agents and memory components.</em>
 38 | </div>
 39 | 
 40 | ## How It Works 🛠️
 41 | 
 42 | When a new memory is added to the system:
 43 | 1. Generates comprehensive notes with structured attributes
 44 | 2. Creates contextual descriptions and tags
 45 | 3. Analyzes historical memories for relevant connections
 46 | 4. Establishes meaningful links based on similarities
 47 | 5. Enables dynamic memory evolution and updates
 48 | 
 49 | ## Results 📊
 50 | 
 51 | Empirical experiments conducted on six foundation models demonstrate superior performance compared to existing SOTA baselines.
 52 | 
 53 | ## Getting Started 🚀
 54 | 
 55 | 1. Clone the repository:
 56 | ```bash
 57 | git clone https://github.com/agiresearch/A-mem.git
 58 | cd A-mem
 59 | ```
 60 | 
 61 | 2. Install dependencies:
 62 | Create and activate a virtual environment (recommended):
 63 | ```bash
 64 | python -m venv .venv
 65 | source .venv/bin/activate  # On Windows, use: .venv\Scripts\activate
 66 | ```
 67 | 
 68 | Install the package:
 69 | ```bash
 70 | pip install .
 71 | ```
 72 | For development, you can install it in editable mode:
 73 | ```bash
 74 | pip install -e .
 75 | ```
 76 | 
 77 | 3. Usage Examples 💡
 78 | 
 79 | Here's how to use the Agentic Memory system for basic operations:
 80 | 
 81 | ```python
 82 | from agentic_memory.memory_system import AgenticMemorySystem
 83 | 
 84 | # Initialize the memory system 🚀
 85 | memory_system = AgenticMemorySystem(
 86 |     model_name='all-MiniLM-L6-v2',  # Embedding model for ChromaDB
 87 |     llm_backend="openai",           # LLM backend (openai/ollama)
 88 |     llm_model="gpt-4o-mini"         # LLM model name
 89 | )
 90 | 
 91 | # Add Memories ➕
 92 | # Simple addition
 93 | memory_id = memory_system.add_note("Deep learning neural networks")
 94 | 
 95 | # Addition with metadata
 96 | memory_id = memory_system.add_note(
 97 |     content="Machine learning project notes",
 98 |     tags=["ml", "project"],
 99 |     category="Research",
100 |     timestamp="202503021500"  # YYYYMMDDHHmm format
101 | )
102 | 
103 | # Read (Retrieve) Memories 📖
104 | # Get memory by ID
105 | memory = memory_system.read(memory_id)
106 | print(f"Content: {memory.content}")
107 | print(f"Tags: {memory.tags}")
108 | print(f"Context: {memory.context}")
109 | print(f"Keywords: {memory.keywords}")
110 | 
111 | # Search memories
112 | results = memory_system.search_agentic("neural networks", k=5)
113 | for result in results:
114 |     print(f"ID: {result['id']}")
115 |     print(f"Content: {result['content']}")
116 |     print(f"Tags: {result['tags']}")
117 |     print("---")
118 | 
119 | # Update Memories 🔄
120 | memory_system.update(memory_id, content="Updated content about deep learning")
121 | 
122 | # Delete Memories ❌
123 | memory_system.delete(memory_id)
124 | 
125 | # Memory Evolution 🧬
126 | # The system automatically evolves memories by:
127 | # 1. Finding semantic relationships using ChromaDB
128 | # 2. Updating metadata and context
129 | # 3. Creating connections between related memories
130 | # This happens automatically when adding or updating memories!
131 | ```
132 | 
133 | ### Advanced Features 🌟
134 | 
135 | 1. **ChromaDB Vector Storage** 📦
136 |    - Efficient vector embedding storage and retrieval
137 |    - Fast semantic similarity search
138 |    - Automatic metadata handling
139 |    - Persistent memory storage
140 | 
141 | 2. **Memory Evolution** 🧬
142 |    - Automatically analyzes content relationships
143 |    - Updates tags and context based on related memories
144 |    - Creates semantic connections between memories
145 | 
146 | 3. **Flexible Metadata** 📋
147 |    - Custom tags and categories
148 |    - Automatic keyword extraction
149 |    - Context generation
150 |    - Timestamp tracking
151 | 
152 | 4. **Multiple LLM Backends** 🤖
153 |    - OpenAI (GPT-4, GPT-3.5)
154 |    - Ollama (for local deployment)
155 | 
156 | ### Best Practices 💪
157 | 
158 | 1. **Memory Creation** ✨:
159 |    - Provide clear, specific content
160 |    - Add relevant tags for better organization
161 |    - Let the system handle context and keyword generation
162 | 
163 | 2. **Memory Retrieval** 🔍:
164 |    - Use specific search queries
165 |    - Adjust 'k' parameter based on needed results
166 |    - Consider both exact and semantic matches
167 | 
168 | 3. **Memory Evolution** 🧬:
169 |    - Allow automatic evolution to organize memories
170 |    - Review generated connections periodically
171 |    - Use consistent tagging conventions
172 | 
173 | 4. **Error Handling** ⚠️:
174 |    - Always check return values
175 |    - Handle potential KeyError for non-existent memories
176 |    - Use try-except blocks for LLM operations
177 | 
178 | ## Citation 📚
179 | 
180 | If you use this code in your research, please cite our work:
181 | 
182 | ```bibtex
183 | @article{xu2025mem,
184 |   title={A-mem: Agentic memory for llm agents},
185 |   author={Xu, Wujiang and Liang, Zujie and Mei, Kai and Gao, Hang and Tan, Juntao and Zhang, Yongfeng},
186 |   journal={arXiv preprint arXiv:2502.12110},
187 |   year={2025}
188 | }
189 | ```
190 | 
191 | ## License 📄
192 | 
193 | This project is licensed under the MIT License. See LICENSE for details.
194 | 


--------------------------------------------------------------------------------
/tests/test_retriever.py:
--------------------------------------------------------------------------------
  1 | import pytest
  2 | 
  3 | from agentic_memory.retrievers import PersistentChromaRetriever
  4 | 
  5 | 
  6 | def test_initialization(retriever):
  7 |     """Test ChromaRetriever initializes correctly."""
  8 |     assert retriever.collection is not None
  9 |     assert retriever.embedding_function is not None
 10 | 
 11 | 
 12 | def test_add_document(retriever, sample_metadata):
 13 |     """Test adding a document with metadata."""
 14 |     doc_id = "test_doc_1"
 15 |     document = "This is a test document."
 16 |     
 17 |     retriever.add_document(document, sample_metadata, doc_id)
 18 |     
 19 |     results = retriever.collection.get(ids=[doc_id])
 20 |     assert len(results["ids"]) == 1
 21 |     assert results["ids"][0] == doc_id
 22 | 
 23 | 
 24 | def test_delete_document(retriever, sample_metadata):
 25 |     """Test deleting a document."""
 26 |     doc_id = "test_doc_2"
 27 |     retriever.add_document("Test document", sample_metadata, doc_id)
 28 |     
 29 |     retriever.delete_document(doc_id)
 30 |     
 31 |     results = retriever.collection.get(ids=[doc_id])
 32 |     assert len(results["ids"]) == 0
 33 | 
 34 | 
 35 | def test_search(retriever, sample_metadata):
 36 |     """Test searching for similar documents."""
 37 |     retriever.add_document(
 38 |         "Machine learning is fascinating", sample_metadata, "doc1")
 39 |     retriever.add_document(
 40 |         "Deep learning uses neural networks", sample_metadata, "doc2")
 41 |     retriever.add_document(
 42 |         "Cats are fluffy animals", sample_metadata, "doc3")
 43 |     
 44 |     results = retriever.search("artificial intelligence", k=2)
 45 |     
 46 |     assert len(results["ids"][0]) == 2
 47 |     assert len(results["documents"][0]) == 2
 48 | 
 49 | 
 50 | def test_metadata_list_conversion(retriever):
 51 |     """Test that list metadata is properly converted."""
 52 |     metadata = {"tags": ["tag1", "tag2", "tag3"]}
 53 |     retriever.add_document("Test doc", metadata, "doc_list")
 54 |     
 55 |     results = retriever.search("Test", k=1)
 56 |     
 57 |     retrieved_tags = results["metadatas"][0][0]["tags"]
 58 |     assert isinstance(retrieved_tags, list)
 59 |     assert retrieved_tags == ["tag1", "tag2", "tag3"]
 60 | 
 61 | 
 62 | def test_metadata_dict_conversion(retriever):
 63 |     """Test that dict metadata is properly converted."""
 64 |     metadata = {"config": {"nested": "value", "number": 123}}
 65 |     retriever.add_document("Test doc", metadata, "doc_dict")
 66 |     
 67 |     results = retriever.search("Test", k=1)
 68 |     
 69 |     retrieved_config = results["metadatas"][0][0]["config"]
 70 |     assert isinstance(retrieved_config, dict)
 71 |     assert retrieved_config["nested"] == "value"
 72 | 
 73 | 
 74 | @pytest.mark.parametrize("value,expected_type", [
 75 |     ("42", int),
 76 |     ("3.14", float),
 77 |     ("-10", int),
 78 |     ("hello", str),
 79 | ])
 80 | def test_numeric_string_conversion(retriever, value, expected_type):
 81 |     """Test numeric string conversion in metadata."""
 82 |     metadata = {"value": value}
 83 |     retriever.add_document("Test doc", metadata, f"doc_{value}")
 84 |     
 85 |     results = retriever.search("Test", k=1)
 86 |     
 87 |     retrieved_value = results["metadatas"][0][0]["value"]
 88 |     assert isinstance(retrieved_value, expected_type)
 89 | 
 90 | 
 91 | def test_search_returns_top_k_results(retriever, sample_metadata):
 92 |     """Test that search respects the k parameter."""
 93 |     for i in range(10):
 94 |         retriever.add_document(
 95 |             f"Document number {i}", sample_metadata, f"doc_{i}")
 96 |     
 97 |     results = retriever.search("Document", k=3)
 98 |     
 99 |     assert len(results["ids"][0]) == 3
100 | 
101 | 
102 | class TestPersistentChromaRetriever:
103 |     """Test suite for PersistentChromaRetriever."""
104 |     
105 |     def test_creates_new_collection(self, temp_db_dir):
106 |         """Test creating a new persistent collection."""
107 |         retriever = PersistentChromaRetriever(
108 |             directory=str(temp_db_dir),
109 |             collection_name="new_collection"
110 |         )
111 |         
112 |         assert retriever.collection is not None
113 |         assert retriever.collection_name == "new_collection"
114 |         assert temp_db_dir.exists()
115 |     
116 |     @pytest.mark.parametrize("collection_name,extend,should_raise", [
117 |         ("existing_collection", False, True),   # Existing collection, no extend -> error
118 |         ("existing_collection", True, False),   # Existing collection, extend -> success
119 |         ("new_collection", False, False),       # New collection, no extend -> success
120 |         ("new_collection", True, False),        # New collection, extend -> success
121 |     ])
122 |     def test_collection_access_control(
123 |         self, existing_collection, collection_name, extend, should_raise
124 |     ):
125 |         """Test collection access with different combinations of name and extend flag."""
126 |         temp_db_dir, existing_name = existing_collection
127 |         
128 |         if should_raise:
129 |             with pytest.raises(ValueError, match="already exists"):
130 |                 PersistentChromaRetriever(
131 |                     directory=str(temp_db_dir),
132 |                     collection_name=collection_name,
133 |                     extend=extend
134 |                 )
135 |         else:
136 |             retriever = PersistentChromaRetriever(
137 |                 directory=str(temp_db_dir),
138 |                 collection_name=collection_name,
139 |                 extend=extend
140 |             )
141 |             assert retriever.collection is not None
142 |             
143 |             # If accessing existing collection, verify data is accessible
144 |             if collection_name == existing_name:
145 |                 results = retriever.collection.get(ids=["existing_doc"])
146 |                 assert len(results["ids"]) == 1
147 |     
148 |     def test_persistence_across_sessions(self, temp_db_dir, sample_metadata):
149 |         """Test that data persists across different retriever instances."""
150 |         collection_name = "persistent_collection"
151 |         
152 |         # Session 1: Create and add document
153 |         retriever1 = PersistentChromaRetriever(
154 |             directory=str(temp_db_dir),
155 |             collection_name=collection_name
156 |         )
157 |         retriever1.add_document("Persistent data", sample_metadata, "persist_doc")
158 |         del retriever1
159 |         
160 |         # Session 2: Reconnect and verify data exists
161 |         retriever2 = PersistentChromaRetriever(
162 |             directory=str(temp_db_dir),
163 |             collection_name=collection_name,
164 |             extend=True
165 |         )
166 |         
167 |         results = retriever2.collection.get(ids=["persist_doc"])
168 |         assert len(results["ids"]) == 1
169 |         assert results["documents"][0] == "Persistent data"
170 |     
171 |     def test_uses_default_directory_when_none(self):
172 |         """Test that default directory is used when none provided."""
173 |         retriever = PersistentChromaRetriever(
174 |             collection_name="default_dir_collection"
175 |         )
176 |         
177 |         # Should use ~/.chromadb as default
178 |         from pathlib import Path
179 |         default_path = Path.home() / '.chromadb'
180 |         assert default_path.exists()
181 |         
182 |         # Cleanup
183 |         retriever.client.delete_collection("default_dir_collection")
184 | 


--------------------------------------------------------------------------------
/tests/test_memory_system.py:
--------------------------------------------------------------------------------
  1 | import unittest
  2 | from agentic_memory.memory_system import AgenticMemorySystem, MemoryNote
  3 | from datetime import datetime
  4 | 
  5 | class TestAgenticMemorySystem(unittest.TestCase):
  6 |     def setUp(self):
  7 |         """Set up test environment before each test."""
  8 |         self.memory_system = AgenticMemorySystem(
  9 |             model_name='all-MiniLM-L6-v2',
 10 |             llm_backend="openai",
 11 |             llm_model="gpt-4o-mini"
 12 |         )
 13 |         
 14 |     def test_create_memory(self):
 15 |         """Test creating a new memory with complete metadata."""
 16 |         content = "Test memory content"
 17 |         tags = ["test", "memory"]
 18 |         keywords = ["test", "content"]
 19 |         links = ["link1", "link2"]
 20 |         context = "Test context"
 21 |         category = "Test category"
 22 |         timestamp = datetime.now().strftime("%Y%m%d%H%M")
 23 |         
 24 |         memory_id = self.memory_system.add_note(
 25 |             content=content,
 26 |             tags=tags,
 27 |             keywords=keywords,
 28 |             links=links,
 29 |             context=context,
 30 |             category=category,
 31 |             timestamp=timestamp
 32 |         )
 33 |         
 34 |         # Verify memory was created
 35 |         self.assertIsNotNone(memory_id)
 36 |         memory = self.memory_system.read(memory_id)
 37 |         self.assertIsNotNone(memory)
 38 |         self.assertEqual(memory.content, content)
 39 |         self.assertEqual(memory.tags, tags)
 40 |         self.assertEqual(memory.keywords, keywords)
 41 |         self.assertEqual(memory.links, links)
 42 |         self.assertEqual(memory.context, context)
 43 |         self.assertEqual(memory.category, category)
 44 |         self.assertEqual(memory.timestamp, timestamp)
 45 |         
 46 |     def test_memory_metadata_persistence(self):
 47 |         """Test that memory metadata persists through ChromaDB storage and retrieval."""
 48 |         # Create a memory with complex metadata
 49 |         content = "Complex test memory"
 50 |         tags = ["test", "complex", "metadata"]
 51 |         keywords = ["test", "complex", "keywords"]
 52 |         links = ["link1", "link2", "link3"]
 53 |         context = "Complex test context"
 54 |         category = "Complex test category"
 55 |         timestamp = datetime.now().strftime("%Y%m%d%H%M")
 56 |         evolution_history = ["evolution1", "evolution2"]
 57 |         
 58 |         memory_id = self.memory_system.add_note(
 59 |             content=content,
 60 |             tags=tags,
 61 |             keywords=keywords,
 62 |             links=links,
 63 |             context=context,
 64 |             category=category,
 65 |             timestamp=timestamp,
 66 |             evolution_history=evolution_history
 67 |         )
 68 |         
 69 |         # Search for the memory using ChromaDB
 70 |         results = self.memory_system.search_agentic(content, k=1)
 71 |         self.assertGreater(len(results), 0)
 72 |         
 73 |         # Verify metadata in search results
 74 |         result = results[0]
 75 |         self.assertEqual(result['content'], content)
 76 |         self.assertEqual(result['tags'], tags)
 77 |         self.assertEqual(result['keywords'], keywords)
 78 |         self.assertEqual(result['context'], context)
 79 |         self.assertEqual(result['category'], category)
 80 |         
 81 |     def test_memory_update(self):
 82 |         """Test updating memory metadata through ChromaDB."""
 83 |         # Create initial memory
 84 |         content = "Initial content"
 85 |         memory_id = self.memory_system.add_note(content=content)
 86 |         
 87 |         # Update memory with new metadata
 88 |         new_content = "Updated content"
 89 |         new_tags = ["updated", "tags"]
 90 |         new_keywords = ["updated", "keywords"]
 91 |         new_context = "Updated context"
 92 |         
 93 |         success = self.memory_system.update(
 94 |             memory_id,
 95 |             content=new_content,
 96 |             tags=new_tags,
 97 |             keywords=new_keywords,
 98 |             context=new_context
 99 |         )
100 |         
101 |         self.assertTrue(success)
102 |         
103 |         # Verify updates in ChromaDB
104 |         results = self.memory_system.search_agentic(new_content, k=1)
105 |         self.assertGreater(len(results), 0)
106 |         result = results[0]
107 |         self.assertEqual(result['content'], new_content)
108 |         self.assertEqual(result['tags'], new_tags)
109 |         self.assertEqual(result['keywords'], new_keywords)
110 |         self.assertEqual(result['context'], new_context)
111 |         
112 |     def test_memory_relationships(self):
113 |         """Test memory relationships and linked memories."""
114 |         # Create related memories
115 |         content1 = "First memory"
116 |         content2 = "Second memory"
117 |         content3 = "Third memory"
118 |         
119 |         id1 = self.memory_system.add_note(content1)
120 |         id2 = self.memory_system.add_note(content2)
121 |         id3 = self.memory_system.add_note(content3)
122 |         
123 |         # Add relationships
124 |         memory1 = self.memory_system.read(id1)
125 |         memory2 = self.memory_system.read(id2)
126 |         memory3 = self.memory_system.read(id3)
127 |         
128 |         memory1.links.append(id2)
129 |         memory2.links.append(id1)
130 |         memory2.links.append(id3)
131 |         memory3.links.append(id2)
132 |         
133 |         # Update memories with relationships
134 |         self.memory_system.update(id1, links=memory1.links)
135 |         self.memory_system.update(id2, links=memory2.links)
136 |         self.memory_system.update(id3, links=memory3.links)
137 |         
138 |         # Test relationship retrieval
139 |         results = self.memory_system.search_agentic(content1, k=3)
140 |         self.assertGreater(len(results), 0)
141 |         
142 |         # Verify relationships are maintained
143 |         memory1_updated = self.memory_system.read(id1)
144 |         self.assertIn(id2, memory1_updated.links)
145 |         
146 |     def test_memory_evolution(self):
147 |         """Test memory evolution system with ChromaDB."""
148 |         # Create related memories
149 |         contents = [
150 |             "Deep learning neural networks",
151 |             "Neural network architectures",
152 |             "Training deep neural networks"
153 |         ]
154 |         
155 |         memory_ids = []
156 |         for content in contents:
157 |             memory_id = self.memory_system.add_note(content)
158 |             memory_ids.append(memory_id)
159 |             
160 |         # Verify that memories have been properly evolved
161 |         for memory_id in memory_ids:
162 |             memory = self.memory_system.read(memory_id)
163 |             self.assertIsNotNone(memory.tags)
164 |             self.assertIsNotNone(memory.context)
165 |             self.assertIsNotNone(memory.keywords)
166 |             
167 |         # Test evolution through search
168 |         results = self.memory_system.search_agentic("neural networks", k=3)
169 |         self.assertGreater(len(results), 0)
170 |         
171 |         # Verify evolution metadata
172 |         for result in results:
173 |             self.assertIsNotNone(result['tags'])
174 |             self.assertIsNotNone(result['context'])
175 |             self.assertIsNotNone(result['keywords'])
176 |             
177 |     def test_memory_deletion(self):
178 |         """Test memory deletion from ChromaDB."""
179 |         # Create and delete a memory
180 |         content = "Memory to delete"
181 |         memory_id = self.memory_system.add_note(content)
182 |         
183 |         # Verify memory exists
184 |         memory = self.memory_system.read(memory_id)
185 |         self.assertIsNotNone(memory)
186 |         
187 |         # Delete memory
188 |         success = self.memory_system.delete(memory_id)
189 |         self.assertTrue(success)
190 |         
191 |         # Verify deletion
192 |         memory = self.memory_system.read(memory_id)
193 |         self.assertIsNone(memory)
194 |         
195 |         # Verify memory is removed from ChromaDB
196 |         results = self.memory_system.search_agentic(content, k=1)
197 |         self.assertEqual(len(results), 0)
198 |         
199 |     def test_memory_consolidation(self):
200 |         """Test memory consolidation with ChromaDB."""
201 |         # Create multiple memories
202 |         contents = [
203 |             "Memory 1",
204 |             "Memory 2",
205 |             "Memory 3"
206 |         ]
207 |         
208 |         for content in contents:
209 |             self.memory_system.add_note(content)
210 |             
211 |         # Force consolidation
212 |         self.memory_system.consolidate_memories()
213 |         
214 |         # Verify memories are still accessible
215 |         for content in contents:
216 |             results = self.memory_system.search_agentic(content, k=1)
217 |             self.assertGreater(len(results), 0)
218 |             self.assertEqual(results[0]['content'], content)
219 |             
220 |     def test_find_related_memories(self):
221 |         """Test finding related memories."""
222 |         # Create test memories
223 |         contents = [
224 |             "Python programming language",
225 |             "Python data science",
226 |             "Machine learning with Python",
227 |             "Web development with JavaScript"
228 |         ]
229 |         
230 |         for content in contents:
231 |             self.memory_system.add_note(content)
232 |             
233 |         # Test finding related memories
234 |         results = self.memory_system.find_related_memories("Python", k=2)
235 |         self.assertGreater(len(results), 0)
236 |         
237 |     def test_find_related_memories_raw(self):
238 |         """Test finding related memories with raw format."""
239 |         # Create test memories
240 |         contents = [
241 |             "Python programming language",
242 |             "Python data science",
243 |             "Machine learning with Python"
244 |         ]
245 |         
246 |         for content in contents:
247 |             self.memory_system.add_note(content)
248 |             
249 |         # Test finding related memories in raw format
250 |         results = self.memory_system.find_related_memories_raw("Python", k=2)
251 |         self.assertIsNotNone(results)
252 |         
253 |     def test_process_memory(self):
254 |         """Test memory processing and evolution."""
255 |         # Create a test memory
256 |         content = "Test memory for processing"
257 |         memory_id = self.memory_system.add_note(content)
258 |         
259 |         # Get the memory
260 |         memory = self.memory_system.read(memory_id)
261 |         
262 |         # Process the memory
263 |         should_evolve, processed_memory = self.memory_system.process_memory(memory)
264 |         
265 |         # Verify processing results
266 |         self.assertIsInstance(should_evolve, bool)
267 |         self.assertIsInstance(processed_memory, MemoryNote)
268 |         self.assertIsNotNone(processed_memory.tags)
269 |         self.assertIsNotNone(processed_memory.context)
270 |         self.assertIsNotNone(processed_memory.keywords)
271 | 
272 | if __name__ == '__main__':
273 |     unittest.main()
274 | 


--------------------------------------------------------------------------------
/agentic_memory/retrievers.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | from pathlib import Path
  3 | from typing import Dict, List, Optional
  4 | import ast
  5 | import tempfile
  6 | import atexit
  7 | 
  8 | import chromadb
  9 | from chromadb.config import Settings
 10 | from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction
 11 | from nltk.tokenize import word_tokenize
 12 | 
 13 | 
 14 | def simple_tokenize(text):
 15 |     return word_tokenize(text)
 16 | 
 17 | 
 18 | def _clone_collection(
 19 |     src: chromadb.Collection,
 20 |     dest: chromadb.Collection,
 21 |     batch_size: int = 10
 22 | ):
 23 |     """
 24 |     Copies one ChromaDB collection to another. 
 25 |     Enables duplicating of collections.
 26 |     This seemed to be the only (best) way to do this as the official ChromaDB
 27 |         docs also suggest this method:
 28 |     """
 29 |     existing_count = src.count()
 30 |     for i in range(0, existing_count, batch_size):
 31 |         batch = src.get(
 32 |             include=["metadatas", "documents", "embeddings"],
 33 |             limit=batch_size,
 34 |             offset=i)
 35 |         dest.add(
 36 |             ids=batch["ids"],
 37 |             documents=batch["documents"],
 38 |             metadatas=batch["metadatas"],
 39 |             embeddings=batch["embeddings"])
 40 | 
 41 | 
 42 | class ChromaRetriever:
 43 |     """Vector database retrieval using ChromaDB"""
 44 | 
 45 |     def __init__(
 46 |         self, 
 47 |         collection_name: str = "memories", 
 48 |         model_name: str = "all-MiniLM-L6-v2"
 49 |     ):
 50 |         """Initialize ChromaDB retriever.
 51 | 
 52 |         Args:
 53 |             collection_name: Name of the ChromaDB collection
 54 |         """
 55 |         self.client = chromadb.Client(Settings(allow_reset=True))
 56 |         self.embedding_function = SentenceTransformerEmbeddingFunction(
 57 |             model_name=model_name
 58 |         )
 59 |         self.collection = self.client.get_or_create_collection(
 60 |             name=collection_name, embedding_function=self.embedding_function
 61 |         )
 62 | 
 63 |     def add_document(self, document: str, metadata: Dict, doc_id: str):
 64 |         """Add a document to ChromaDB.
 65 | 
 66 |         Args:
 67 |             document: Text content to add
 68 |             metadata: Dictionary of metadata
 69 |             doc_id: Unique identifier for the document
 70 |         """
 71 |         # Convert MemoryNote object to serializable format
 72 |         processed_metadata = {}
 73 |         for key, value in metadata.items():
 74 |             if isinstance(value, list):
 75 |                 processed_metadata[key] = json.dumps(value)
 76 |             elif isinstance(value, dict):
 77 |                 processed_metadata[key] = json.dumps(value)
 78 |             else:
 79 |                 processed_metadata[key] = str(value)
 80 | 
 81 |         self.collection.add(
 82 |             documents=[document], metadatas=[processed_metadata], ids=[doc_id]
 83 |         )
 84 | 
 85 |     def delete_document(self, doc_id: str):
 86 |         """Delete a document from ChromaDB.
 87 | 
 88 |         Args:
 89 |             doc_id: ID of document to delete
 90 |         """
 91 |         self.collection.delete(ids=[doc_id])
 92 | 
 93 |     def search(self, query: str, k: int = 5):
 94 |         """Search for similar documents.
 95 | 
 96 |         Args:
 97 |             query: Query text
 98 |             k: Number of results to return
 99 | 
100 |         Returns:
101 |             Dict with documents, metadatas, ids, and distances
102 |         """
103 |         results = self.collection.query(query_texts=[query], n_results=k)
104 |         
105 |         if (results is not None) and (results.get("metadatas", [])):
106 |             results["metadatas"] = self._convert_metadata_types(
107 |                 results["metadatas"])
108 |         
109 |         return results
110 | 
111 |     def _convert_metadata_types(
112 |         self, 
113 |         metadatas: List[List[Dict]]
114 |     ) -> List[List[Dict]]:
115 |         """Convert string metadata back to original types.
116 |         
117 |         Args:
118 |             metadatas: List of metadata lists from query results
119 |             
120 |         Returns:
121 |             Converted metadata structure
122 |         """
123 |         for query_metadatas in metadatas:
124 |             if isinstance(query_metadatas, List):
125 |                 for metadata_dict in query_metadatas:
126 |                     if isinstance(metadata_dict, Dict):
127 |                         self._convert_metadata_dict(metadata_dict)
128 |         return metadatas
129 | 
130 |     def _convert_metadata_dict(self, metadata: Dict) -> None:
131 |         """Convert metadata values from strings to appropriate types in-place.
132 |         
133 |         Args:
134 |             metadata: Single metadata dictionary to convert
135 |         """
136 |         for key, value in metadata.items():
137 |             # only attempt to convert strings
138 |             if not isinstance(value, str):
139 |                 continue
140 |             else:
141 |                 try:
142 |                     metadata[key] = ast.literal_eval(value)
143 |                 except Exception:
144 |                     pass
145 | 
146 | 
147 | class PersistentChromaRetriever(ChromaRetriever):
148 |     """
149 |     Persistent ChromaDB client/retriever to facilitate sharing of memory from
150 |         multiple agents across sessions.
151 |     Simply changes how the client and collection are initialized. Other
152 |         functionality is inherited from ChromaRetriever.
153 |     """
154 | 
155 |     def __init__(
156 |         self, 
157 |         directory: Optional[str] = None, 
158 |         collection_name: str = "memories", 
159 |         model_name: str = "all-MiniLM-L6-v2",
160 |         extend: bool = False
161 |     ):
162 |         """
163 |         Initialize persistent ChromaDB retriever.
164 |         
165 |         :param directory: Directory path for ChromaDB storage. Defaults to
166 |             '~/.chromadb' if None.
167 |         :collection_name: Name of the ChromaDB collection.
168 |         :model_name: SentenceTransformer model name for embeddings.
169 |         :extend: If True, allows initializes client and retriever from
170 |             collection if it exists. Raises error if False and collection
171 |             already exists. This prevents accidental overwriting of
172 |             existing collections.
173 |         """
174 |         if directory is None:
175 |             directory = Path.home() / '.chromadb'
176 |             directory.mkdir(parents=True, exist_ok=True)
177 |         elif isinstance(directory, str):
178 |             directory = Path(directory)
179 | 
180 |         try:
181 |             directory.resolve(strict=True)
182 |         except FileNotFoundError:
183 |             directory.mkdir(parents=True, exist_ok=True)
184 |         except Exception as e:
185 |             raise ValueError(f'Error accessing directory: {e}')        
186 | 
187 |         # Use PersistentClient instead of regular Client
188 |         self.client = chromadb.PersistentClient(path=str(directory))
189 |         self.embedding_function = SentenceTransformerEmbeddingFunction(
190 |             model_name=model_name)
191 |         
192 |         existing_collections = [col.name for col in self.client.list_collections()]
193 |         
194 |         if collection_name in existing_collections:
195 |             if extend:
196 |                 self.collection = self.client.get_collection(name=collection_name)
197 |             else:
198 |                 raise ValueError(
199 |                     f"Collection '{collection_name}' already exists. "
200 |                     "Use extend=True to add to it."
201 |                 )
202 |         else:
203 |             self.collection = self.client.get_or_create_collection(
204 |                 name=collection_name,
205 |                 embedding_function=self.embedding_function
206 |             )
207 |         self.collection_name = collection_name
208 | 
209 | 
210 | class CopiedChromaRetriever(PersistentChromaRetriever):
211 |     """
212 |     ChromaDB retriever that creates a copy of an existing collection
213 |         under to a temporary ChromaDB instance.
214 |     Useful for creating isolated copies of shared starting memory collections.
215 |     """
216 | 
217 |     def __init__(
218 |         self,
219 |         directory: Optional[str] = None, 
220 |         collection_name: str = "memories", 
221 |         model_name: str = "all-MiniLM-L6-v2",
222 |         _dest_collection_name: Optional[str] = None,
223 |         _copy_batch_size: int = 10,
224 |     ):
225 |         """
226 |         Initialize the CopiedChromaDB retriever.
227 | 
228 |         :param directory: Directory path for source ChromaDB storage. If None,
229 |             defaults to '~/.chromadb'.
230 |         :param collection_name: Name of the source ChromaDB collection to copy.
231 |         :param model_name: SentenceTransformer model name for embeddings.
232 |         :param _dest_collection_name: Optional name for the destination
233 |             collection. If None, defaults to '{collection_name}__clone'.
234 |             This parameter is marked as private as the class itself is meant
235 |             for single use and discard db that exists in a temporary so naming
236 |             the copied collection is most likely not needed. 
237 |         :param _copy_batch_size: Number of documents to copy per batch.
238 |             Shouldn't need to be changed normally. 
239 |         """
240 | 
241 |         self.embedding_function = SentenceTransformerEmbeddingFunction(
242 |             model_name=model_name)
243 | 
244 |         # ensure source is valid
245 |         if directory is None:
246 |             directory = Path.home() / '.chromadb'
247 |             directory.mkdir(parents=True, exist_ok=True)
248 |         elif isinstance(directory, str):
249 |             directory = Path(directory)
250 |         self._src_client = chromadb.PersistentClient(path=str(directory))
251 | 
252 |         self._src = self._src_client.get_collection(name=collection_name)
253 |         existing_collections = [
254 |             col.name for col in self._src_client.list_collections()]
255 |         if collection_name not in existing_collections:
256 |             raise ValueError(
257 |                 f"Collection '{collection_name}' to be copied does not exist."
258 |             )        
259 | 
260 |         # use temp directory for destination collection
261 |         try:
262 |             self._tmpdir = tempfile.TemporaryDirectory(
263 |                 prefix='chromadb_ephemeral_')
264 |             self._tmp_path = Path(self._tmpdir.name)
265 |             self._dst_client = chromadb.PersistentClient(
266 |                 path=str(self._tmp_path)
267 |             )
268 |             self.collection_name = (
269 |                 _dest_collection_name 
270 |                 or f"{collection_name}__clone"
271 |             )
272 |             self.collection = self._dst_client.get_or_create_collection(
273 |                 name=self.collection_name,
274 |                 embedding_function=self.embedding_function,
275 |                 metadata=self._src.metadata
276 |             )
277 |         except Exception as e:
278 |             raise ValueError(f"Error creating temporary ChromaDB: {e}")
279 |         
280 |         try:
281 |             _clone_collection(
282 |                 src=self._src,
283 |                 dest=self.collection,
284 |                 batch_size=_copy_batch_size,
285 |             )
286 |         except Exception as e:
287 |             raise ValueError(f"Error cloning ChromaDB collection: {e}")
288 |         
289 |         atexit.register(self.close)
290 | 
291 |     def close(self):
292 |         """Cleanup temporary directory."""
293 |         try:
294 |             self._dst_client.delete_collection(self.collection_name)
295 |         except Exception:
296 |             pass
297 |         try:
298 |             self._tmpdir.cleanup()
299 |         except Exception:
300 |             pass
301 | 
302 |     def __exit__(self, exc_type, exc_value, traceback):
303 |         self.close()
304 | 


--------------------------------------------------------------------------------
/agentic_memory/memory_system.py:
--------------------------------------------------------------------------------
  1 | import keyword
  2 | from typing import List, Dict, Optional, Any, Tuple
  3 | import uuid
  4 | from datetime import datetime
  5 | from .llm_controller import LLMController
  6 | from .retrievers import ChromaRetriever
  7 | import json
  8 | import logging
  9 | from rank_bm25 import BM25Okapi
 10 | from sentence_transformers import SentenceTransformer
 11 | import numpy as np
 12 | from sklearn.metrics.pairwise import cosine_similarity
 13 | import os
 14 | from abc import ABC, abstractmethod
 15 | from transformers import AutoModel, AutoTokenizer
 16 | from nltk.tokenize import word_tokenize
 17 | import pickle
 18 | from pathlib import Path
 19 | from litellm import completion
 20 | import time
 21 | 
 22 | logger = logging.getLogger(__name__)
 23 | 
 24 | class MemoryNote:
 25 |     """A memory note that represents a single unit of information in the memory system.
 26 |     
 27 |     This class encapsulates all metadata associated with a memory, including:
 28 |     - Core content and identifiers
 29 |     - Temporal information (creation and access times)
 30 |     - Semantic metadata (keywords, context, tags)
 31 |     - Relationship data (links to other memories)
 32 |     - Usage statistics (retrieval count)
 33 |     - Evolution tracking (history of changes)
 34 |     """
 35 |     
 36 |     def __init__(self, 
 37 |                  content: str,
 38 |                  id: Optional[str] = None,
 39 |                  keywords: Optional[List[str]] = None,
 40 |                  links: Optional[Dict] = None,
 41 |                  retrieval_count: Optional[int] = None,
 42 |                  timestamp: Optional[str] = None,
 43 |                  last_accessed: Optional[str] = None,
 44 |                  context: Optional[str] = None,
 45 |                  evolution_history: Optional[List] = None,
 46 |                  category: Optional[str] = None,
 47 |                  tags: Optional[List[str]] = None):
 48 |         """Initialize a new memory note with its associated metadata.
 49 |         
 50 |         Args:
 51 |             content (str): The main text content of the memory
 52 |             id (Optional[str]): Unique identifier for the memory. If None, a UUID will be generated
 53 |             keywords (Optional[List[str]]): Key terms extracted from the content
 54 |             links (Optional[Dict]): References to related memories
 55 |             retrieval_count (Optional[int]): Number of times this memory has been accessed
 56 |             timestamp (Optional[str]): Creation time in format YYYYMMDDHHMM
 57 |             last_accessed (Optional[str]): Last access time in format YYYYMMDDHHMM
 58 |             context (Optional[str]): The broader context or domain of the memory
 59 |             evolution_history (Optional[List]): Record of how the memory has evolved
 60 |             category (Optional[str]): Classification category
 61 |             tags (Optional[List[str]]): Additional classification tags
 62 |         """
 63 |         # Core content and ID
 64 |         self.content = content
 65 |         self.id = id or str(uuid.uuid4())
 66 |         
 67 |         # Semantic metadata
 68 |         self.keywords = keywords or []
 69 |         self.links = links or []
 70 |         self.context = context or "General"
 71 |         self.category = category or "Uncategorized"
 72 |         self.tags = tags or []
 73 |         
 74 |         # Temporal information
 75 |         current_time = datetime.now().strftime("%Y%m%d%H%M")
 76 |         self.timestamp = timestamp or current_time
 77 |         self.last_accessed = last_accessed or current_time
 78 |         
 79 |         # Usage and evolution data
 80 |         self.retrieval_count = retrieval_count or 0
 81 |         self.evolution_history = evolution_history or []
 82 | 
 83 | class AgenticMemorySystem:
 84 |     """Core memory system that manages memory notes and their evolution.
 85 |     
 86 |     This system provides:
 87 |     - Memory creation, retrieval, update, and deletion
 88 |     - Content analysis and metadata extraction
 89 |     - Memory evolution and relationship management
 90 |     - Hybrid search capabilities
 91 |     """
 92 |     
 93 |     def __init__(self, 
 94 |                  model_name: str = 'all-MiniLM-L6-v2',
 95 |                  llm_backend: str = "openai",
 96 |                  llm_model: str = "gpt-4o-mini",
 97 |                  evo_threshold: int = 100,
 98 |                  api_key: Optional[str] = None):  
 99 |         """Initialize the memory system.
100 |         
101 |         Args:
102 |             model_name: Name of the sentence transformer model
103 |             llm_backend: LLM backend to use (openai/ollama)
104 |             llm_model: Name of the LLM model
105 |             evo_threshold: Number of memories before triggering evolution
106 |             api_key: API key for the LLM service
107 |         """
108 |         self.memories = {}
109 |         self.model_name = model_name
110 |         # Initialize ChromaDB retriever with empty collection
111 |         try:
112 |             # First try to reset the collection if it exists
113 |             temp_retriever = ChromaRetriever(collection_name="memories",model_name=self.model_name)
114 |             temp_retriever.client.reset()
115 |         except Exception as e:
116 |             logger.warning(f"Could not reset ChromaDB collection: {e}")
117 |             
118 |         # Create a fresh retriever instance
119 |         self.retriever = ChromaRetriever(collection_name="memories",model_name=self.model_name)
120 |         
121 |         # Initialize LLM controller
122 |         self.llm_controller = LLMController(llm_backend, llm_model, api_key)
123 |         self.evo_cnt = 0
124 |         self.evo_threshold = evo_threshold
125 | 
126 |         # Evolution system prompt
127 |         self._evolution_system_prompt = '''
128 |                                 You are an AI memory evolution agent responsible for managing and evolving a knowledge base.
129 |                                 Analyze the the new memory note according to keywords and context, also with their several nearest neighbors memory.
130 |                                 Make decisions about its evolution.  
131 | 
132 |                                 The new memory context:
133 |                                 {context}
134 |                                 content: {content}
135 |                                 keywords: {keywords}
136 | 
137 |                                 The nearest neighbors memories:
138 |                                 {nearest_neighbors_memories}
139 | 
140 |                                 Based on this information, determine:
141 |                                 1. Should this memory be evolved? Consider its relationships with other memories.
142 |                                 2. What specific actions should be taken (strengthen, update_neighbor)?
143 |                                    2.1 If choose to strengthen the connection, which memory should it be connected to? Can you give the updated tags of this memory?
144 |                                    2.2 If choose to update_neighbor, you can update the context and tags of these memories based on the understanding of these memories. If the context and the tags are not updated, the new context and tags should be the same as the original ones. Generate the new context and tags in the sequential order of the input neighbors.
145 |                                 Tags should be determined by the content of these characteristic of these memories, which can be used to retrieve them later and categorize them.
146 |                                 Note that the length of new_tags_neighborhood must equal the number of input neighbors, and the length of new_context_neighborhood must equal the number of input neighbors.
147 |                                 The number of neighbors is {neighbor_number}.
148 |                                 Return your decision in JSON format with the following structure:
149 |                                 {{
150 |                                     "should_evolve": True or False,
151 |                                     "actions": ["strengthen", "update_neighbor"],
152 |                                     "suggested_connections": ["neighbor_memory_ids"],
153 |                                     "tags_to_update": ["tag_1",..."tag_n"], 
154 |                                     "new_context_neighborhood": ["new context",...,"new context"],
155 |                                     "new_tags_neighborhood": [["tag_1",...,"tag_n"],...["tag_1",...,"tag_n"]],
156 |                                 }}
157 |                                 '''
158 |         
159 |     def analyze_content(self, content: str) -> Dict:            
160 |         """Analyze content using LLM to extract semantic metadata.
161 |         
162 |         Uses a language model to understand the content and extract:
163 |         - Keywords: Important terms and concepts
164 |         - Context: Overall domain or theme
165 |         - Tags: Classification categories
166 |         
167 |         Args:
168 |             content (str): The text content to analyze
169 |             
170 |         Returns:
171 |             Dict: Contains extracted metadata with keys:
172 |                 - keywords: List[str]
173 |                 - context: str
174 |                 - tags: List[str]
175 |         """
176 |         prompt = """Generate a structured analysis of the following content by:
177 |             1. Identifying the most salient keywords (focus on nouns, verbs, and key concepts)
178 |             2. Extracting core themes and contextual elements
179 |             3. Creating relevant categorical tags
180 | 
181 |             Format the response as a JSON object:
182 |             {
183 |                 "keywords": [
184 |                     // several specific, distinct keywords that capture key concepts and terminology
185 |                     // Order from most to least important
186 |                     // Don't include keywords that are the name of the speaker or time
187 |                     // At least three keywords, but don't be too redundant.
188 |                 ],
189 |                 "context": 
190 |                     // one sentence summarizing:
191 |                     // - Main topic/domain
192 |                     // - Key arguments/points
193 |                     // - Intended audience/purpose
194 |                 ,
195 |                 "tags": [
196 |                     // several broad categories/themes for classification
197 |                     // Include domain, format, and type tags
198 |                     // At least three tags, but don't be too redundant.
199 |                 ]
200 |             }
201 | 
202 |             Content for analysis:
203 |             """ + content
204 |         try:
205 |             response = self.llm_controller.llm.get_completion(prompt, response_format={"type": "json_schema", "json_schema": {
206 |                         "name": "response",
207 |                         "schema": {
208 |                             "type": "object",
209 |                             "properties": {
210 |                                 "keywords": {
211 |                                     "type": "array",
212 |                                     "items": {
213 |                                         "type": "string"
214 |                                     }
215 |                                 },
216 |                                 "context": {
217 |                                     "type": "string",
218 |                                 },
219 |                                 "tags": {
220 |                                     "type": "array",
221 |                                     "items": {
222 |                                         "type": "string"
223 |                                     }
224 |                                 }
225 |                             }
226 |                         }
227 |                     }})
228 |             return json.loads(response)
229 |         except Exception as e:
230 |             print(f"Error analyzing content: {e}")
231 |             return {"keywords": [], "context": "General", "tags": []}
232 | 
233 |     def add_note(self, content: str, time: str = None, **kwargs) -> str:
234 |         """Add a new memory note"""
235 |         # Create MemoryNote without llm_controller
236 |         if time is not None:
237 |             kwargs['timestamp'] = time
238 |         note = MemoryNote(content=content, **kwargs)
239 |         
240 |         # Update retriever with all documents
241 |         evo_label, note = self.process_memory(note)
242 |         self.memories[note.id] = note
243 |         
244 |         # Add to ChromaDB with complete metadata
245 |         metadata = {
246 |             "id": note.id,
247 |             "content": note.content,
248 |             "keywords": note.keywords,
249 |             "links": note.links,
250 |             "retrieval_count": note.retrieval_count,
251 |             "timestamp": note.timestamp,
252 |             "last_accessed": note.last_accessed,
253 |             "context": note.context,
254 |             "evolution_history": note.evolution_history,
255 |             "category": note.category,
256 |             "tags": note.tags
257 |         }
258 |         self.retriever.add_document(note.content, metadata, note.id)
259 |         
260 |         if evo_label == True:
261 |             self.evo_cnt += 1
262 |             if self.evo_cnt % self.evo_threshold == 0:
263 |                 self.consolidate_memories()
264 |         return note.id
265 |     
266 |     def consolidate_memories(self):
267 |         """Consolidate memories: update retriever with new documents"""
268 |         # Reset ChromaDB collection
269 |         self.retriever = ChromaRetriever(collection_name="memories",model_name=self.model_name)
270 |         
271 |         # Re-add all memory documents with their complete metadata
272 |         for memory in self.memories.values():
273 |             metadata = {
274 |                 "id": memory.id,
275 |                 "content": memory.content,
276 |                 "keywords": memory.keywords,
277 |                 "links": memory.links,
278 |                 "retrieval_count": memory.retrieval_count,
279 |                 "timestamp": memory.timestamp,
280 |                 "last_accessed": memory.last_accessed,
281 |                 "context": memory.context,
282 |                 "evolution_history": memory.evolution_history,
283 |                 "category": memory.category,
284 |                 "tags": memory.tags
285 |             }
286 |             self.retriever.add_document(memory.content, metadata, memory.id)
287 |     
288 |     def find_related_memories(self, query: str, k: int = 5) -> Tuple[str, List[int]]:
289 |         """Find related memories using ChromaDB retrieval"""
290 |         if not self.memories:
291 |             return "", []
292 |             
293 |         try:
294 |             # Get results from ChromaDB
295 |             results = self.retriever.search(query, k)
296 |             
297 |             # Convert to list of memories
298 |             memory_str = ""
299 |             indices = []
300 |             
301 |             if 'ids' in results and results['ids'] and len(results['ids']) > 0 and len(results['ids'][0]) > 0:
302 |                 for i, doc_id in enumerate(results['ids'][0]):
303 |                     # Get metadata from ChromaDB results
304 |                     if i < len(results['metadatas'][0]):
305 |                         metadata = results['metadatas'][0][i]
306 |                         # Format memory string
307 |                         memory_str += f"memory index:{i}\ttalk start time:{metadata.get('timestamp', '')}\tmemory content: {metadata.get('content', '')}\tmemory context: {metadata.get('context', '')}\tmemory keywords: {str(metadata.get('keywords', []))}\tmemory tags: {str(metadata.get('tags', []))}\n"
308 |                         indices.append(i)
309 |                     
310 |             return memory_str, indices
311 |         except Exception as e:
312 |             logger.error(f"Error in find_related_memories: {str(e)}")
313 |             return "", []
314 | 
315 |     def find_related_memories_raw(self, query: str, k: int = 5) -> str:
316 |         """Find related memories using ChromaDB retrieval in raw format"""
317 |         if not self.memories:
318 |             return ""
319 |             
320 |         # Get results from ChromaDB
321 |         results = self.retriever.search(query, k)
322 |         
323 |         # Convert to list of memories
324 |         memory_str = ""
325 |         
326 |         if 'ids' in results and results['ids'] and len(results['ids']) > 0:
327 |             for i, doc_id in enumerate(results['ids'][0][:k]):
328 |                 if i < len(results['metadatas'][0]):
329 |                     # Get metadata from ChromaDB results
330 |                     metadata = results['metadatas'][0][i]
331 |                     
332 |                     # Add main memory info
333 |                     memory_str += f"talk start time:{metadata.get('timestamp', '')}\tmemory content: {metadata.get('content', '')}\tmemory context: {metadata.get('context', '')}\tmemory keywords: {str(metadata.get('keywords', []))}\tmemory tags: {str(metadata.get('tags', []))}\n"
334 |                     
335 |                     # Add linked memories if available
336 |                     links = metadata.get('links', [])
337 |                     j = 0
338 |                     for link_id in links:
339 |                         if link_id in self.memories and j < k:
340 |                             neighbor = self.memories[link_id]
341 |                             memory_str += f"talk start time:{neighbor.timestamp}\tmemory content: {neighbor.content}\tmemory context: {neighbor.context}\tmemory keywords: {str(neighbor.keywords)}\tmemory tags: {str(neighbor.tags)}\n"
342 |                             j += 1
343 |                             
344 |         return memory_str
345 | 
346 |     def read(self, memory_id: str) -> Optional[MemoryNote]:
347 |         """Retrieve a memory note by its ID.
348 |         
349 |         Args:
350 |             memory_id (str): ID of the memory to retrieve
351 |             
352 |         Returns:
353 |             MemoryNote if found, None otherwise
354 |         """
355 |         return self.memories.get(memory_id)
356 |     
357 |     def update(self, memory_id: str, **kwargs) -> bool:
358 |         """Update a memory note.
359 |         
360 |         Args:
361 |             memory_id: ID of memory to update
362 |             **kwargs: Fields to update
363 |             
364 |         Returns:
365 |             bool: True if update successful
366 |         """
367 |         if memory_id not in self.memories:
368 |             return False
369 |             
370 |         note = self.memories[memory_id]
371 |         
372 |         # Update fields
373 |         for key, value in kwargs.items():
374 |             if hasattr(note, key):
375 |                 setattr(note, key, value)
376 |                 
377 |         # Update in ChromaDB
378 |         metadata = {
379 |             "id": note.id,
380 |             "content": note.content,
381 |             "keywords": note.keywords,
382 |             "links": note.links,
383 |             "retrieval_count": note.retrieval_count,
384 |             "timestamp": note.timestamp,
385 |             "last_accessed": note.last_accessed,
386 |             "context": note.context,
387 |             "evolution_history": note.evolution_history,
388 |             "category": note.category,
389 |             "tags": note.tags
390 |         }
391 |         
392 |         # Delete and re-add to update
393 |         self.retriever.delete_document(memory_id)
394 |         self.retriever.add_document(document=note.content, metadata=metadata, doc_id=memory_id)
395 |         
396 |         return True
397 |     
398 |     def delete(self, memory_id: str) -> bool:
399 |         """Delete a memory note by its ID.
400 |         
401 |         Args:
402 |             memory_id (str): ID of the memory to delete
403 |             
404 |         Returns:
405 |             bool: True if memory was deleted, False if not found
406 |         """
407 |         if memory_id in self.memories:
408 |             # Delete from ChromaDB
409 |             self.retriever.delete_document(memory_id)
410 |             # Delete from local storage
411 |             del self.memories[memory_id]
412 |             return True
413 |         return False
414 |     
415 |     def _search_raw(self, query: str, k: int = 5) -> List[Dict[str, Any]]:
416 |         """Internal search method that returns raw results from ChromaDB.
417 |         
418 |         This is used internally by the memory evolution system to find
419 |         related memories for potential evolution.
420 |         
421 |         Args:
422 |             query (str): The search query text
423 |             k (int): Maximum number of results to return
424 |             
425 |         Returns:
426 |             List[Dict[str, Any]]: Raw search results from ChromaDB
427 |         """
428 |         results = self.retriever.search(query, k)
429 |         return [{'id': doc_id, 'score': score} 
430 |                 for doc_id, score in zip(results['ids'][0], results['distances'][0])]
431 |                 
432 |     def search(self, query: str, k: int = 5) -> List[Dict[str, Any]]:
433 |         """Search for memories using a hybrid retrieval approach."""
434 |         # Get results from ChromaDB (only do this once)
435 |         search_results = self.retriever.search(query, k)
436 |         memories = []
437 |         
438 |         # Process ChromaDB results
439 |         for i, doc_id in enumerate(search_results['ids'][0]):
440 |             memory = self.memories.get(doc_id)
441 |             if memory:
442 |                 memories.append({
443 |                     'id': doc_id,
444 |                     'content': memory.content,
445 |                     'context': memory.context,
446 |                     'keywords': memory.keywords,
447 |                     'score': search_results['distances'][0][i]
448 |                 })
449 |         
450 |         return memories[:k]
451 |     
452 |     def _search(self, query: str, k: int = 5) -> List[Dict[str, Any]]:
453 |         """Search for memories using a hybrid retrieval approach.
454 |         
455 |         This method combines results from both:
456 |         1. ChromaDB vector store (semantic similarity)
457 |         2. Embedding-based retrieval (dense vectors)
458 |         
459 |         The results are deduplicated and ranked by relevance.
460 |         
461 |         Args:
462 |             query (str): The search query text
463 |             k (int): Maximum number of results to return
464 |             
465 |         Returns:
466 |             List[Dict[str, Any]]: List of search results, each containing:
467 |                 - id: Memory ID
468 |                 - content: Memory content
469 |                 - score: Similarity score
470 |                 - metadata: Additional memory metadata
471 |         """
472 |         # Get results from ChromaDB
473 |         chroma_results = self.retriever.search(query, k)
474 |         memories = []
475 |         
476 |         # Process ChromaDB results
477 |         for i, doc_id in enumerate(chroma_results['ids'][0]):
478 |             memory = self.memories.get(doc_id)
479 |             if memory:
480 |                 memories.append({
481 |                     'id': doc_id,
482 |                     'content': memory.content,
483 |                     'context': memory.context,
484 |                     'keywords': memory.keywords,
485 |                     'score': chroma_results['distances'][0][i]
486 |                 })
487 |                 
488 |         # Get results from embedding retriever
489 |         embedding_results = self.retriever.search(query, k)
490 |         
491 |         # Combine results with deduplication
492 |         seen_ids = set(m['id'] for m in memories)
493 |         for result in embedding_results:
494 |             memory_id = result.get('id')
495 |             if memory_id and memory_id not in seen_ids:
496 |                 memory = self.memories.get(memory_id)
497 |                 if memory:
498 |                     memories.append({
499 |                         'id': memory_id,
500 |                         'content': memory.content,
501 |                         'context': memory.context,
502 |                         'keywords': memory.keywords,
503 |                         'score': result.get('score', 0.0)
504 |                     })
505 |                     seen_ids.add(memory_id)
506 |                     
507 |         return memories[:k]
508 | 
509 |     def search_agentic(self, query: str, k: int = 5) -> List[Dict[str, Any]]:
510 |         """Search for memories using ChromaDB retrieval."""
511 |         if not self.memories:
512 |             return []
513 |             
514 |         try:
515 |             # Get results from ChromaDB
516 |             results = self.retriever.search(query, k)
517 |             
518 |             # Process results
519 |             memories = []
520 |             seen_ids = set()
521 |             
522 |             # Check if we have valid results
523 |             if ('ids' not in results or not results['ids'] or 
524 |                 len(results['ids']) == 0 or len(results['ids'][0]) == 0):
525 |                 return []
526 |                 
527 |             # Process ChromaDB results
528 |             for i, doc_id in enumerate(results['ids'][0][:k]):
529 |                 if doc_id in seen_ids:
530 |                     continue
531 |                     
532 |                 if i < len(results['metadatas'][0]):
533 |                     metadata = results['metadatas'][0][i]
534 |                     
535 |                     # Create result dictionary with all metadata fields
536 |                     memory_dict = {
537 |                         'id': doc_id,
538 |                         'content': metadata.get('content', ''),
539 |                         'context': metadata.get('context', ''),
540 |                         'keywords': metadata.get('keywords', []),
541 |                         'tags': metadata.get('tags', []),
542 |                         'timestamp': metadata.get('timestamp', ''),
543 |                         'category': metadata.get('category', 'Uncategorized'),
544 |                         'is_neighbor': False
545 |                     }
546 |                     
547 |                     # Add score if available
548 |                     if 'distances' in results and len(results['distances']) > 0 and i < len(results['distances'][0]):
549 |                         memory_dict['score'] = results['distances'][0][i]
550 |                         
551 |                     memories.append(memory_dict)
552 |                     seen_ids.add(doc_id)
553 |             
554 |             # Add linked memories (neighbors)
555 |             neighbor_count = 0
556 |             for memory in list(memories):  # Use a copy to avoid modification during iteration
557 |                 if neighbor_count >= k:
558 |                     break
559 |                     
560 |                 # Get links from metadata
561 |                 links = memory.get('links', [])
562 |                 if not links and 'id' in memory:
563 |                     # Try to get links from memory object
564 |                     mem_obj = self.memories.get(memory['id'])
565 |                     if mem_obj:
566 |                         links = mem_obj.links
567 |                         
568 |                 for link_id in links:
569 |                     if link_id not in seen_ids and neighbor_count < k:
570 |                         neighbor = self.memories.get(link_id)
571 |                         if neighbor:
572 |                             memories.append({
573 |                                 'id': link_id,
574 |                                 'content': neighbor.content,
575 |                                 'context': neighbor.context,
576 |                                 'keywords': neighbor.keywords,
577 |                                 'tags': neighbor.tags,
578 |                                 'timestamp': neighbor.timestamp,
579 |                                 'category': neighbor.category,
580 |                                 'is_neighbor': True
581 |                             })
582 |                             seen_ids.add(link_id)
583 |                             neighbor_count += 1
584 |             
585 |             return memories[:k]
586 |         except Exception as e:
587 |             logger.error(f"Error in search_agentic: {str(e)}")
588 |             return []
589 | 
590 |     def process_memory(self, note: MemoryNote) -> Tuple[bool, MemoryNote]:
591 |         """Process a memory note and determine if it should evolve.
592 |         
593 |         Args:
594 |             note: The memory note to process
595 |             
596 |         Returns:
597 |             Tuple[bool, MemoryNote]: (should_evolve, processed_note)
598 |         """
599 |         # For first memory or testing, just return the note without evolution
600 |         if not self.memories:
601 |             return False, note
602 |             
603 |         try:
604 |             # Get nearest neighbors
605 |             neighbors_text, indices = self.find_related_memories(note.content, k=5)
606 |             if not neighbors_text or not indices:
607 |                 return False, note
608 |                 
609 |             # Format neighbors for LLM - in this case, neighbors_text is already formatted
610 |             
611 |             # Query LLM for evolution decision
612 |             prompt = self._evolution_system_prompt.format(
613 |                 content=note.content,
614 |                 context=note.context,
615 |                 keywords=note.keywords,
616 |                 nearest_neighbors_memories=neighbors_text,
617 |                 neighbor_number=len(indices)
618 |             )
619 |             
620 |             try:
621 |                 response = self.llm_controller.llm.get_completion(
622 |                     prompt,
623 |                     response_format={"type": "json_schema", "json_schema": {
624 |                         "name": "response",
625 |                         "schema": {
626 |                             "type": "object",
627 |                             "properties": {
628 |                                 "should_evolve": {
629 |                                     "type": "boolean"
630 |                                 },
631 |                                 "actions": {
632 |                                     "type": "array",
633 |                                     "items": {
634 |                                         "type": "string"
635 |                                     }
636 |                                 },
637 |                                 "suggested_connections": {
638 |                                     "type": "array",
639 |                                     "items": {
640 |                                         "type": "string"
641 |                                     }
642 |                                 },
643 |                                 "new_context_neighborhood": {
644 |                                     "type": "array",
645 |                                     "items": {
646 |                                         "type": "string"
647 |                                     }
648 |                                 },
649 |                                 "tags_to_update": {
650 |                                     "type": "array",
651 |                                     "items": {
652 |                                         "type": "string"
653 |                                     }
654 |                                 },
655 |                                 "new_tags_neighborhood": {
656 |                                     "type": "array",
657 |                                     "items": {
658 |                                         "type": "array",
659 |                                         "items": {
660 |                                             "type": "string"
661 |                                         }
662 |                                     }
663 |                                 }
664 |                             },
665 |                             "required": ["should_evolve", "actions", "suggested_connections", 
666 |                                       "tags_to_update", "new_context_neighborhood", "new_tags_neighborhood"],
667 |                             "additionalProperties": False
668 |                         },
669 |                         "strict": True
670 |                     }}
671 |                 )
672 |                 
673 |                 response_json = json.loads(response)
674 |                 should_evolve = response_json["should_evolve"]
675 |                 
676 |                 if should_evolve:
677 |                     actions = response_json["actions"]
678 |                     for action in actions:
679 |                         if action == "strengthen":
680 |                             suggest_connections = response_json["suggested_connections"]
681 |                             new_tags = response_json["tags_to_update"]
682 |                             note.links.extend(suggest_connections)
683 |                             note.tags = new_tags
684 |                         elif action == "update_neighbor":
685 |                             new_context_neighborhood = response_json["new_context_neighborhood"]
686 |                             new_tags_neighborhood = response_json["new_tags_neighborhood"]
687 |                             noteslist = list(self.memories.values())
688 |                             notes_id = list(self.memories.keys())
689 |                             
690 |                             for i in range(min(len(indices), len(new_tags_neighborhood))):
691 |                                 # Skip if we don't have enough neighbors
692 |                                 if i >= len(indices):
693 |                                     continue
694 |                                     
695 |                                 tag = new_tags_neighborhood[i]
696 |                                 if i < len(new_context_neighborhood):
697 |                                     context = new_context_neighborhood[i]
698 |                                 else:
699 |                                     # Since indices are just numbers now, we need to find the memory
700 |                                     # In memory list using its index number
701 |                                     if i < len(noteslist):
702 |                                         context = noteslist[i].context
703 |                                     else:
704 |                                         continue
705 |                                         
706 |                                 # Get index from the indices list
707 |                                 if i < len(indices):
708 |                                     memorytmp_idx = indices[i]
709 |                                     # Make sure the index is valid
710 |                                     if memorytmp_idx < len(noteslist):
711 |                                         notetmp = noteslist[memorytmp_idx]
712 |                                         notetmp.tags = tag
713 |                                         notetmp.context = context
714 |                                         # Make sure the index is valid
715 |                                         if memorytmp_idx < len(notes_id):
716 |                                             self.memories[notes_id[memorytmp_idx]] = notetmp
717 |                                 
718 |                 return should_evolve, note
719 |                 
720 |             except (json.JSONDecodeError, KeyError, Exception) as e:
721 |                 logger.error(f"Error in memory evolution: {str(e)}")
722 |                 return False, note
723 |                 
724 |         except Exception as e:
725 |             # For testing purposes, catch all exceptions and return the original note
726 |             logger.error(f"Error in process_memory: {str(e)}")
727 |             return False, note
728 | 


--------------------------------------------------------------------------------