├── =2.5.0
├── =4.36.0
├── .venv
│   ├── bin
│   │   ├── python
│   │   ├── python3
│   │   ├── pip
│   │   ├── pip3
│   │   ├── pip3.10
│   │   ├── pip3.7
│   │   ├── easy_install
│   │   ├── easy_install-3.7
│   │   ├── activate.csh
│   │   ├── activate
│   │   └── activate.fish
│   └── pyvenv.cfg
├── install_oracle.sh
├── src
│   └── memorizz
│       ├── short_term_memory
│       │   ├── working_memory
│       │   │   ├── __init__.py
│       │   │   └── cwm.py
│       │   └── __init__.py
│       ├── long_term_memory
│       │   ├── procedural
│       │   │   ├── toolbox
│       │   │   │   ├── __init__.py
│       │   │   │   └── tool_schema.py
│       │   │   ├── workflow
│       │   │   │   └── __init__.py
│       │   │   ├── __init__.py
│       │   │   └── persona
│       │   │       └── README.md
│       │   ├── semantic
│       │   │   ├── persona
│       │   │   │   ├── __init__.py
│       │   │   │   ├── role_type.py
│       │   │   │   └── README.md
│       │   │   ├── entity_memory
│       │   │   │   ├── __init__.py
│       │   │   │   └── README.md
│       │   │   └── __init__.py
│       │   ├── episodic
│       │   │   ├── __init__.py
│       │   │   ├── conversational_memory_unit.py
│       │   │   └── summary_component.py
│       │   └── __init__.py
│       ├── coordination
│       │   ├── __init__.py
│       │   └── shared_memory
│       │       ├── __init__.py
│       │       └── messages.py
│       ├── memory_provider
│       │   ├── mongodb
│       │   │   └── __init__.py
│       │   ├── filesystem
│       │   │   └── __init__.py
│       │   ├── oracle
│       │   │   ├── __init__.py
│       │   │   └── requirements.txt
│       │   ├── __init__.py
│       │   └── base.py
│       ├── llms
│       │   ├── __init__.py
│       │   ├── llm_factory.py
│       │   └── llm_provider.py
│       ├── internet_access
│       │   ├── providers
│       │   │   ├── __init__.py
│       │   │   └── offline.py
│       │   ├── models.py
│       │   ├── base.py
│       │   └── __init__.py
│       ├── database
│       │   └── __init__.py
│       ├── enums
│       │   ├── role.py
│       │   ├── semantic_cache_scope.py
│       │   ├── __init__.py
│       │   ├── memory_type.py
│       │   └── application_mode.py
│       ├── memagent
│       │   ├── orchestrators
│       │   │   ├── multi_agent_orchestrator.py
│       │   │   └── __init__.py
│       │   ├── handlers
│       │   │   └── __init__.py
│       │   ├── utils
│       │   │   └── __init__.py
│       │   ├── builders
│       │   │   ├── __init__.py
│       │   │   └── config_builder.py
│       │   ├── constants.py
│       │   ├── managers
│       │   │   ├── __init__.py
│       │   │   ├── internet_access_manager.py
│       │   │   └── workflow_manager.py
│       │   ├── __init__.py
│       │   └── models.py
│       ├── embeddings
│       │   ├── openai
│       │   │   ├── __init__.py
│       │   │   └── provider.py
│       │   ├── azure
│       │   │   └── __init__.py
│       │   ├── huggingface
│       │   │   └── __init__.py
│       │   ├── ollama
│       │   │   └── __init__.py
│       │   └── voyageai
│       │       └── __init__.py
│       ├── memory_unit
│       │   ├── __init__.py
│       │   ├── conversational_memory_unit.py
│       │   ├── semantic_cache_entry.py
│       │   └── summary_component.py
│       ├── memagent.py
│       ├── __init__.py
│       ├── tests
│       │   └── test_vegetarian_recipe_agent.py
│       └── cli.py
├── tests
│   ├── unit
│   │   ├── __init__.py
│   │   ├── test_firecrawl_provider.py
│   │   ├── test_tavily_provider.py
│   │   ├── test_internet_access.py
│   │   ├── test_filesystem_provider.py
│   │   └── test_entity_memory.py
│   ├── __init__.py
│   ├── integration
│   │   └── __init__.py
│   ├── mocks
│   │   └── __init__.py
│   └── performance
│       └── __init__.py
├── examples
│   ├── images
│   │   └── memorizz_script_output.png
│   └── setup_oracle_user.py
├── .gitignore
├── .env.example
├── docs
│   ├── use-cases
│   │   ├── workflow-mode.md
│   │   └── assistant-mode.md
│   ├── memory-types
│   │   ├── episodic.md
│   │   ├── short-term.md
│   │   ├── shared.md
│   │   ├── semantic.md
│   │   └── procedural.md
│   ├── memory-providers
│   │   ├── custom.md
│   │   ├── mongodb.md
│   │   ├── oracle.md
│   │   └── filesystem.md
│   ├── index.md
│   ├── getting-started
│   │   ├── overview.md
│   │   ├── python-sdk-quickstart.md
│   │   └── concepts.md
│   ├── utilities
│   │   └── context_window_stats.md
│   └── internet-access
│       └── providers.md
├── .github
│   └── workflows
│       └── docs.yml
├── .pre-commit-config.yaml
├── pyproject.toml
├── Makefile
├── pytest.ini
├── setup_dev.sh
├── eval
│   ├── README.md
│   └── longmemeval
│       ├── README.md
│       ├── README_evaluation_architectures.md
│       └── download_dataset.py
├── install_oracle_client.sh
├── mkdocs.yml
├── .claude.md
└── =0.26.0

/=2.5.0:
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /=4.36.0: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.venv/bin/python: -------------------------------------------------------------------------------- 1 | python3 -------------------------------------------------------------------------------- /.venv/bin/python3: -------------------------------------------------------------------------------- 1 | /usr/local/bin/python3 -------------------------------------------------------------------------------- /install_oracle.sh: -------------------------------------------------------------------------------- 1 | src/memorizz/scripts/install_oracle.sh -------------------------------------------------------------------------------- /src/memorizz/short_term_memory/working_memory/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/unit/__init__.py: -------------------------------------------------------------------------------- 1 | """Unit tests for MemAgent components.""" 2 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Test package for MemAgent refactored architecture.""" 2 | -------------------------------------------------------------------------------- /tests/integration/__init__.py: -------------------------------------------------------------------------------- 1 | """Integration tests for MemAgent components.""" 2 | -------------------------------------------------------------------------------- /tests/mocks/__init__.py: -------------------------------------------------------------------------------- 1 | """Mock objects for testing MemAgent components.""" 2 | -------------------------------------------------------------------------------- /tests/performance/__init__.py: -------------------------------------------------------------------------------- 1 | """Performance and stress tests for MemAgent.""" 2 | -------------------------------------------------------------------------------- /.venv/pyvenv.cfg: -------------------------------------------------------------------------------- 1 | home = /usr/local/bin 2 | include-system-site-packages = false 3 | version = 3.7.9 4 | -------------------------------------------------------------------------------- /src/memorizz/long_term_memory/procedural/toolbox/__init__.py: -------------------------------------------------------------------------------- 1 | from .toolbox import Toolbox 2 | 3 | __all__ = ["Toolbox"] 4 | -------------------------------------------------------------------------------- /src/memorizz/coordination/__init__.py: -------------------------------------------------------------------------------- 1 | from .shared_memory.shared_memory import SharedMemory 2 | 3 | __all__ = ["SharedMemory"] 4 | -------------------------------------------------------------------------------- /examples/images/memorizz_script_output.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichmondAlake/memorizz/HEAD/examples/images/memorizz_script_output.png 
-------------------------------------------------------------------------------- /src/memorizz/long_term_memory/procedural/workflow/__init__.py: -------------------------------------------------------------------------------- 1 | from .workflow import Workflow 2 | 3 | __all__ = ["Workflow", "WorkflowOutcome"] 4 | -------------------------------------------------------------------------------- /src/memorizz/long_term_memory/procedural/__init__.py: -------------------------------------------------------------------------------- 1 | from .toolbox import Toolbox 2 | from .workflow import Workflow 3 | 4 | __all__ = ["Toolbox", "Workflow"] 5 | -------------------------------------------------------------------------------- /src/memorizz/long_term_memory/semantic/persona/__init__.py: -------------------------------------------------------------------------------- 1 | from .persona import Persona 2 | from .role_type import RoleType 3 | 4 | __all__ = ["Persona", "RoleType"] 5 | -------------------------------------------------------------------------------- /src/memorizz/short_term_memory/__init__.py: -------------------------------------------------------------------------------- 1 | from .semantic_cache import SemanticCache 2 | from .working_memory.cwm import CWM 3 | 4 | __all__ = ["SemanticCache", "CWM"] 5 | -------------------------------------------------------------------------------- /src/memorizz/memory_provider/mongodb/__init__.py: -------------------------------------------------------------------------------- 1 | from .provider import MongoDBConfig, MongoDBProvider 2 | 3 | __all__ = [ 4 | "MongoDBProvider", 5 | "MongoDBConfig", 6 | ] 7 | -------------------------------------------------------------------------------- /src/memorizz/llms/__init__.py: -------------------------------------------------------------------------------- 1 | from .azure import AzureOpenAI 2 | from .huggingface import HuggingFaceLLM 3 | from .openai import OpenAI 4 | 5 | __all__ = ["OpenAI", "AzureOpenAI", "HuggingFaceLLM"] 6 | -------------------------------------------------------------------------------- /src/memorizz/memory_provider/filesystem/__init__.py: -------------------------------------------------------------------------------- 1 | """Filesystem-based memory provider.""" 2 | 3 | from .provider import FileSystemConfig, FileSystemProvider 4 | 5 | __all__ = ["FileSystemConfig", "FileSystemProvider"] 6 | -------------------------------------------------------------------------------- /src/memorizz/long_term_memory/episodic/__init__.py: -------------------------------------------------------------------------------- 1 | from .conversational_memory_unit import ConversationMemoryUnit 2 | from .summary_component import SummaryComponent 3 | 4 | __all__ = ["ConversationMemoryUnit", "SummaryComponent"] 5 | -------------------------------------------------------------------------------- /src/memorizz/internet_access/providers/__init__.py: -------------------------------------------------------------------------------- 1 | """Available internet access providers.""" 2 | 3 | from .firecrawl import FirecrawlProvider 4 | from .tavily import TavilyProvider 5 | 6 | __all__ = ["FirecrawlProvider", "TavilyProvider"] 7 | -------------------------------------------------------------------------------- /src/memorizz/memory_provider/oracle/__init__.py: -------------------------------------------------------------------------------- 1 | from .provider import OracleConfig, OracleProvider 2 | from .setup import setup_oracle_user 3 
| 4 | __all__ = [ 5 | "OracleProvider", 6 | "OracleConfig", 7 | "setup_oracle_user", 8 | ] 9 | -------------------------------------------------------------------------------- /src/memorizz/database/__init__.py: -------------------------------------------------------------------------------- 1 | from .mongodb.mongodb_tools import MongoDBTools, MongoDBToolsConfig, get_mongodb_toolbox 2 | 3 | __all__ = [ 4 | # MongoDB tools 5 | "MongoDBTools", 6 | "MongoDBToolsConfig", 7 | "get_mongodb_toolbox", 8 | ] 9 | -------------------------------------------------------------------------------- /src/memorizz/enums/role.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class Role(Enum): 5 | """Enum for different roles in a conversation.""" 6 | 7 | USER = "user" 8 | ASSISTANT = "assistant" 9 | DEVELOPER = "developer" 10 | TOOL = "tool" 11 | -------------------------------------------------------------------------------- /src/memorizz/memagent/orchestrators/multi_agent_orchestrator.py: -------------------------------------------------------------------------------- 1 | """Shim module to keep backwards compatibility for multi-agent orchestrator imports.""" 2 | 3 | from ...multi_agent_orchestrator import MultiAgentOrchestrator 4 | 5 | __all__ = ["MultiAgentOrchestrator"] 6 | -------------------------------------------------------------------------------- /src/memorizz/embeddings/openai/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | OpenAI Embedding Provider 3 | 4 | This package contains the OpenAI embedding provider implementation. 5 | """ 6 | 7 | from .provider import OpenAIEmbeddingProvider 8 | 9 | __all__ = ["OpenAIEmbeddingProvider"] 10 | -------------------------------------------------------------------------------- /src/memorizz/embeddings/azure/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | OpenAI Embedding Provider 3 | 4 | This package contains the OpenAI embedding provider implementation. 5 | """ 6 | 7 | from .provider import AzureOpenAIEmbeddingProvider 8 | 9 | __all__ = ["AzureOpenAIEmbeddingProvider"] 10 | -------------------------------------------------------------------------------- /src/memorizz/embeddings/huggingface/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Hugging Face Embedding Provider 3 | 4 | Exposes the HuggingFaceEmbeddingProvider implementation. 5 | """ 6 | 7 | from .provider import HuggingFaceEmbeddingProvider 8 | 9 | __all__ = ["HuggingFaceEmbeddingProvider"] 10 | -------------------------------------------------------------------------------- /src/memorizz/embeddings/ollama/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Ollama Embedding Provider 3 | 4 | This package contains the Ollama embedding provider implementation for local embeddings. 
5 | """ 6 | 7 | from .provider import OllamaEmbeddingProvider 8 | 9 | __all__ = ["OllamaEmbeddingProvider"] 10 | -------------------------------------------------------------------------------- /.venv/bin/pip: -------------------------------------------------------------------------------- 1 | #!/Users/richmondalake/Desktop/memorizz/.venv/bin/python3 2 | # -*- coding: utf-8 -*- 3 | import re 4 | import sys 5 | 6 | from pip._internal.cli.main import main 7 | 8 | if __name__ == "__main__": 9 | sys.argv[0] = re.sub(r"(-script\.pyw|\.exe)?$", "", sys.argv[0]) 10 | sys.exit(main()) 11 | -------------------------------------------------------------------------------- /.venv/bin/pip3: -------------------------------------------------------------------------------- 1 | #!/Users/richmondalake/Desktop/memorizz/.venv/bin/python3 2 | # -*- coding: utf-8 -*- 3 | import re 4 | import sys 5 | 6 | from pip._internal.cli.main import main 7 | 8 | if __name__ == "__main__": 9 | sys.argv[0] = re.sub(r"(-script\.pyw|\.exe)?$", "", sys.argv[0]) 10 | sys.exit(main()) 11 | -------------------------------------------------------------------------------- /.venv/bin/pip3.10: -------------------------------------------------------------------------------- 1 | #!/Users/richmondalake/Desktop/memorizz/.venv/bin/python3 2 | # -*- coding: utf-8 -*- 3 | import re 4 | import sys 5 | 6 | from pip._internal.cli.main import main 7 | 8 | if __name__ == "__main__": 9 | sys.argv[0] = re.sub(r"(-script\.pyw|\.exe)?$", "", sys.argv[0]) 10 | sys.exit(main()) 11 | -------------------------------------------------------------------------------- /.venv/bin/pip3.7: -------------------------------------------------------------------------------- 1 | #!/Users/richmondalake/Desktop/memorizz/.venv/bin/python3 2 | # -*- coding: utf-8 -*- 3 | import re 4 | import sys 5 | 6 | from pip._internal.cli.main import main 7 | 8 | if __name__ == "__main__": 9 | sys.argv[0] = re.sub(r"(-script\.pyw|\.exe)?$", "", sys.argv[0]) 10 | sys.exit(main()) 11 | -------------------------------------------------------------------------------- /src/memorizz/memagent/handlers/__init__.py: -------------------------------------------------------------------------------- 1 | """Handler components for MemAgent processing.""" 2 | 3 | from .conversation_handler import ConversationHandler 4 | from .prompt_handler import PromptHandler 5 | from .response_handler import ResponseHandler 6 | 7 | __all__ = ["ConversationHandler", "PromptHandler", "ResponseHandler"] 8 | -------------------------------------------------------------------------------- /src/memorizz/enums/semantic_cache_scope.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class SemanticCacheScope(Enum): 5 | """Scope for semantic cache searches.""" 6 | 7 | LOCAL = "local" # Search only this agent's cache entries (filtered by agent_id) 8 | GLOBAL = "global" # Search across all cache entries (no agent_id filter) 9 | -------------------------------------------------------------------------------- /.venv/bin/easy_install: -------------------------------------------------------------------------------- 1 | #!/Users/richmondalake/Desktop/memorizz/.venv/bin/python3 2 | # -*- coding: utf-8 -*- 3 | import re 4 | import sys 5 | 6 | from setuptools.command.easy_install import main 7 | 8 | if __name__ == "__main__": 9 | sys.argv[0] = re.sub(r"(-script\.pyw|\.exe)?$", "", sys.argv[0]) 10 | sys.exit(main()) 11 | 
-------------------------------------------------------------------------------- /.venv/bin/easy_install-3.7: -------------------------------------------------------------------------------- 1 | #!/Users/richmondalake/Desktop/memorizz/.venv/bin/python3 2 | # -*- coding: utf-8 -*- 3 | import re 4 | import sys 5 | 6 | from setuptools.command.easy_install import main 7 | 8 | if __name__ == "__main__": 9 | sys.argv[0] = re.sub(r"(-script\.pyw|\.exe)?$", "", sys.argv[0]) 10 | sys.exit(main()) 11 | -------------------------------------------------------------------------------- /src/memorizz/long_term_memory/semantic/entity_memory/__init__.py: -------------------------------------------------------------------------------- 1 | """Entity memory exports.""" 2 | 3 | from .entity_memory import ( 4 | EntityAttribute, 5 | EntityMemory, 6 | EntityMemoryRecord, 7 | EntityRelation, 8 | ) 9 | 10 | __all__ = [ 11 | "EntityMemory", 12 | "EntityMemoryRecord", 13 | "EntityAttribute", 14 | "EntityRelation", 15 | ] 16 | -------------------------------------------------------------------------------- /src/memorizz/enums/__init__.py: -------------------------------------------------------------------------------- 1 | from .application_mode import ApplicationMode, ApplicationModeConfig 2 | from .memory_type import MemoryType 3 | from .role import Role 4 | from .semantic_cache_scope import SemanticCacheScope 5 | 6 | __all__ = [ 7 | "Role", 8 | "ApplicationMode", 9 | "ApplicationModeConfig", 10 | "MemoryType", 11 | "SemanticCacheScope", 12 | ] 13 | -------------------------------------------------------------------------------- /src/memorizz/memagent/orchestrators/__init__.py: -------------------------------------------------------------------------------- 1 | """Orchestrator components for MemAgent coordination.""" 2 | 3 | from .deep_research import DeepResearchOrchestrator, DeepResearchWorkflow 4 | from .multi_agent_orchestrator import MultiAgentOrchestrator 5 | 6 | __all__ = [ 7 | "MultiAgentOrchestrator", 8 | "DeepResearchOrchestrator", 9 | "DeepResearchWorkflow", 10 | ] 11 | -------------------------------------------------------------------------------- /src/memorizz/memory_unit/__init__.py: -------------------------------------------------------------------------------- 1 | from .conversational_memory_unit import ConversationMemoryUnit 2 | from .memory_unit import MemoryUnit 3 | from .semantic_cache_entry import SemanticCacheEntry 4 | from .summary_component import SummaryComponent, SummaryMetrics 5 | 6 | __all__ = [ 7 | "MemoryUnit", 8 | "ConversationMemoryUnit", 9 | "SummaryComponent", 10 | "SummaryMetrics", 11 | "SemanticCacheEntry", 12 | ] 13 | -------------------------------------------------------------------------------- /src/memorizz/memory_unit/conversational_memory_unit.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from pydantic import BaseModel 4 | 5 | 6 | class ConversationMemoryUnit(BaseModel): 7 | role: str 8 | content: str 9 | timestamp: str 10 | memory_id: str 11 | conversation_id: str 12 | embedding: list[float] 13 | recall_recency: Optional[float] = None 14 | associated_conversation_ids: Optional[list[str]] = None 15 | -------------------------------------------------------------------------------- /src/memorizz/memagent/utils/__init__.py: -------------------------------------------------------------------------------- 1 | """Utility components for MemAgent.""" 2 | 3 | from .formatters import PromptFormatter, 
ResponseFormatter 4 | from .helpers import IDGenerator, TimestampHelper 5 | from .validators import ConfigValidator, InputValidator 6 | 7 | __all__ = [ 8 | "ConfigValidator", 9 | "InputValidator", 10 | "PromptFormatter", 11 | "ResponseFormatter", 12 | "IDGenerator", 13 | "TimestampHelper", 14 | ] 15 | -------------------------------------------------------------------------------- /src/memorizz/embeddings/voyageai/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | VoyageAI Embedding Provider 3 | 4 | This package contains the VoyageAI embedding provider implementation with support for: 5 | - Text embeddings with multiple models and configurable dimensions 6 | - Multimodal embeddings for text and images 7 | - Contextualized chunk embeddings for documents 8 | """ 9 | 10 | from .provider import VoyageAIEmbeddingProvider 11 | 12 | __all__ = ["VoyageAIEmbeddingProvider"] 13 | -------------------------------------------------------------------------------- /src/memorizz/long_term_memory/semantic/__init__.py: -------------------------------------------------------------------------------- 1 | from .entity_memory import ( 2 | EntityAttribute, 3 | EntityMemory, 4 | EntityMemoryRecord, 5 | EntityRelation, 6 | ) 7 | from .knowledge_base import KnowledgeBase 8 | from .persona import Persona, RoleType 9 | 10 | __all__ = [ 11 | "KnowledgeBase", 12 | "Persona", 13 | "RoleType", 14 | "EntityMemory", 15 | "EntityMemoryRecord", 16 | "EntityAttribute", 17 | "EntityRelation", 18 | ] 19 | -------------------------------------------------------------------------------- /src/memorizz/memagent/builders/__init__.py: -------------------------------------------------------------------------------- 1 | """Builder components for MemAgent.""" 2 | 3 | from .agent_builder import ( 4 | MemAgentBuilder, 5 | create_assistant, 6 | create_chatbot, 7 | create_deep_research_agent, 8 | create_task_agent, 9 | ) 10 | from .config_builder import ConfigBuilder 11 | 12 | __all__ = [ 13 | "MemAgentBuilder", 14 | "ConfigBuilder", 15 | "create_assistant", 16 | "create_chatbot", 17 | "create_task_agent", 18 | "create_deep_research_agent", 19 | ] 20 | -------------------------------------------------------------------------------- /src/memorizz/coordination/shared_memory/__init__.py: -------------------------------------------------------------------------------- 1 | from .messages import ( 2 | SharedMemoryMessage, 3 | SharedMemoryMessageType, 4 | create_command_message, 5 | create_report_message, 6 | create_status_message, 7 | ) 8 | from .shared_memory import BlackboardEntry, SharedMemory 9 | 10 | __all__ = [ 11 | "SharedMemory", 12 | "BlackboardEntry", 13 | "SharedMemoryMessage", 14 | "SharedMemoryMessageType", 15 | "create_command_message", 16 | "create_status_message", 17 | "create_report_message", 18 | ] 19 | -------------------------------------------------------------------------------- /src/memorizz/long_term_memory/episodic/conversational_memory_unit.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from pydantic import BaseModel 4 | 5 | 6 | class ConversationMemoryUnit(BaseModel): 7 | role: str 8 | content: str 9 | timestamp: str 10 | memory_id: str 11 | conversation_id: str 12 | embedding: Optional[ 13 | list[float] 14 | ] = None # Optional for Oracle VECTOR (NULL vs empty list) 15 | agent_id: Optional[str] = None 16 | recall_recency: Optional[float] = None 17 | associated_conversation_ids: 
Optional[list[str]] = None 18 | -------------------------------------------------------------------------------- /src/memorizz/enums/memory_type.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class MemoryType(Enum): 5 | """Enum for different types of memory stores.""" 6 | 7 | PERSONAS = "personas" 8 | TOOLBOX = "toolbox" 9 | ENTITY_MEMORY = "entity_memory" 10 | SHORT_TERM_MEMORY = "short_term_memory" 11 | LONG_TERM_MEMORY = "long_term_memory" 12 | CONVERSATION_MEMORY = "conversation_memory" 13 | WORKFLOW_MEMORY = "workflow_memory" 14 | MEMAGENT = "agents" 15 | SHARED_MEMORY = "shared_memory" 16 | SUMMARIES = "summaries" 17 | SEMANTIC_CACHE = "semantic_cache" 18 | -------------------------------------------------------------------------------- /src/memorizz/memagent/constants.py: -------------------------------------------------------------------------------- 1 | """Configuration constants for MemAgent.""" 2 | 3 | import os 4 | 5 | # Configuration constants 6 | DEFAULT_INSTRUCTION = "You are a helpful assistant." 7 | DEFAULT_MAX_STEPS = 20 8 | DEFAULT_TOOL_ACCESS = "private" 9 | 10 | # Logging configuration 11 | MEMORIZZ_LOG_LEVEL = os.getenv("MEMORIZZ_LOG_LEVEL", "DEBUG").upper() 12 | 13 | # Application modes 14 | APPLICATION_MODES = { 15 | "assistant": "General purpose assistant", 16 | "chatbot": "Conversational chatbot", 17 | "agent": "Task-oriented agent", 18 | } 19 | 20 | # Memory types 21 | DEFAULT_MEMORY_TYPES = ["conversation_memory", "semantic_memory"] 22 | -------------------------------------------------------------------------------- /src/memorizz/memagent/managers/__init__.py: -------------------------------------------------------------------------------- 1 | """Manager components for MemAgent functionality.""" 2 | 3 | from .cache_manager import CacheManager 4 | from .entity_memory_manager import EntityMemoryManager 5 | from .internet_access_manager import InternetAccessManager 6 | from .memory_manager import MemoryManager 7 | from .persona_manager import PersonaManager 8 | from .tool_manager import ToolManager 9 | from .workflow_manager import WorkflowManager 10 | 11 | __all__ = [ 12 | "MemoryManager", 13 | "ToolManager", 14 | "CacheManager", 15 | "PersonaManager", 16 | "WorkflowManager", 17 | "EntityMemoryManager", 18 | "InternetAccessManager", 19 | ] 20 | -------------------------------------------------------------------------------- /src/memorizz/memory_provider/oracle/requirements.txt: -------------------------------------------------------------------------------- 1 | # Oracle AI Database Memory Provider Requirements 2 | 3 | # Oracle Database Driver (official) 4 | # Supports Oracle Database 23ai and 26ai with VECTOR datatype 5 | oracledb>=2.0.0 6 | 7 | # Note: This is the modern replacement for cx_Oracle 8 | # It supports both "thick" and "thin" modes: 9 | # - Thin mode: Pure Python, no Oracle Client required (default) 10 | # - Thick mode: Uses Oracle Client libraries (optional, for advanced features) 11 | 12 | # Installation: 13 | # pip install oracledb 14 | 15 | # For thick mode (optional), you also need Oracle Instant Client: 16 | # https://www.oracle.com/database/technologies/instant-client.html 17 | -------------------------------------------------------------------------------- /src/memorizz/memagent/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | MemAgent module. 
3 | 4 | This module provides a maintainable structure while maintaining 5 | 100% backward compatibility with existing code. 6 | """ 7 | 8 | # Import core components 9 | from .core import MemAgent 10 | 11 | # Optional: Import managers for advanced users 12 | from .managers import ( 13 | CacheManager, 14 | MemoryManager, 15 | PersonaManager, 16 | ToolManager, 17 | WorkflowManager, 18 | ) 19 | from .models import MemAgentConfig, MemAgentModel 20 | 21 | # Export all public APIs 22 | __all__ = [ 23 | "MemAgent", 24 | "MemAgentModel", 25 | "MemAgentConfig", 26 | "MemoryManager", 27 | "ToolManager", 28 | "CacheManager", 29 | "PersonaManager", 30 | "WorkflowManager", 31 | ] 32 | -------------------------------------------------------------------------------- /src/memorizz/memagent.py: -------------------------------------------------------------------------------- 1 | """ 2 | MemAgent - Backward compatibility wrapper. 3 | 4 | This file maintains backward compatibility for existing code that imports 5 | from memorizz.memagent. 6 | 7 | Now uses the refactored memagent/ module with unified MemoryProvider interface. 8 | The original implementation is preserved in memagent_original_backup.py for reference. 9 | """ 10 | 11 | # Import from the refactored implementation 12 | from .memagent.core import MemAgent 13 | from .memagent.models import MemAgentModel 14 | 15 | # Re-export all public APIs to maintain backward compatibility 16 | __all__ = ["MemAgent", "MemAgentModel"] 17 | 18 | # This ensures that code like: 19 | # from memorizz.memagent import MemAgent 20 | # continues to work with both old and new calling conventions 21 | -------------------------------------------------------------------------------- /src/memorizz/long_term_memory/__init__.py: -------------------------------------------------------------------------------- 1 | from .episodic.conversational_memory_unit import ConversationMemoryUnit 2 | from .episodic.summary_component import SummaryComponent 3 | from .procedural.toolbox import Toolbox 4 | from .procedural.workflow import Workflow 5 | from .semantic.entity_memory import ( 6 | EntityAttribute, 7 | EntityMemory, 8 | EntityMemoryRecord, 9 | EntityRelation, 10 | ) 11 | from .semantic.knowledge_base import KnowledgeBase 12 | from .semantic.persona import Persona 13 | 14 | __all__ = [ 15 | "KnowledgeBase", 16 | "Persona", 17 | "EntityMemory", 18 | "EntityMemoryRecord", 19 | "EntityAttribute", 20 | "EntityRelation", 21 | "Toolbox", 22 | "Workflow", 23 | "ConversationMemoryUnit", 24 | "SummaryComponent", 25 | ] 26 | -------------------------------------------------------------------------------- /src/memorizz/long_term_memory/procedural/toolbox/tool_schema.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from pydantic import BaseModel 4 | 5 | 6 | class ParameterSchema(BaseModel): 7 | """ 8 | A schema for the parameter. 9 | """ 10 | 11 | name: str 12 | description: str 13 | type: str 14 | required: bool 15 | 16 | 17 | class FunctionSchema(BaseModel): 18 | """ 19 | A schema for the function. 20 | """ 21 | 22 | name: str 23 | description: str 24 | parameters: list[ParameterSchema] 25 | required: List[str] 26 | queries: List[str] 27 | 28 | 29 | class ToolSchemaType(BaseModel): 30 | """ 31 | A schema for the tool. 32 | This can be the OpenAI function calling schema or Google function calling schema. 
33 | """ 34 | 35 | type: str 36 | function: FunctionSchema 37 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Python 2 | __pycache__/ 3 | *.py[cod] 4 | *.so 5 | 6 | # Distribution / packaging 7 | .Python 8 | build/ 9 | develop-eggs/ 10 | dist/ 11 | downloads/ 12 | eggs/ 13 | .eggs/ 14 | lib/ 15 | lib64/ 16 | parts/ 17 | sdist/ 18 | var/ 19 | wheels/ 20 | *.egg-info/ 21 | .installed.cfg 22 | *.egg 23 | 24 | # Virtual environments 25 | venv/ 26 | ENV/ 27 | .venv/ 28 | 29 | # IDEs 30 | .vscode/ 31 | .idea/ 32 | 33 | # OS generated files 34 | .DS_Store 35 | Thumbs.db 36 | 37 | # Test Notebooks 38 | dev_test.ipynb 39 | 40 | # Environment variables and secrets 41 | .env 42 | .env.* 43 | *.env 44 | *.key 45 | *.pem 46 | *.p12 47 | *.pfx 48 | secrets.json 49 | credentials.json 50 | config.local.* 51 | *.secret 52 | 53 | # Examples folder (local testing only) 54 | src/memorizz/examples/ 55 | 56 | # Bug reports 57 | .bug_tracking/ 58 | 59 | # Build artifacts 60 | site/ 61 | *.map 62 | -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- 1 | # MemoRizz Environment Variables Configuration 2 | # Copy this file to .env and fill in your actual values 3 | # DO NOT commit .env to version control 4 | 5 | # OpenAI API Configuration (Required) 6 | # Get your API key from: https://platform.openai.com/api-keys 7 | OPENAI_API_KEY= 8 | 9 | # Oracle Database Configuration 10 | # Admin credentials (for setup only) 11 | ORACLE_ADMIN_USER=system 12 | ORACLE_ADMIN_PASSWORD=MyPassword123! 13 | 14 | # MemoRizz database user credentials 15 | ORACLE_USER=memorizz_user 16 | ORACLE_PASSWORD=SecurePass123! 17 | ORACLE_DSN=localhost:1521/FREEPDB1 18 | 19 | # Optional: Backend selection (oracle, mongodb) 20 | MEMORIZZ_BACKEND=oracle 21 | 22 | # Optional: Schema name (defaults to username) 23 | MEMORIZZ_SCHEMA=MEMORIZZ_USER 24 | 25 | # Optional: Docker platform flag for Apple Silicon 26 | # Uncomment and set if you're on Apple Silicon (M1/M2/M3) 27 | # PLATFORM_FLAG=--platform linux/amd64 28 | -------------------------------------------------------------------------------- /docs/use-cases/workflow-mode.md: -------------------------------------------------------------------------------- 1 | # Workflow Mode 2 | 3 | Workflow mode targets deterministic task execution (think onboarding checklists, ticket triage, or knowledge-base upkeep). It favors procedural memory and tools over conversational depth. 4 | 5 | ## Memory Stack 6 | 7 | - `MemoryType.WORKFLOW_MEMORY` 8 | - `MemoryType.TOOLBOX` 9 | - `MemoryType.LONG_TERM_MEMORY` 10 | - `MemoryType.SHORT_TERM_MEMORY` 11 | 12 | ## Sample Flow 13 | 14 | ```python 15 | from memorizz.enums import ApplicationMode 16 | 17 | agent = (MemAgentBuilder() 18 | .with_application_mode(ApplicationMode.WORKFLOW) 19 | .with_memory_provider(provider) 20 | .with_tool(module_path="memorizz.tools.workflow") 21 | .build()) 22 | 23 | agent.run("Process ticket 12491 and update the changelog") 24 | ``` 25 | 26 | Workflow mode keeps episodic memory minimal so the agent can stay focused on the currently executing process. Pair it with shared memory if you need a supervisor agent to inspect progress. 
27 | -------------------------------------------------------------------------------- /docs/use-cases/assistant-mode.md: -------------------------------------------------------------------------------- 1 | # Assistant Mode 2 | 3 | Assistant mode is the default conversational setup for MemoRizz. It prioritizes continuity, personalization, and a rich memory stack so users feel like they're chatting with the same agent every time. 4 | 5 | ## Memory Stack 6 | 7 | - `MemoryType.CONVERSATION_MEMORY` 8 | - `MemoryType.LONG_TERM_MEMORY` + `MemoryType.ENTITY_MEMORY` 9 | - `MemoryType.PERSONAS` 10 | - `MemoryType.SHORT_TERM_MEMORY` 11 | - `MemoryType.SUMMARIES` 12 | 13 | ## Configuration 14 | 15 | ```python 16 | from memorizz.enums import ApplicationMode 17 | 18 | agent = (MemAgentBuilder() 19 | .with_application_mode(ApplicationMode.ASSISTANT) 20 | .with_memory_provider(provider) 21 | ... 22 | .build()) 23 | ``` 24 | 25 | ## Tips 26 | 27 | - Seed personas with voice/tone guidelines and safety rails. 28 | - Use entity memory to store user preferences (e.g., "prefers dark mode UI"). 29 | - Enable semantic cache for repeated Q&A answers to cut LLM costs. 30 | 31 | Assistant mode is ideal for customer support, onboarding companions, or internal help desks. 32 | -------------------------------------------------------------------------------- /src/memorizz/memory_unit/semantic_cache_entry.py: -------------------------------------------------------------------------------- 1 | """ 2 | Semantic Cache Entry for MemAgent 3 | 4 | Represents a cached query-response pair with metadata for semantic similarity matching. 5 | """ 6 | 7 | from typing import Any, Dict, List, Optional 8 | 9 | from pydantic import BaseModel 10 | 11 | 12 | class SemanticCacheEntry(BaseModel): 13 | """ 14 | Represents a cached query-response pair with metadata. 15 | 16 | This memory unit stores semantic cache entries that enable fast retrieval 17 | of similar queries through vector similarity matching. 
18 | """ 19 | 20 | query: str 21 | response: str 22 | embedding: List[float] 23 | timestamp: float 24 | session_id: Optional[str] = None 25 | memory_id: Optional[str] = None 26 | agent_id: Optional[str] = None 27 | usage_count: int = 0 28 | last_accessed: Optional[float] = None 29 | metadata: Optional[Dict[str, Any]] = None 30 | cache_key: Optional[str] = None 31 | 32 | def model_post_init(self, __context) -> None: 33 | """Initialize last_accessed if not provided.""" 34 | if self.last_accessed is None: 35 | self.last_accessed = self.timestamp 36 | -------------------------------------------------------------------------------- /.github/workflows/docs.yml: -------------------------------------------------------------------------------- 1 | name: Docs 2 | 3 | on: 4 | push: 5 | branches: ["main"] 6 | workflow_dispatch: 7 | 8 | permissions: 9 | contents: read 10 | pages: write 11 | id-token: write 12 | 13 | concurrency: 14 | group: "pages" 15 | cancel-in-progress: true 16 | 17 | jobs: 18 | build: 19 | runs-on: ubuntu-latest 20 | steps: 21 | - name: Checkout 22 | uses: actions/checkout@v4 23 | - name: Setup Python 24 | uses: actions/setup-python@v5 25 | with: 26 | python-version: "3.11" 27 | - name: Install dependencies 28 | run: | 29 | python -m pip install --upgrade pip 30 | pip install -e .[docs] 31 | - name: Build site 32 | run: mkdocs build --strict 33 | - name: Upload artifact 34 | uses: actions/upload-pages-artifact@v3 35 | with: 36 | path: site/ 37 | 38 | deploy: 39 | needs: build 40 | runs-on: ubuntu-latest 41 | environment: 42 | name: github-pages 43 | url: ${{ steps.deployment.outputs.page_url }} 44 | steps: 45 | - name: Deploy to GitHub Pages 46 | id: deployment 47 | uses: actions/deploy-pages@v4 48 | -------------------------------------------------------------------------------- /docs/memory-types/episodic.md: -------------------------------------------------------------------------------- 1 | # Episodic Memory 2 | 3 | Episodic memory chronicles every interaction an agent has with users, teammates, or tools. It lives under `src/memorizz/long_term_memory/episodic/` and fulfills both `MemoryType.CONVERSATION_MEMORY` and `MemoryType.SUMMARIES`. 4 | 5 | ## Structure 6 | 7 | - **Conversation Memory Units** – Raw transcripts with timestamps, speaker metadata, and embeddings for semantic retrieval. 8 | - **Summaries** – Periodic rollups that compress older chunks to keep prompts small while retaining context. 9 | 10 | ## Example 11 | 12 | ```python 13 | agent.memory.conversation_memory.add_message( 14 | role="user", 15 | content="Can you remind me of the Oracle setup steps?", 16 | ) 17 | 18 | agent.memory.summaries.create_or_update( 19 | topic="setup", 20 | content="User configured Oracle last week and is stuck on connection pooling.", 21 | ) 22 | ``` 23 | 24 | ## Use Cases 25 | 26 | - Long-running assistants that must reference previous sessions 27 | - Relationship and preference tracking for customer success bots 28 | - Auditable records of how multi-agent systems reached a decision 29 | 30 | Pair episodic memory with semantic cache or working memory to prioritize the most relevant snippets for a given prompt window. 
31 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # Pre-commit hooks for code quality 2 | repos: 3 | - repo: https://github.com/pre-commit/pre-commit-hooks 4 | rev: v4.5.0 5 | hooks: 6 | - id: trailing-whitespace 7 | - id: end-of-file-fixer 8 | - id: check-yaml 9 | - id: check-added-large-files 10 | - id: check-ast # Check Python syntax 11 | - id: check-merge-conflict 12 | 13 | - repo: https://github.com/psf/black 14 | rev: 23.12.1 15 | hooks: 16 | - id: black 17 | language_version: python3.11 18 | 19 | - repo: https://github.com/pycqa/flake8 20 | rev: 7.0.0 21 | hooks: 22 | - id: flake8 23 | args: [ 24 | '--max-line-length=120', 25 | '--extend-ignore=E203,E501,E712,F401,F541,F841' 26 | ] 27 | exclude: | 28 | (?x)^( 29 | .*_backup\.py| 30 | memagent_original_backup\.py| 31 | provider_backup\.py| 32 | eval/.*| 33 | tests/.* 34 | )$ 35 | 36 | - repo: https://github.com/pycqa/isort 37 | rev: 5.13.2 38 | hooks: 39 | - id: isort 40 | args: ['--profile', 'black'] 41 | 42 | - repo: https://github.com/kynan/nbstripout 43 | rev: 0.6.1 44 | hooks: 45 | - id: nbstripout 46 | args: ['--extra-keys', 'metadata.collapsed,metadata.scrolled'] 47 | -------------------------------------------------------------------------------- /docs/memory-types/short-term.md: -------------------------------------------------------------------------------- 1 | # Short-Term Memory 2 | 3 | Short-term memory is the agent's active workspace. MemoRizz separates it into a semantic cache and a working-memory controller located under `src/memorizz/short_term_memory/`. 4 | 5 | ## Semantic Cache (`MemoryType.SEMANTIC_CACHE`) 6 | 7 | - Stores short-lived key/value pairs with embeddings for fast similarity matches. 8 | - Ideal for caching expensive LLM responses, transient API payloads, or session-only facts. 9 | - Automatically expires or can be explicitly cleared when you rotate sessions. 10 | 11 | ```python 12 | agent.memory.semantic_cache.save( 13 | key="oracle_setup_docs", 14 | value={"answer": "Install client, run memorizz setup-oracle"}, 15 | ) 16 | ``` 17 | 18 | ## Working Memory (`MemoryType.SHORT_TERM_MEMORY`) 19 | 20 | - Tracks the active conversation window across all memory sources. 21 | - Manages token budgets by summarizing or truncating inputs before they reach the LLM. 22 | - Responsible for stitching retrieved semantic, episodic, and procedural memories into a cohesive prompt. 23 | 24 | ```python 25 | window = agent.memory.short_term.window_for(agent_id=agent.id) 26 | window.push_user_message("Give me the highlights from yesterday's sync.") 27 | ``` 28 | 29 | Short-term memory keeps the agent grounded in the current turn while semantic + episodic stores provide long-term continuity. 30 | -------------------------------------------------------------------------------- /docs/memory-types/shared.md: -------------------------------------------------------------------------------- 1 | # Shared Memory 2 | 3 | Shared memory powers coordination between multiple agents. It sits in `src/memorizz/coordination/shared_memory/` and corresponds to `MemoryType.SHARED_MEMORY`. 4 | 5 | ## Why It Exists 6 | 7 | Complex workflows often split responsibilities across researcher, analyst, and writer agents. Shared memory provides a blackboard-like store where agents can exchange artifacts, delegate tasks, and keep track of global progress. 
8 | 9 | ## Creating a Session 10 | 11 | ```python 12 | from memorizz.coordination.shared_memory import SharedMemory 13 | 14 | shared = SharedMemory(memory_provider) 15 | session_id = shared.create_shared_session( 16 | root_agent_id="orchestrator", 17 | delegate_agent_ids=["researcher", "writer"], 18 | ) 19 | ``` 20 | 21 | Each session keeps: 22 | 23 | - Participants and roles 24 | - Messages and artifacts exchanged between agents 25 | - Links to the originating episodic/semantic records for traceability 26 | 27 | ## Patterns 28 | 29 | - Orchestrator + delegate setups (research, summarization, QA) 30 | - Human-in-the-loop review queues where both agents and operators inspect shared state 31 | - Multi-modal agents handing off voice, vision, or text data through a common buffer 32 | 33 | Shared memory complements the per-agent stores so everyone observes the same document trail without duplicating data. 34 | -------------------------------------------------------------------------------- /.venv/bin/activate.csh: -------------------------------------------------------------------------------- 1 | # This file must be used with "source bin/activate.csh" *from csh*. 2 | # You cannot run it directly. 3 | # Created by Davide Di Blasi . 4 | # Ported to Python 3.3 venv by Andrew Svetlov 5 | 6 | alias deactivate 'test $?_OLD_VIRTUAL_PATH != 0 && setenv PATH "$_OLD_VIRTUAL_PATH" && unset _OLD_VIRTUAL_PATH; rehash; test $?_OLD_VIRTUAL_PROMPT != 0 && set prompt="$_OLD_VIRTUAL_PROMPT" && unset _OLD_VIRTUAL_PROMPT; unsetenv VIRTUAL_ENV; test "\!:*" != "nondestructive" && unalias deactivate' 7 | 8 | # Unset irrelevant variables. 9 | deactivate nondestructive 10 | 11 | setenv VIRTUAL_ENV "/Users/richmondalake/Desktop/memorizz/.venv" 12 | 13 | set _OLD_VIRTUAL_PATH="$PATH" 14 | setenv PATH "$VIRTUAL_ENV/bin:$PATH" 15 | 16 | 17 | set _OLD_VIRTUAL_PROMPT="$prompt" 18 | 19 | if (! "$?VIRTUAL_ENV_DISABLE_PROMPT") then 20 | if (".venv" != "") then 21 | set env_name = ".venv" 22 | else 23 | if (`basename "VIRTUAL_ENV"` == "__") then 24 | # special case for Aspen magic directories 25 | # see http://www.zetadev.com/software/aspen/ 26 | set env_name = `basename \`dirname "$VIRTUAL_ENV"\`` 27 | else 28 | set env_name = `basename "$VIRTUAL_ENV"` 29 | endif 30 | endif 31 | set prompt = "[$env_name] $prompt" 32 | unset env_name 33 | endif 34 | 35 | alias pydoc python -m pydoc 36 | 37 | rehash 38 | -------------------------------------------------------------------------------- /docs/memory-types/semantic.md: -------------------------------------------------------------------------------- 1 | # Semantic Memory 2 | 3 | Semantic memory stores canonical facts, personas, and entity attributes that rarely change. In MemoRizz, this maps to `src/memorizz/long_term_memory/semantic/` and is backed by the `MemoryType.LONG_TERM_MEMORY` and `MemoryType.ENTITY_MEMORY` enums. 4 | 5 | ## Components 6 | 7 | - **Knowledge Base** – Vectorized documents segmented by namespace or topic. 8 | - **Personas** – Behavioral instructions, tone, and guardrails that shape agent responses. 9 | - **Entity Memory** – Structured attributes for people, organizations, or devices. The `entity_memory` module exposes helper methods to upsert and query profile fields. 
10 | 11 | ## Typical Operations 12 | 13 | ```python 14 | kb_id = agent.memory.long_term.save_document( 15 | namespace="support", 16 | content="The premium plan includes unlimited vector storage.", 17 | ) 18 | 19 | agent.memory.entity_memory.upsert( 20 | entity_id="company_acme", 21 | attributes={"plan": "premium"}, 22 | ) 23 | ``` 24 | 25 | The provider automatically embeds the document, stores metadata, and tags the record with the owning agent or namespace. 26 | 27 | ## When to Use 28 | 29 | - Product catalogs and policy manuals 30 | - Persona systems for specialized assistants (support, researcher, interviewer) 31 | - Entity profiles that must persist across sessions and devices 32 | 33 | Semantic memory powers long-lived recall. Pair it with episodic memory when you also care about interaction history. 34 | -------------------------------------------------------------------------------- /docs/memory-types/procedural.md: -------------------------------------------------------------------------------- 1 | # Procedural Memory 2 | 3 | Procedural memory captures *how* an agent should act. It bundles tool registration, workflows, and scripted behaviors so that the agent can plan or execute actions consistently. Source code lives in `src/memorizz/long_term_memory/procedural/`. 4 | 5 | ## Components 6 | 7 | - **Toolbox (`MemoryType.TOOLBOX`)** – Python callables wrapped with metadata so LLMs can discover and execute them safely. 8 | - **Workflow Memory (`MemoryType.WORKFLOW_MEMORY`)** – Declarative or code-defined processes that map multi-step plans. 9 | - **Personas** – While technically part of semantic memory, personas often work hand-in-hand with procedural steps to enforce tone and guardrails. 10 | 11 | ## Registering Tools 12 | 13 | ```python 14 | from memorizz.long_term_memory.procedural.toolbox import register_tool 15 | 16 | @register_tool(name="system_status", description="Return current system status") 17 | def system_status(): 18 | ... 19 | 20 | agent.memory.toolbox.sync_registered_tools() 21 | ``` 22 | 23 | Each tool is stored inside your configured provider with embedding metadata so agents can retrieve the right action based on the natural language plan they produce. 24 | 25 | ## When to Reach for Procedural Memory 26 | 27 | - Automations that call APIs, databases, or internal services 28 | - Agents that must follow compliance-friendly workflows 29 | - Research or analyst bots that gather, synthesize, then report findings based on a repeatable checklist 30 | -------------------------------------------------------------------------------- /docs/memory-providers/custom.md: -------------------------------------------------------------------------------- 1 | # Bring Your Own Provider 2 | 3 | MemoRizz decouples the high-level memory interfaces from the backing database via the `MemoryProvider` base class (`src/memorizz/memory_provider/base.py`). Implementing your own provider lets you plug in any datastore that can persist JSON blobs plus embeddings. 4 | 5 | ## Steps 6 | 7 | 1. **Subclass `MemoryProvider`** and implement CRUD helpers for each memory bucket you care about (personas, long-term memory, etc.). 8 | 2. **Handle embeddings** – either pre-compute embeddings before storing documents or call the shared embedding registry inside your provider methods. 9 | 3. **Respect schemas** – store the `id`, `agent_id`, `memory_type`, `data`, `embedding`, and timestamps so higher layers can filter and audit records consistently. 10 | 4. 
**Register the provider** – pass an instance to `MemAgentBuilder().with_memory_provider(...)`. 11 | 12 | ```python 13 | from memorizz.memory_provider.base import MemoryProvider 14 | 15 | class PostgresProvider(MemoryProvider): 16 | def save_persona(self, persona): 17 | ... 18 | ``` 19 | 20 | ## Testing Checklist 21 | 22 | - Run the provider's unit tests under `pytest tests/memory_provider/test_.py`. 23 | - Use `mkdocs serve` to confirm your new provider docs appear under **Memory Providers**. 24 | - Update `pyproject.toml` with a matching optional extra if you ship new dependencies. 25 | 26 | Custom providers make it easy to align MemoRizz with corporate infra while keeping the rest of the SDK untouched. 27 | -------------------------------------------------------------------------------- /docs/memory-providers/mongodb.md: -------------------------------------------------------------------------------- 1 | # MongoDB Provider 2 | 3 | The MongoDB provider offers a lightweight starting point for experimentation or hosted Atlas deployments. It is implemented in `src/memorizz/memory_provider/mongodb/`. 4 | 5 | ## Installation 6 | 7 | ```bash 8 | pip install -e ".[mongodb]" 9 | ``` 10 | 11 | ## Configuration 12 | 13 | ```python 14 | from memorizz.memory_provider.mongodb import MongoDBProvider, MongoDBConfig 15 | 16 | provider = MongoDBProvider(MongoDBConfig( 17 | uri=os.environ["MONGODB_URI"], 18 | database="memorizz", 19 | collection_prefix="agents", 20 | )) 21 | ``` 22 | 23 | Collections are created lazily (e.g., `agents_personas`, `agents_long_term_memory`). Each document stores: 24 | 25 | - Serialized payload (`data`) 26 | - Embedding vectors (array fields you can index with MongoDB Atlas Vector Search) 27 | - Agent + namespace metadata 28 | 29 | ## Atlas Vector Search 30 | 31 | 1. Enable the [Vector Search](https://www.mongodb.com/docs/atlas/atlas-vector-search/) preview on your cluster. 32 | 2. Create an index per collection referencing the embedding field. 33 | 3. Configure the provider with your embedding model dimensions. 34 | 35 | ## When to Choose MongoDB 36 | 37 | - Prototype agents without running Oracle locally 38 | - Serverless / hosted deployments where MongoDB Atlas is already approved 39 | - Horizontal scaling scenarios using MongoDB's built-in sharding 40 | 41 | Use MongoDB for agility and switch to Oracle when you need stronger relational guarantees or AI Vector Search optimizations. 
42 | -------------------------------------------------------------------------------- /src/memorizz/__init__.py: -------------------------------------------------------------------------------- 1 | from .coordination import SharedMemory 2 | from .internet_access import ( 3 | FirecrawlProvider, 4 | InternetAccessProvider, 5 | TavilyProvider, 6 | create_internet_access_provider, 7 | ) 8 | from .long_term_memory.procedural.toolbox import Toolbox 9 | from .long_term_memory.semantic import KnowledgeBase 10 | from .long_term_memory.semantic.persona import Persona, RoleType 11 | from .memagent import MemAgent 12 | from .memory_provider import MemoryProvider, MemoryType 13 | from .short_term_memory.working_memory.cwm import CWM 14 | 15 | 16 | # Lazy import MongoDB to avoid requiring pymongo when not needed 17 | def __getattr__(name): 18 | if name == "MongoDBProvider": 19 | from .memory_provider.mongodb import MongoDBProvider 20 | 21 | return MongoDBProvider 22 | if name in ("FileSystemProvider", "FileSystemConfig"): 23 | from .memory_provider.filesystem import FileSystemConfig, FileSystemProvider 24 | 25 | return FileSystemProvider if name == "FileSystemProvider" else FileSystemConfig 26 | raise AttributeError(f"module '{__name__}' has no attribute '{name}'") 27 | 28 | 29 | __all__ = [ 30 | "MemoryProvider", 31 | "MongoDBProvider", 32 | "FileSystemProvider", 33 | "FileSystemConfig", 34 | "MemoryType", 35 | "Persona", 36 | "RoleType", 37 | "Toolbox", 38 | "KnowledgeBase", 39 | "CWM", 40 | "SharedMemory", 41 | "MemAgent", 42 | "InternetAccessProvider", 43 | "FirecrawlProvider", 44 | "TavilyProvider", 45 | "create_internet_access_provider", 46 | ] 47 | -------------------------------------------------------------------------------- /src/memorizz/long_term_memory/semantic/persona/role_type.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class RoleType(Enum): 5 | GENERAL = "General" 6 | ASSISTANT = "Virtual Assistant" 7 | CUSTOMER_SUPPORT = "Customer Support" 8 | TECHNICAL_EXPERT = "Technical Expert" 9 | RESEARCHER = "Researcher" 10 | 11 | 12 | # Predefined default values for each role 13 | PREDEFINED_INFO = { 14 | RoleType.GENERAL: { 15 | "goals": "Provide versatile support across various domains.", 16 | "background": "A general-purpose agent designed to adapt to multiple contexts.", 17 | }, 18 | RoleType.ASSISTANT: { 19 | "goals": "Assist users by offering timely and personalized support.", 20 | "background": "An assistant agent crafted to manage schedules, answer queries, and help with daily tasks.", 21 | }, 22 | RoleType.CUSTOMER_SUPPORT: { 23 | "goals": "Resolve customer issues promptly and provide clear guidance.", 24 | "background": "A customer support agent specialized in understanding user concerns and delivering effective solutions.", 25 | }, 26 | RoleType.TECHNICAL_EXPERT: { 27 | "goals": "Provide expert technical advice and troubleshoot complex problems.", 28 | "background": "A technical expert agent with deep domain knowledge to assist with intricate technical issues.", 29 | }, 30 | RoleType.RESEARCHER: { 31 | "goals": "Conduct thorough research and offer insights on advanced topics.", 32 | "background": "A researcher agent designed to synthesize complex information and present well-informed perspectives.", 33 | }, 34 | } 35 | -------------------------------------------------------------------------------- /src/memorizz/internet_access/models.py: 
-------------------------------------------------------------------------------- 1 | """Data structures for standardized internet access responses.""" 2 | 3 | from __future__ import annotations 4 | 5 | from dataclasses import dataclass, field 6 | from typing import Any, Dict, Optional 7 | 8 | 9 | @dataclass 10 | class InternetSearchResult: 11 | """Normalized representation of a single search result.""" 12 | 13 | url: str 14 | title: Optional[str] = None 15 | snippet: Optional[str] = None 16 | score: Optional[float] = None 17 | raw: Optional[Dict[str, Any]] = None 18 | metadata: Dict[str, Any] = field(default_factory=dict) 19 | 20 | def to_dict(self) -> Dict[str, Any]: 21 | """Return a serializable dict for tool / LLM consumption.""" 22 | return { 23 | "url": self.url, 24 | "title": self.title, 25 | "snippet": self.snippet, 26 | "score": self.score, 27 | "metadata": self.metadata or None, 28 | "raw": self.raw or None, 29 | } 30 | 31 | 32 | @dataclass 33 | class InternetPageContent: 34 | """Normalized representation of page content scraped from the web.""" 35 | 36 | url: str 37 | title: Optional[str] = None 38 | content: Optional[str] = None 39 | metadata: Dict[str, Any] = field(default_factory=dict) 40 | raw: Optional[Dict[str, Any]] = None 41 | 42 | def to_dict(self) -> Dict[str, Any]: 43 | """Return a serializable dict for tool / LLM consumption.""" 44 | return { 45 | "url": self.url, 46 | "title": self.title, 47 | "content": self.content, 48 | "metadata": self.metadata or None, 49 | "raw": self.raw or None, 50 | } 51 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # MemoRizz Documentation 2 | 3 | MemoRizz helps you build memory-augmented agents that combine long-term knowledge, episodic context, procedural behaviors, and multi-agent coordination. This site keeps the narrative docs, quickstarts, and API references directly inside the repository so the docs never drift from the code that powers them. 4 | 5 | ## What's Inside 6 | 7 | - **Getting Started** walks through the core concepts and SDK setup for your first agent. 8 | - **Memory Types** dives into each cognitive-inspired subsystem and how it maps to the source tree in `src/memorizz/`. 9 | - **Memory Providers** describes the persistence backends (Oracle, MongoDB, or custom) that store the memories. 10 | - **Use Cases** shows how the library stitches memory stacks together for common application modes like assistants or research bots. 11 | 12 | !!! info "Docs live with the code" 13 | Every page in this site is rendered straight from the Markdown under `docs/`. Update a file, run `mkdocs serve`, and the change appears instantly. Merge to `main` and the GitHub Pages workflow publishes the refreshed site automatically. 14 | 15 | ## Quick Start 16 | 17 | ```bash 18 | pip install -e ".[docs]" 19 | mkdocs serve 20 | ``` 21 | 22 | Visit for a hot-reloading docs server. When you're ready to publish, run `mkdocs build --strict` or rely on the provided GitHub Action to deploy to the `gh-pages` branch. 23 | 24 | ## Need More? 25 | 26 | - Check the Python API reference entries embedded throughout the docs via [`mkdocstrings`](https://mkdocstrings.github.io/). 27 | - Browse real workflows in `src/memorizz/examples/` and link them into the docs with snippets or code fences. 28 | - Open an issue or discussion on [GitHub](https://github.com/RichmondAlake/memorizz) if you spot a gap. 
29 | -------------------------------------------------------------------------------- /tests/unit/test_firecrawl_provider.py: -------------------------------------------------------------------------------- 1 | """Unit tests for the Firecrawl internet provider.""" 2 | 3 | from __future__ import annotations 4 | 5 | from unittest.mock import MagicMock 6 | 7 | import pytest 8 | 9 | from memorizz.internet_access.providers.firecrawl import FirecrawlProvider 10 | 11 | 12 | @pytest.mark.unit 13 | def test_firecrawl_truncates_large_pages(): 14 | provider = FirecrawlProvider( 15 | api_key="test-key", 16 | base_url="https://example.com", 17 | config={"max_content_chars": 20}, 18 | ) 19 | provider._post = MagicMock( 20 | return_value={ 21 | "markdown": "A" * 50, 22 | "metadata": {"title": "Example"}, 23 | } 24 | ) 25 | 26 | page = provider.fetch_url("https://memorizz.ai") 27 | 28 | assert page.metadata["content_truncated"] is True 29 | assert page.metadata["content_original_characters"] == 50 30 | assert page.metadata["content_returned_characters"] == 20 31 | assert page.content.startswith("A" * 20) 32 | assert "trimmed the page" in page.content 33 | assert page.raw is None 34 | 35 | 36 | @pytest.mark.unit 37 | def test_firecrawl_can_include_sanitized_raw_payload(): 38 | provider = FirecrawlProvider( 39 | api_key="test-key", 40 | base_url="https://example.com", 41 | config={"include_raw_response": True, "max_raw_chars": 5}, 42 | ) 43 | provider._post = MagicMock( 44 | return_value={ 45 | "markdown": "abcdefg", 46 | "metadata": {"title": "Example"}, 47 | "nested": {"rawHtml": "
" + "x" * 20}, 48 | } 49 | ) 50 | 51 | page = provider.fetch_url("https://memorizz.ai") 52 | 53 | assert page.raw is not None 54 | assert page.raw["markdown"].startswith("abcde") 55 | assert "truncated" in page.raw["markdown"] 56 | assert "truncated" in page.raw["nested"]["rawHtml"] 57 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "memorizz" 7 | version = "0.0.38" 8 | description = "A memory management library for Python" 9 | readme = "README.md" 10 | requires-python = ">=3.7" 11 | license = "MIT" 12 | authors = [ 13 | { name = "Richmond Alake", email = "richmond.alake@gmail.com" } 14 | ] 15 | classifiers = [ 16 | "Programming Language :: Python :: 3", 17 | "License :: OSI Approved :: MIT License", 18 | "Operating System :: OS Independent", 19 | ] 20 | dependencies = [ 21 | "openai", 22 | "numpy", 23 | "pydantic>=2.0.0", 24 | "sentence-transformers>=2.5.0", 25 | "transformers>=4.36.0", 26 | "accelerate>=0.26.0", 27 | "requests>=2.31.0", 28 | ] 29 | 30 | [project.optional-dependencies] 31 | mongodb = ["pymongo>=4.0.0"] 32 | oracle = ["oracledb>=2.0.0"] 33 | filesystem = ["faiss-cpu>=1.7.4"] 34 | ollama = ["langchain_ollama", "ollama"] 35 | voyageai = ["voyageai"] 36 | huggingface = [ 37 | "numpy>=1.21,<2", 38 | "sentence-transformers>=2.5.0", 39 | "transformers>=4.36.0", 40 | "accelerate>=0.26.0", 41 | ] 42 | docs = [ 43 | "mkdocs>=1.6.0", 44 | "mkdocs-material>=9.5.0", 45 | "mkdocstrings[python]>=0.24.0", 46 | "mkdocs-git-revision-date-localized-plugin>=1.2.0", 47 | "pymdown-extensions>=10.0", 48 | ] 49 | all = [ 50 | "pymongo>=4.0.0", 51 | "oracledb>=2.0.0", 52 | "langchain_ollama", 53 | "ollama", 54 | "voyageai", 55 | "faiss-cpu>=1.7.4", 56 | "numpy>=1.21,<2", 57 | "sentence-transformers>=2.5.0", 58 | "transformers>=4.36.0", 59 | "accelerate>=0.26.0", 60 | ] 61 | 62 | [project.scripts] 63 | memorizz = "memorizz.cli:main" 64 | 65 | [tool.hatch.metadata] 66 | allow-direct-references = true 67 | 68 | [tool.hatch.build.targets.wheel.sources] 69 | "src" = "" 70 | 71 | [tool.hatch.build.targets.sdist] 72 | exclude = ["temp_unpack*"] 73 | -------------------------------------------------------------------------------- /src/memorizz/tests/test_vegetarian_recipe_agent.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | import pytest 5 | from dotenv import load_dotenv 6 | from scenario import Scenario, TestingAgent 7 | 8 | # Add the project root to the Python path 9 | project_root = os.path.abspath( 10 | os.path.join(os.path.dirname(__file__), "..", "..", "..") 11 | ) 12 | sys.path.insert(0, project_root) 13 | load_dotenv() 14 | 15 | from ..memagent import MemAgent # noqa: E402 16 | from ..memory_provider.mongodb.provider import ( # noqa: E402 17 | MongoDBConfig, 18 | MongoDBProvider, 19 | ) 20 | 21 | # Create a memory provider 22 | mongodb_config = MongoDBConfig(uri=os.environ["MONGODB_URI"]) 23 | memory_provider = MongoDBProvider(mongodb_config) 24 | 25 | Scenario.configure(testing_agent=TestingAgent(model="openai/gpt-4o-mini")) 26 | 27 | mem_agent = MemAgent(memory_provider=memory_provider) 28 | 29 | 30 | @pytest.mark.agent_test 31 | @pytest.mark.asyncio 32 | async def test_vegetarian_recipe_agent(): 33 | agent = mem_agent 34 | 35 | def vegetarian_recipe_agent(message, context): 
36 | # Call your agent here 37 | response = agent.run(message) 38 | return {"message": response} 39 | 40 | # Define the scenario 41 | scenario = Scenario( 42 | "User is looking for a dinner idea", 43 | agent=vegetarian_recipe_agent, 44 | success_criteria=[ 45 | "Recipe agent generates a vegetarian recipe", 46 | "Recipe includes a list of ingredients", 47 | "Recipe includes step-by-step cooking instructions", 48 | ], 49 | failure_criteria=[ 50 | "The recipe is not vegetarian or includes meat", 51 | "The agent asks more than two follow-up questions", 52 | ], 53 | ) 54 | 55 | # Run the scenario and get results 56 | result = await scenario.run() 57 | 58 | # Assert for pytest to know whether the test passed 59 | assert result.success 60 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: help install lint format check test clean docs-serve docs-build 2 | 3 | help: 4 | @echo "Memorizz Development Commands:" 5 | @echo "" 6 | @echo " make install Install package in editable mode with dev dependencies" 7 | @echo " make lint Run linting (flake8, check syntax)" 8 | @echo " make format Format code with black and isort" 9 | @echo " make check Run lint + format check (pre-commit)" 10 | @echo " make test Run tests" 11 | @echo " make docs-serve Launch mkdocs with hot reload" 12 | @echo " make docs-build Build the static documentation site" 13 | @echo " make clean Clean up generated files" 14 | @echo "" 15 | 16 | install: 17 | pip install -e ".[dev]" 18 | pip install pre-commit black flake8 isort 19 | pre-commit install 20 | 21 | lint: 22 | @echo "Running syntax check..." 23 | @find src/memorizz -name "*.py" ! -name "*backup*" ! -name "*original*" -exec python -m py_compile {} \; 24 | @echo "✓ Syntax check passed" 25 | @echo "" 26 | @echo "Running flake8..." 27 | @flake8 src/memorizz --max-line-length=120 --extend-ignore=E203,E501 --exclude='*backup*,*original*' || true 28 | @echo "" 29 | 30 | format: 31 | @echo "Formatting with black..." 32 | @black src/memorizz 33 | @echo "" 34 | @echo "Sorting imports with isort..." 35 | @isort src/memorizz --profile black 36 | @echo "" 37 | @echo "✓ Code formatted" 38 | 39 | check: 40 | @echo "Running pre-commit checks..." 41 | @pre-commit run --all-files || true 42 | 43 | test: 44 | pytest tests/ -v 45 | 46 | clean: 47 | find . -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null || true 48 | find . -type f -name "*.pyc" -delete 49 | find . -type f -name "*.pyo" -delete 50 | find . -type d -name "*.egg-info" -exec rm -rf {} + 2>/dev/null || true 51 | find . 
-type d -name ".pytest_cache" -exec rm -rf {} + 2>/dev/null || true 52 | 53 | docs-serve: 54 | mkdocs serve 55 | 56 | docs-build: 57 | mkdocs build --strict 58 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [tool:pytest] 2 | # Pytest configuration for MemAgent test suite 3 | 4 | # Test discovery 5 | testpaths = tests 6 | python_files = test_*.py *_test.py 7 | python_classes = Test* 8 | python_functions = test_* 9 | 10 | # Minimum version 11 | minversion = 6.0 12 | 13 | # Add options 14 | addopts = 15 | --verbose 16 | --tb=short 17 | --strict-markers 18 | --strict-config 19 | --disable-warnings 20 | --cov=src/memorizz 21 | --cov-report=html:htmlcov 22 | --cov-report=term-missing 23 | --cov-fail-under=80 24 | 25 | # Markers for organizing tests 26 | markers = 27 | unit: Unit tests for individual components 28 | integration: Integration tests between components 29 | performance: Performance and stress tests 30 | memory: Tests for memory functionality 31 | single_agent: Tests for single agent scenarios 32 | multi_agent: Tests for multi-agent scenarios 33 | backward_compatibility: Tests for backward compatibility 34 | save_load: Tests for save/load functionality 35 | conversation_memory: Tests for conversation memory 36 | semantic_memory: Tests for semantic memory 37 | episodic_memory: Tests for episodic memory 38 | procedural_memory: Tests for procedural memory 39 | e2e: End-to-end tests 40 | stress: Stress testing 41 | benchmark: Performance benchmarks 42 | compatibility: Compatibility tests 43 | slow: Tests that take more time to run 44 | requires_llm: Tests that need actual LLM integration 45 | requires_memory_provider: Tests that need memory provider 46 | 47 | # Directories to ignore during collection 48 | norecursedirs = 49 | .git 50 | .pytest_cache 51 | *.egg-info 52 | build 53 | dist 54 | htmlcov 55 | .venv 56 | venv 57 | 58 | # Timeout for tests (in seconds) 59 | timeout = 300 60 | 61 | # Parallel execution 62 | # Run with: pytest -n auto 63 | filterwarnings = 64 | ignore::DeprecationWarning 65 | ignore::PendingDeprecationWarning 66 | -------------------------------------------------------------------------------- /src/memorizz/internet_access/providers/offline.py: -------------------------------------------------------------------------------- 1 | """Offline fallback provider for environments without external access.""" 2 | from __future__ import annotations 3 | 4 | from typing import Any, Dict, List 5 | 6 | from ..base import InternetAccessProvider, register_provider 7 | from ..models import InternetPageContent, InternetSearchResult 8 | 9 | 10 | class OfflineInternetProvider(InternetAccessProvider): 11 | """Provider that returns informative placeholders when internet access is disabled.""" 12 | 13 | provider_name = "offline" 14 | 15 | def __init__(self, reason: str = "Internet access provider is not configured"): 16 | super().__init__({"reason": reason}) 17 | self.reason = reason 18 | 19 | def search( 20 | self, query: str, max_results: int = 5, **kwargs: Any 21 | ) -> List[InternetSearchResult]: 22 | message = ( 23 | f"Internet access unavailable: {self.reason}. Configure FIRECRAWL_API_KEY, " 24 | "TAVILY_API_KEY, or MEMORIZZ_DEFAULT_INTERNET_PROVIDER to enable live search." 
25 | ) 26 | return [ 27 | InternetSearchResult( 28 | url="", 29 | title="Internet access unavailable", 30 | snippet=message, 31 | metadata={"status": "offline"}, 32 | ) 33 | ] 34 | 35 | def fetch_url(self, url: str, **kwargs: Any) -> InternetPageContent: 36 | message = ( 37 | f"Cannot fetch '{url}' because internet access is disabled. " 38 | "Configure FIRECRAWL_API_KEY, TAVILY_API_KEY, or MEMORIZZ_DEFAULT_INTERNET_PROVIDER " 39 | "to enable browsing." 40 | ) 41 | return InternetPageContent( 42 | url=url, 43 | title="Internet access unavailable", 44 | content=message, 45 | metadata={"status": "offline"}, 46 | ) 47 | 48 | 49 | register_provider(OfflineInternetProvider.provider_name, OfflineInternetProvider) 50 | -------------------------------------------------------------------------------- /src/memorizz/memory_provider/__init__.py: -------------------------------------------------------------------------------- 1 | from ..enums.memory_type import MemoryType 2 | from .base import MemoryProvider 3 | 4 | 5 | # Lazy imports for optional dependencies 6 | def _lazy_import_mongodb(): 7 | """Lazy import MongoDB provider (requires pymongo).""" 8 | try: 9 | from .mongodb import MongoDBProvider 10 | 11 | return MongoDBProvider 12 | except ImportError as e: 13 | raise ImportError( 14 | "MongoDB provider requires pymongo. Install with: pip install pymongo" 15 | ) from e 16 | 17 | 18 | def _lazy_import_oracle(): 19 | """Lazy import Oracle provider (requires oracledb).""" 20 | try: 21 | from .oracle import OracleProvider 22 | 23 | return OracleProvider 24 | except ImportError as e: 25 | raise ImportError( 26 | "Oracle provider requires oracledb. Install with: pip install oracledb" 27 | ) from e 28 | 29 | 30 | # Make providers available via module-level getattr 31 | def __getattr__(name): 32 | if name == "MongoDBProvider": 33 | return _lazy_import_mongodb() 34 | elif name == "OracleProvider": 35 | return _lazy_import_oracle() 36 | elif name in ("FileSystemProvider", "FileSystemConfig"): 37 | try: 38 | from .filesystem import FileSystemConfig, FileSystemProvider 39 | 40 | return ( 41 | FileSystemProvider if name == "FileSystemProvider" else FileSystemConfig 42 | ) 43 | except ImportError as e: 44 | raise ImportError( 45 | "Filesystem provider requires optional dependencies. " 46 | "Install FAISS (pip install faiss-cpu) for vector search support." 47 | ) from e 48 | raise AttributeError(f"module '{__name__}' has no attribute '{name}'") 49 | 50 | 51 | __all__ = [ 52 | "MemoryProvider", 53 | "MongoDBProvider", 54 | "OracleProvider", 55 | "FileSystemProvider", 56 | "FileSystemConfig", 57 | "MemoryType", 58 | ] 59 | -------------------------------------------------------------------------------- /src/memorizz/llms/llm_factory.py: -------------------------------------------------------------------------------- 1 | # src/memorizz/llms/llm_factory.py 2 | 3 | from typing import Any, Dict 4 | 5 | from .azure import AzureOpenAI 6 | from .huggingface import HuggingFaceLLM 7 | from .llm_provider import LLMProvider 8 | from .openai import OpenAI 9 | 10 | 11 | def create_llm_provider(config: Dict[str, Any]) -> LLMProvider: 12 | """ 13 | Factory function to create an LLM provider instance from a configuration dictionary. 14 | 15 | Parameters: 16 | ----------- 17 | config : Dict[str, Any] 18 | A dictionary containing the provider name and its specific parameters. 
19 | Example for OpenAI: {"provider": "openai", "model": "gpt-4o"} 20 | Example for Azure: {"provider": "azure", "deployment_name": "my-gpt4"} 21 | 22 | Returns: 23 | -------- 24 | LLMProvider 25 | An instance of the specified LLM provider. 26 | 27 | Raises: 28 | ------- 29 | ValueError 30 | If the provider specified in the config is unknown. 31 | """ 32 | provider_name = config.get("provider", "openai").lower() 33 | if provider_name == "openai": 34 | # Create a copy of the config and remove the 'provider' key 35 | openai_config = config.copy() 36 | openai_config.pop("provider", None) 37 | return OpenAI(**openai_config) 38 | 39 | elif provider_name == "azure": 40 | # Create a copy of the config and remove the 'provider' key 41 | azure_config = config.copy() 42 | azure_config.pop("provider", None) 43 | return AzureOpenAI( 44 | azure_endpoint=azure_config.get("azure_endpoint"), 45 | api_version=azure_config.get("api_version"), 46 | deployment_name=azure_config.get("deployment_name"), 47 | ) 48 | 49 | elif provider_name == "huggingface": 50 | huggingface_config = config.copy() 51 | huggingface_config.pop("provider", None) 52 | return HuggingFaceLLM(**huggingface_config) 53 | 54 | else: 55 | raise ValueError(f"Unknown LLM provider: '{provider_name}'") 56 | -------------------------------------------------------------------------------- /examples/setup_oracle_user.py: -------------------------------------------------------------------------------- 1 | """ 2 | Oracle Database Setup Script (Convenience Wrapper) 3 | 4 | ⚠️ RECOMMENDED: For most users, use the CLI command: 5 | memorizz setup-oracle 6 | 7 | This script is provided as a convenience wrapper for: 8 | - Users who cloned the repository and prefer running Python scripts directly 9 | - Development and testing scenarios 10 | 11 | Setup Methods (in order of recommendation): 12 | 1. CLI Command (Best for pip-installed users): 13 | memorizz setup-oracle 14 | # or 15 | python -m memorizz.cli setup-oracle 16 | 17 | 2. This Script (Good for repo-cloned users): 18 | python examples/setup_oracle_user.py 19 | 20 | 3. 
Direct Import (For programmatic use): 21 | from memorizz.memory_provider.oracle import setup_oracle_user 22 | setup_oracle_user() 23 | 24 | The setup automatically detects your database configuration: 25 | - Admin mode: Full setup with user creation (local/self-hosted databases) 26 | - User-only mode: Uses existing schema (hosted databases like FreeSQL.com) 27 | """ 28 | 29 | import sys 30 | 31 | # Import from package (works for both pip-installed and repo-cloned users) 32 | try: 33 | from memorizz.memory_provider.oracle import setup_oracle_user 34 | except ImportError: 35 | print("✗ Failed to import setup function from memorizz package.") 36 | print("\nPlease ensure memorizz[oracle] is installed:") 37 | print(" pip install memorizz[oracle]") 38 | print("\nThen use the CLI command (recommended):") 39 | print(" memorizz setup-oracle") 40 | print("\nOr use the Python module:") 41 | print(" python -m memorizz.cli setup-oracle") 42 | sys.exit(1) 43 | 44 | 45 | if __name__ == "__main__": 46 | try: 47 | success = setup_oracle_user() 48 | sys.exit(0 if success else 1) 49 | except KeyboardInterrupt: 50 | print("\n\n⚠ Setup interrupted by user") 51 | sys.exit(1) 52 | except Exception as e: 53 | print(f"\n\n✗ Unexpected error: {e}") 54 | import traceback 55 | 56 | traceback.print_exc() 57 | sys.exit(1) 58 | -------------------------------------------------------------------------------- /src/memorizz/long_term_memory/semantic/entity_memory/README.md: -------------------------------------------------------------------------------- 1 | # Entity Memory Module 2 | 3 | Entity memory provides structured long-term storage for facts about specific people, 4 | organizations, products, or other named entities. Each entity is stored as a record of 5 | attribute–value pairs plus optional relations to other entities so agents can recall and 6 | update stable facts over time. 
7 | 8 | ## Features 9 | 10 | - Store entities with typed attributes, confidence scores, provenance, and timestamps 11 | - Link entities together via labeled relations (e.g., *coworker*, *purchased*) 12 | - Vector-searchable using the combined attribute text for natural-language lookup 13 | - Memory-ID aware so facts can be scoped to a specific user, tenant, or agent 14 | - Convenience helpers for recording single attributes, retrieving profiles, and 15 | attaching relations 16 | 17 | ## Usage 18 | 19 | ```python 20 | from memorizz.long_term_memory.semantic.entity_memory import EntityMemory 21 | from memorizz.memory_provider.mongodb import MongoDBProvider, MongoDBConfig 22 | 23 | provider = MongoDBProvider(MongoDBConfig("mongodb://localhost:27017")) 24 | entity_store = EntityMemory(provider) 25 | 26 | # Create or update an entity 27 | entity_id = entity_store.upsert_entity( 28 | name="Avery Stone", 29 | entity_type="customer", 30 | memory_id="tenant-123", 31 | attributes=[{"name": "preferred_language", "value": "Japanese", "confidence": 0.95}], 32 | ) 33 | 34 | # Record a new fact without building the full payload 35 | entity_store.record_attribute( 36 | entity_id=entity_id, 37 | attribute_name="favorite_product", 38 | attribute_value="Nebula Pro Drone", 39 | source="support_chat", 40 | ) 41 | 42 | # Look up relevant entities for a query 43 | matches = entity_store.search_entities("user who likes the drone", memory_id="tenant-123") 44 | ``` 45 | 46 | The module intentionally mirrors the layout of other long-term memory components (such 47 | as the knowledge base and persona modules) so it can be attached to `MemAgent` 48 | instances or used standalone. 49 | -------------------------------------------------------------------------------- /setup_dev.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Memorizz Development Environment Setup Script 4 | 5 | set -e 6 | 7 | echo "======================================================================" 8 | echo " Memorizz Development Environment Setup" 9 | echo "======================================================================" 10 | echo "" 11 | 12 | # Check if we're in a conda environment 13 | if [ -n "$CONDA_DEFAULT_ENV" ]; then 14 | echo "✓ Conda environment detected: $CONDA_DEFAULT_ENV" 15 | else 16 | echo "⚠ Warning: No conda environment detected" 17 | echo " Consider creating one: conda create -n memorizz python=3.11" 18 | fi 19 | echo "" 20 | 21 | # Install package in editable mode 22 | echo "Step 1: Installing Memorizz in editable mode..." 23 | pip install -e . 24 | echo "✓ Package installed" 25 | echo "" 26 | 27 | # Install development dependencies 28 | echo "Step 2: Installing development dependencies..." 29 | pip install pre-commit black flake8 isort pytest ipython jupyter 30 | echo "✓ Development dependencies installed" 31 | echo "" 32 | 33 | # Install pre-commit hooks 34 | echo "Step 3: Setting up pre-commit hooks..." 35 | pre-commit install 36 | echo "✓ Pre-commit hooks installed" 37 | echo "" 38 | 39 | # Run initial format 40 | echo "Step 4: Running initial code formatting..." 41 | black src/memorizz --quiet || true 42 | isort src/memorizz --profile black --quiet || true 43 | echo "✓ Code formatted" 44 | echo "" 45 | 46 | # Check syntax 47 | echo "Step 5: Checking Python syntax..." 
48 | find src/memorizz -name "*.py" -exec python -m py_compile {} \; 49 | echo "✓ Syntax check passed" 50 | echo "" 51 | 52 | echo "======================================================================" 53 | echo " ✅ Development environment setup complete!" 54 | echo "======================================================================" 55 | echo "" 56 | echo "Useful commands:" 57 | echo " make help - Show all available commands" 58 | echo " make lint - Check code quality" 59 | echo " make format - Format code" 60 | echo " make test - Run tests" 61 | echo "" 62 | echo "Git hooks are now active - code will be checked before each commit!" 63 | echo "" 64 | -------------------------------------------------------------------------------- /docs/memory-providers/oracle.md: -------------------------------------------------------------------------------- 1 | # Oracle Provider 2 | 3 | The Oracle AI Database provider offers fully managed JSON + vector storage for every MemoRizz memory type. It targets Oracle 23ai/26ai and lives in `src/memorizz/memory_provider/oracle/`. 4 | 5 | ## Highlights 6 | 7 | - Native VECTOR datatype with automatic HNSW indexes 8 | - Connection pooling + lazy schema creation 9 | - Works with JSON Relational Duality Views for structured + vector queries 10 | 11 | ## Installation 12 | 13 | ```bash 14 | pip install -e ".[oracle]" 15 | ``` 16 | 17 | ## Configuration 18 | 19 | ```python 20 | from memorizz.memory_provider.oracle import OracleProvider, OracleConfig 21 | 22 | provider = OracleProvider(OracleConfig( 23 | user="memorizz_user", 24 | password="SecurePass123!", 25 | dsn="localhost:1521/FREEPDB1", 26 | schema="MEMORIZZ", 27 | embedding_provider="openai", 28 | embedding_config={"model": "text-embedding-3-small"}, 29 | lazy_vector_indexes=False, 30 | )) 31 | ``` 32 | 33 | Set `lazy_vector_indexes=True` if you want faster cold starts and are ok with indexes being created on demand. 34 | 35 | ## Database Prep 36 | 37 | 1. Create a dedicated user with `CREATE SESSION`, `CREATE TABLE`, `CREATE INDEX`, `UNLIMITED TABLESPACE`. 38 | 2. Grant `EXECUTE ON DBMS_VECTOR` for vector search. 39 | 3. Run `memorizz setup-oracle` or the scripts in `src/memorizz/memory_provider/oracle/` to create the tables. 40 | 41 | ## Tables 42 | 43 | Every memory bucket gets its own table plus a VECTOR index: 44 | 45 | - `personas` 46 | - `toolbox` 47 | - `long_term_memory` 48 | - `entity_memory` 49 | - `short_term_memory` 50 | - `conversation_memory` 51 | - `workflow_memory` 52 | - `shared_memory` 53 | - `summaries` 54 | - `semantic_cache` 55 | 56 | ## Troubleshooting 57 | 58 | - **Vector datatype missing** – Ensure you're running 23ai+ and have `DBMS_VECTOR` privileges. 59 | - **Connection refused** – Use Easy Connect Plus (`host:port/service`) or TNS alias strings. 60 | - **Slow cold start** – Enable `lazy_vector_indexes` or pre-create indexes manually using the SQL files in the provider folder. 61 | 62 | For the full reference, open `src/memorizz/memory_provider/oracle/README.md`. 
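For users who prefer Python over the CLI, the schema preparation described under Database Prep can also be invoked programmatically. This short sketch follows `examples/setup_oracle_user.py` shown earlier; the error message is illustrative:

```python
# Programmatic alternative to the `memorizz setup-oracle` CLI step.
from memorizz.memory_provider.oracle import setup_oracle_user

# setup_oracle_user() returns True on success (see examples/setup_oracle_user.py).
if not setup_oracle_user():
    raise SystemExit("Oracle schema setup failed; check credentials and privileges.")
```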
63 | -------------------------------------------------------------------------------- /tests/unit/test_tavily_provider.py: -------------------------------------------------------------------------------- 1 | """Unit tests for the Tavily internet provider.""" 2 | 3 | from __future__ import annotations 4 | 5 | from unittest.mock import MagicMock 6 | 7 | import pytest 8 | 9 | from memorizz.internet_access.providers.tavily import TavilyProvider 10 | 11 | 12 | @pytest.mark.unit 13 | def test_tavily_search_normalizes_results(): 14 | provider = TavilyProvider( 15 | api_key="test-key", 16 | base_url="https://api.tavily.com", 17 | config={"include_raw_results": True}, 18 | ) 19 | provider._post = MagicMock( 20 | return_value={ 21 | "results": [ 22 | { 23 | "url": "https://example.com/doc", 24 | "title": "Example Doc", 25 | "content": "Snippet", 26 | "score": 0.8, 27 | "site": "example.com", 28 | "published_date": "2024-01-01", 29 | } 30 | ] 31 | } 32 | ) 33 | 34 | results = provider.search("Example query", max_results=2) 35 | 36 | assert len(results) == 1 37 | assert results[0].url == "https://example.com/doc" 38 | assert results[0].metadata["site"] == "example.com" 39 | assert results[0].raw["title"] == "Example Doc" 40 | provider._post.assert_called_once() 41 | 42 | 43 | @pytest.mark.unit 44 | def test_tavily_fetch_truncates_content_and_returns_raw(): 45 | provider = TavilyProvider( 46 | api_key="test-key", 47 | base_url="https://api.tavily.com", 48 | config={"max_content_chars": 20, "include_raw_page": True}, 49 | ) 50 | provider._post = MagicMock( 51 | return_value={ 52 | "results": [ 53 | { 54 | "url": "https://example.com/doc", 55 | "title": "Example Doc", 56 | "content": "A" * 40, 57 | "metadata": {"lang": "en"}, 58 | "site": "example.com", 59 | } 60 | ] 61 | } 62 | ) 63 | 64 | page = provider.fetch_url("https://example.com/doc") 65 | 66 | assert page.metadata["content_truncated"] is True 67 | assert page.metadata["content_returned_characters"] == 20 68 | assert len(page.content) == 20 69 | assert page.raw["title"] == "Example Doc" 70 | -------------------------------------------------------------------------------- /eval/README.md: -------------------------------------------------------------------------------- 1 | # Memorizz Evaluation Framework 2 | 3 | This directory contains evaluation scripts and benchmarks for testing Memorizz's memory capabilities across various tasks and scenarios. 4 | 5 | ## Structure 6 | 7 | ``` 8 | eval/ 9 | ├── README.md # This file 10 | ├── longmemeval/ # LongMemEval benchmark evaluation 11 | │ ├── evaluate_memorizz.py # Main evaluation script 12 | │ └── README.md # LongMemEval specific documentation 13 | └── [future benchmarks]/ # Additional evaluation frameworks 14 | ``` 15 | 16 | ## Overview 17 | 18 | The evaluation framework is designed to assess Memorizz's performance on various memory-related tasks, providing objective metrics to track improvements and compare against other agent memory systems. 19 | 20 | ## Available Benchmarks 21 | 22 | ### LongMemEval 23 | LongMemEval is a comprehensive benchmark for evaluating long-term memory capabilities of chat assistants. It tests five core memory abilities: 24 | 25 | 1. **Information Extraction** - Recalling specific information from extensive histories 26 | 2. **Multi-Session Reasoning** - Synthesizing information across multiple conversation sessions 27 | 3. **Knowledge Updates** - Recognizing and updating changed user information over time 28 | 4. 
**Temporal Reasoning** - Understanding time-aware aspects of information 29 | 5. **Abstention** - Knowing when to refuse answering based on insufficient information 30 | 31 | ## Quick Start 32 | 33 | 1. Install dependencies: 34 | ```bash 35 | pip install datasets transformers openai 36 | ``` 37 | 38 | 2. Set up environment variables: 39 | ```bash 40 | export OPENAI_API_KEY="your_openai_api_key" 41 | export MONGODB_URI="your_mongodb_uri" 42 | ``` 43 | 44 | 3. Run LongMemEval evaluation: 45 | ```bash 46 | cd eval/longmemeval 47 | python evaluate_memorizz.py 48 | ``` 49 | 50 | ## Adding New Benchmarks 51 | 52 | To add a new evaluation benchmark: 53 | 54 | 1. Create a new directory under `eval/` 55 | 2. Implement an evaluation script that follows the pattern in `longmemeval/evaluate_memorizz.py` 56 | 3. Update this README with documentation for your benchmark 57 | 4. Add any necessary dependencies to the project requirements 58 | 59 | ## Results 60 | 61 | Evaluation results will be saved in JSON format with timestamps, allowing for easy tracking of performance improvements over time. -------------------------------------------------------------------------------- /install_oracle_client.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Simple Oracle Instant Client Installer for macOS 3 | # Usage: ./install_oracle_client.sh 4 | 5 | set -e 6 | 7 | INSTALL_DIR="$HOME/oracle/instantclient" 8 | DOWNLOADS_DIR="$HOME/Downloads" 9 | 10 | echo "🔍 Looking for Oracle Instant Client ZIP in Downloads..." 11 | 12 | # Find the downloaded ZIP file 13 | ZIP_FILE=$(find "$DOWNLOADS_DIR" -name "instantclient-basic-macos.arm64-*.zip" -o -name "instantclient-basic-macosx.x86-64-*.zip" 2>/dev/null | head -1) 14 | 15 | if [ -z "$ZIP_FILE" ]; then 16 | echo "❌ Oracle Instant Client ZIP not found in Downloads folder" 17 | echo "" 18 | echo "📥 Please download it first:" 19 | echo " Apple Silicon: https://www.oracle.com/database/technologies/instant-client/macos-arm64-downloads.html" 20 | echo " Intel Mac: https://www.oracle.com/database/technologies/instant-client/macosx-x86-64-downloads.html" 21 | echo "" 22 | echo " Download the 'Basic Package' ZIP file, then run this script again." 23 | exit 1 24 | fi 25 | 26 | echo "✅ Found: $(basename "$ZIP_FILE")" 27 | echo "📦 Extracting to $INSTALL_DIR..." 28 | 29 | # Create directory and extract 30 | mkdir -p "$(dirname "$INSTALL_DIR")" 31 | unzip -q "$ZIP_FILE" -d "$(dirname "$INSTALL_DIR")" 32 | 33 | # Find the extracted directory (version number may vary) 34 | EXTRACTED_DIR=$(find "$(dirname "$INSTALL_DIR")" -type d -name "instantclient_*" | head -1) 35 | 36 | if [ -z "$EXTRACTED_DIR" ]; then 37 | echo "❌ Extraction failed or directory not found" 38 | exit 1 39 | fi 40 | 41 | echo "✅ Extracted to: $EXTRACTED_DIR" 42 | 43 | # Add to .zshrc 44 | if ! grep -q "DYLD_LIBRARY_PATH.*instantclient" ~/.zshrc 2>/dev/null; then 45 | echo "" >> ~/.zshrc 46 | echo "# Oracle Instant Client" >> ~/.zshrc 47 | echo "export DYLD_LIBRARY_PATH=$EXTRACTED_DIR:\$DYLD_LIBRARY_PATH" >> ~/.zshrc 48 | echo "✅ Added to ~/.zshrc" 49 | else 50 | echo "⚠️ DYLD_LIBRARY_PATH already configured in ~/.zshrc" 51 | fi 52 | 53 | # Source it for current session 54 | export DYLD_LIBRARY_PATH="$EXTRACTED_DIR:$DYLD_LIBRARY_PATH" 55 | 56 | echo "" 57 | echo "✅ Installation complete!" 
58 | echo "" 59 | echo "📝 To use in Python:" 60 | echo " import oracledb" 61 | echo " oracledb.init_oracle_client(lib_dir=\"$EXTRACTED_DIR\")" 62 | echo "" 63 | echo "💡 Restart your terminal or run: source ~/.zshrc" 64 | -------------------------------------------------------------------------------- /docs/getting-started/overview.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | MemoRizz is a composable memory framework for AI agents. It ships opinionated agent builders, configurable memory providers, and a cognitive-inspired architecture so that every memory you store is intentional. 4 | 5 | ## Architecture at a Glance 6 | 7 | ``` 8 | src/memorizz/ 9 | ├── long_term_memory/ # semantic, procedural, episodic systems 10 | ├── short_term_memory/ # semantic cache + working memory 11 | ├── coordination/ # shared memory for multi-agent orchestration 12 | ├── memory_provider/ # Oracle, MongoDB, custom backends 13 | └── memagent/ # builders + runtime orchestration 14 | ``` 15 | 16 | Each folder owns the implementation for a specific memory subsystem. Agent presets ("application modes") simply select the right combination of these subsystems. 17 | 18 | ## Key Capabilities 19 | 20 | | Capability | Description | Code Entry Point | 21 | |------------|-------------|------------------| 22 | | Long-term semantic memory | Fact + entity graph storage with embeddings | `long_term_memory/semantic/` 23 | | Procedural memory | Toolboxes and workflows for behavior execution | `long_term_memory/procedural/` 24 | | Episodic memory | Conversation history, summaries, and experiences | `long_term_memory/episodic/` 25 | | Short-term memory | Working context buffer + semantic cache | `short_term_memory/` 26 | | Memory providers | Database-specific persistence logic | `memory_provider/` 27 | | Application modes | Pre-bundled stacks per use case | `enums/application_mode.py` 28 | 29 | !!! tip "Map docs to code" 30 | Every section in this site mirrors these modules. When you update a doc, link back to the concrete module (for example ``::: memorizz.memagent.builders.MemAgentBuilder``) so the rendered API reference always matches the running code. 31 | 32 | ## Requirements 33 | 34 | - Python 3.7+ 35 | - An embedding/LLM provider such as OpenAI or Hugging Face 36 | - A memory provider backend (Oracle 23ai/26ai, MongoDB, or your own `MemoryProvider` implementation) 37 | 38 | ## Next Steps 39 | 40 | 1. Read through the [Concepts](concepts.md) page to understand each memory type. 41 | 2. Pick a provider under [Memory Providers](../memory-providers/oracle.md) and configure credentials. 42 | 3. Follow the [Python SDK Quickstart](python-sdk-quickstart.md) to spin up your first `MemAgent`. 43 | -------------------------------------------------------------------------------- /.venv/bin/activate: -------------------------------------------------------------------------------- 1 | # This file must be used with "source bin/activate" *from bash* 2 | # you cannot run it directly 3 | 4 | deactivate () { 5 | # reset old environment variables 6 | if [ -n "${_OLD_VIRTUAL_PATH:-}" ] ; then 7 | PATH="${_OLD_VIRTUAL_PATH:-}" 8 | export PATH 9 | unset _OLD_VIRTUAL_PATH 10 | fi 11 | if [ -n "${_OLD_VIRTUAL_PYTHONHOME:-}" ] ; then 12 | PYTHONHOME="${_OLD_VIRTUAL_PYTHONHOME:-}" 13 | export PYTHONHOME 14 | unset _OLD_VIRTUAL_PYTHONHOME 15 | fi 16 | 17 | # This should detect bash and zsh, which have a hash command that must 18 | # be called to get it to forget past commands. 
Without forgetting 19 | # past commands the $PATH changes we made may not be respected 20 | if [ -n "${BASH:-}" -o -n "${ZSH_VERSION:-}" ] ; then 21 | hash -r 22 | fi 23 | 24 | if [ -n "${_OLD_VIRTUAL_PS1:-}" ] ; then 25 | PS1="${_OLD_VIRTUAL_PS1:-}" 26 | export PS1 27 | unset _OLD_VIRTUAL_PS1 28 | fi 29 | 30 | unset VIRTUAL_ENV 31 | if [ ! "${1:-}" = "nondestructive" ] ; then 32 | # Self destruct! 33 | unset -f deactivate 34 | fi 35 | } 36 | 37 | # unset irrelevant variables 38 | deactivate nondestructive 39 | 40 | VIRTUAL_ENV="/Users/richmondalake/Desktop/memorizz/.venv" 41 | export VIRTUAL_ENV 42 | 43 | _OLD_VIRTUAL_PATH="$PATH" 44 | PATH="$VIRTUAL_ENV/bin:$PATH" 45 | export PATH 46 | 47 | # unset PYTHONHOME if set 48 | # this will fail if PYTHONHOME is set to the empty string (which is bad anyway) 49 | # could use `if (set -u; : $PYTHONHOME) ;` in bash 50 | if [ -n "${PYTHONHOME:-}" ] ; then 51 | _OLD_VIRTUAL_PYTHONHOME="${PYTHONHOME:-}" 52 | unset PYTHONHOME 53 | fi 54 | 55 | if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT:-}" ] ; then 56 | _OLD_VIRTUAL_PS1="${PS1:-}" 57 | if [ "x(.venv) " != x ] ; then 58 | PS1="(.venv) ${PS1:-}" 59 | else 60 | if [ "`basename \"$VIRTUAL_ENV\"`" = "__" ] ; then 61 | # special case for Aspen magic directories 62 | # see http://www.zetadev.com/software/aspen/ 63 | PS1="[`basename \`dirname \"$VIRTUAL_ENV\"\``] $PS1" 64 | else 65 | PS1="(`basename \"$VIRTUAL_ENV\"`)$PS1" 66 | fi 67 | fi 68 | export PS1 69 | fi 70 | 71 | # This should detect bash and zsh, which have a hash command that must 72 | # be called to get it to forget past commands. Without forgetting 73 | # past commands the $PATH changes we made may not be respected 74 | if [ -n "${BASH:-}" -o -n "${ZSH_VERSION:-}" ] ; then 75 | hash -r 76 | fi 77 | -------------------------------------------------------------------------------- /src/memorizz/memagent/models.py: -------------------------------------------------------------------------------- 1 | """Data models for MemAgent configuration and state.""" 2 | 3 | from typing import Any, Dict, List, Optional, Union 4 | 5 | from pydantic import BaseModel, Field 6 | 7 | from .constants import DEFAULT_INSTRUCTION, DEFAULT_MAX_STEPS, DEFAULT_TOOL_ACCESS 8 | 9 | 10 | class MemAgentModel(BaseModel): 11 | """Data model for persisting and loading MemAgent configuration.""" 12 | 13 | model: Optional[Any] = None 14 | llm_config: Optional[Dict[str, Any]] = None # Configuration for the LLM 15 | agent_id: Optional[str] = None 16 | tools: Optional[Union[List, Any]] = None 17 | persona: Optional[Any] = None 18 | instruction: Optional[str] = Field(default=DEFAULT_INSTRUCTION) 19 | application_mode: Optional[str] = "assistant" 20 | memory_types: Optional[ 21 | List[str] 22 | ] = None # Custom memory types that override application_mode defaults 23 | max_steps: int = Field(default=DEFAULT_MAX_STEPS) 24 | memory_ids: Optional[List[str]] = None 25 | tool_access: Optional[str] = Field(default=DEFAULT_TOOL_ACCESS) 26 | long_term_memory_ids: Optional[List[str]] = None 27 | delegates: Optional[List[str]] = None # Store delegate agent IDs 28 | embedding_config: Optional[Dict[str, Any]] = None 29 | semantic_cache: Optional[bool] = False # Enable semantic cache 30 | semantic_cache_config: Optional[ 31 | Union[Any, Dict[str, Any]] 32 | ] = None # Semantic cache configuration 33 | context_window_tokens: Optional[int] = None 34 | internet_access_provider: Optional[str] = None 35 | internet_access_config: Optional[Dict[str, Any]] = None 36 | 37 | model_config = { 38 | 
"arbitrary_types_allowed": True # Allow arbitrary types like Toolbox 39 | } 40 | 41 | 42 | class MemAgentConfig: 43 | """Configuration helper for MemAgent initialization.""" 44 | 45 | def __init__( 46 | self, 47 | instruction: str = DEFAULT_INSTRUCTION, 48 | max_steps: int = DEFAULT_MAX_STEPS, 49 | tool_access: str = DEFAULT_TOOL_ACCESS, 50 | semantic_cache: bool = False, 51 | **kwargs, 52 | ): 53 | self.instruction = instruction 54 | self.max_steps = max_steps 55 | self.tool_access = tool_access 56 | self.semantic_cache = semantic_cache 57 | 58 | # Store additional configuration 59 | for key, value in kwargs.items(): 60 | setattr(self, key, value) 61 | 62 | def to_dict(self) -> Dict[str, Any]: 63 | """Convert configuration to dictionary.""" 64 | return {k: v for k, v in self.__dict__.items() if not k.startswith("_")} 65 | -------------------------------------------------------------------------------- /docs/getting-started/python-sdk-quickstart.md: -------------------------------------------------------------------------------- 1 | # Python SDK Quickstart 2 | 3 | This walkthrough spins up a fully stateful agent with Oracle as the backing provider. Swap in another provider if you prefer MongoDB or a custom backend. 4 | 5 | ## 1. Install Dependencies 6 | 7 | ```bash 8 | pip install -e ".[docs]" # documentation + tooling 9 | pip install -e ".[oracle]" # choose oracle/mongodb/ollama/etc. as needed 10 | ``` 11 | 12 | Add or export your provider + LLM credentials (see `.env.example`). 13 | 14 | ## 2. Bootstrap Oracle (optional) 15 | 16 | ```bash 17 | ./install_oracle.sh # starts Oracle 23ai locally 18 | memorizz setup-oracle # prepares schemas and tables 19 | ``` 20 | 21 | The setup script automatically creates the JSON + vector tables for every memory bucket (personas, long-term memory, semantic cache, etc.). 22 | 23 | ## 3. Configure Embeddings 24 | 25 | ```python 26 | from memorizz.embeddings import configure_embeddings 27 | 28 | configure_embeddings("openai", { 29 | "model": "text-embedding-3-small", 30 | "api_key": os.environ["OPENAI_API_KEY"], 31 | }) 32 | ``` 33 | 34 | ## 4. Build an Agent 35 | 36 | ```python 37 | from memorizz.memory_provider.oracle import OracleProvider, OracleConfig 38 | from memorizz.memagent.builders import MemAgentBuilder 39 | 40 | oracle_provider = OracleProvider( 41 | OracleConfig( 42 | user="memorizz_user", 43 | password="SecurePass123!", 44 | dsn="localhost:1521/FREEPDB1", 45 | embedding_provider="openai", 46 | ) 47 | ) 48 | 49 | agent = (MemAgentBuilder() 50 | .with_instruction("You are a helpful assistant with persistent memory.") 51 | .with_memory_provider(oracle_provider) 52 | .with_llm_config({ 53 | "provider": "openai", 54 | "model": "gpt-4o-mini", 55 | "api_key": os.environ["OPENAI_API_KEY"], 56 | }) 57 | .build()) 58 | ``` 59 | 60 | ## 5. Run and Inspect Memory 61 | 62 | ```python 63 | response = agent.run("Hello, my name is Leah and I like dark mode UIs.") 64 | print(response) 65 | 66 | # Save a structured entity profile 67 | agent.memory.entity_memory.upsert( 68 | entity_id="leah", 69 | attributes={"preferences": ["dark mode UIs", "Python"]} 70 | ) 71 | ``` 72 | 73 | Check your provider (Oracle, MongoDB) to see the stored JSON, embeddings, and metadata for each memory bucket. 74 | 75 | ## Where to Go Next 76 | 77 | - Review every memory subsystem under [Memory Types](../memory-types/semantic.md). 78 | - Point a different provider at the agent with `MemAgentBuilder().with_memory_provider(...)`. 
79 | - Embed API docs inline with ``::: memorizz.memagent.memagent.MemAgent`` to expose parameters inside this site. 80 | -------------------------------------------------------------------------------- /.venv/bin/activate.fish: -------------------------------------------------------------------------------- 1 | # This file must be used with ". bin/activate.fish" *from fish* (http://fishshell.org) 2 | # you cannot run it directly 3 | 4 | function deactivate -d "Exit virtualenv and return to normal shell environment" 5 | # reset old environment variables 6 | if test -n "$_OLD_VIRTUAL_PATH" 7 | set -gx PATH $_OLD_VIRTUAL_PATH 8 | set -e _OLD_VIRTUAL_PATH 9 | end 10 | if test -n "$_OLD_VIRTUAL_PYTHONHOME" 11 | set -gx PYTHONHOME $_OLD_VIRTUAL_PYTHONHOME 12 | set -e _OLD_VIRTUAL_PYTHONHOME 13 | end 14 | 15 | if test -n "$_OLD_FISH_PROMPT_OVERRIDE" 16 | functions -e fish_prompt 17 | set -e _OLD_FISH_PROMPT_OVERRIDE 18 | functions -c _old_fish_prompt fish_prompt 19 | functions -e _old_fish_prompt 20 | end 21 | 22 | set -e VIRTUAL_ENV 23 | if test "$argv[1]" != "nondestructive" 24 | # Self destruct! 25 | functions -e deactivate 26 | end 27 | end 28 | 29 | # unset irrelevant variables 30 | deactivate nondestructive 31 | 32 | set -gx VIRTUAL_ENV "/Users/richmondalake/Desktop/memorizz/.venv" 33 | 34 | set -gx _OLD_VIRTUAL_PATH $PATH 35 | set -gx PATH "$VIRTUAL_ENV/bin" $PATH 36 | 37 | # unset PYTHONHOME if set 38 | if set -q PYTHONHOME 39 | set -gx _OLD_VIRTUAL_PYTHONHOME $PYTHONHOME 40 | set -e PYTHONHOME 41 | end 42 | 43 | if test -z "$VIRTUAL_ENV_DISABLE_PROMPT" 44 | # fish uses a function instead of an env var to generate the prompt. 45 | 46 | # save the current fish_prompt function as the function _old_fish_prompt 47 | functions -c fish_prompt _old_fish_prompt 48 | 49 | # with the original prompt function renamed, we can override with our own. 50 | function fish_prompt 51 | # Save the return status of the last command 52 | set -l old_status $status 53 | 54 | # Prompt override? 55 | if test -n "(.venv) " 56 | printf "%s%s" "(.venv) " (set_color normal) 57 | else 58 | # ...Otherwise, prepend env 59 | set -l _checkbase (basename "$VIRTUAL_ENV") 60 | if test $_checkbase = "__" 61 | # special case for Aspen magic directories 62 | # see http://www.zetadev.com/software/aspen/ 63 | printf "%s[%s]%s " (set_color -b blue white) (basename (dirname "$VIRTUAL_ENV")) (set_color normal) 64 | else 65 | printf "%s(%s)%s" (set_color -b blue white) (basename "$VIRTUAL_ENV") (set_color normal) 66 | end 67 | end 68 | 69 | # Restore the return status of the previous command. 70 | echo "exit $old_status" | . 71 | _old_fish_prompt 72 | end 73 | 74 | set -gx _OLD_FISH_PROMPT_OVERRIDE "$VIRTUAL_ENV" 75 | end 76 | -------------------------------------------------------------------------------- /src/memorizz/llms/llm_provider.py: -------------------------------------------------------------------------------- 1 | # src/memorizz/llms/llm_provider.py 2 | 3 | from typing import ( 4 | TYPE_CHECKING, 5 | Any, 6 | Callable, 7 | Dict, 8 | List, 9 | Optional, 10 | Protocol, 11 | runtime_checkable, 12 | ) 13 | 14 | # Use TYPE_CHECKING to handle forward references for type hints 15 | if TYPE_CHECKING: 16 | pass 17 | 18 | """ 19 | A protocol in Python (introduced in PEP 544 and part of the typing module) defines a structural typing rule. 20 | It specifies a set of methods and properties that a class must implement, 21 | but it does not require inheritance. 
22 | 23 | "If it walks like a duck and quacks like a duck, it's probably a duck." 🦆 24 | 25 | """ 26 | 27 | 28 | @runtime_checkable 29 | class LLMProvider(Protocol): 30 | """ 31 | A generic protocol that defines the contract for any LLM provider 32 | to be compatible with both the OpenAI and AzureOpenAI classes. 33 | """ 34 | 35 | # --- Attributes --- 36 | client: Any 37 | """Provides direct access to the underlying API client instance (e.g., openai.OpenAI or openai.AzureOpenAI).""" 38 | 39 | model: str 40 | """Stores the specific model or deployment name as a string (e.g., "gpt-4o").""" 41 | 42 | # --- Methods --- 43 | def get_tool_metadata(self, func: Callable) -> Dict[str, Any]: 44 | """Creates structured metadata (a JSON schema) from a Python function.""" 45 | ... 46 | 47 | def augment_docstring(self, docstring: str) -> str: 48 | """Uses the LLM to enhance a function's docstring with more detail.""" 49 | ... 50 | 51 | def generate_queries(self, docstring: str) -> List[str]: 52 | """Generates a list of example user queries for a given tool.""" 53 | ... 54 | 55 | def generate_text(self, prompt: str, instructions: Optional[str] = None) -> str: 56 | """A high-level method for simple text generation.""" 57 | ... 58 | 59 | def generate( 60 | self, 61 | messages: List[Dict[str, str]], 62 | tools: Optional[List[Dict[str, Any]]] = None, 63 | tool_choice: str = "auto", 64 | ) -> Any: 65 | """Generate a response from a list of messages (chat format), optionally with tool calling.""" 66 | ... 67 | 68 | def get_config(self) -> Dict[str, Any]: 69 | """ 70 | Returns a serializable dictionary of the provider's configuration. 71 | This is used for saving and reconstructing the agent. 72 | """ 73 | ... 74 | 75 | def get_last_usage(self) -> Optional[Dict[str, int]]: 76 | """Return token usage details (prompt/completion/total) from the most recent call.""" 77 | ... 78 | 79 | def get_context_window_tokens(self) -> Optional[int]: 80 | """Return the provider's context window size in tokens, when known.""" 81 | ... 
82 | -------------------------------------------------------------------------------- /src/memorizz/internet_access/base.py: -------------------------------------------------------------------------------- 1 | """Base classes for internet access providers.""" 2 | 3 | from __future__ import annotations 4 | 5 | import logging 6 | from abc import ABC, abstractmethod 7 | from typing import Any, Dict, List, Optional 8 | 9 | from .models import InternetPageContent, InternetSearchResult 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | 14 | class InternetAccessProvider(ABC): 15 | """Interface for providers that offer internet search / browsing.""" 16 | 17 | provider_name: str = "base" 18 | 19 | def __init__(self, config: Optional[Dict[str, Any]] = None): 20 | self._config = config or {} 21 | 22 | def get_provider_name(self) -> str: 23 | """Return the provider name.""" 24 | return getattr(self, "provider_name", self.__class__.__name__).lower() 25 | 26 | def get_config(self) -> Dict[str, Any]: 27 | """Return serializable config information.""" 28 | return dict(self._config) 29 | 30 | @abstractmethod 31 | def search( 32 | self, query: str, max_results: int = 5, **kwargs 33 | ) -> List[InternetSearchResult]: 34 | """Search the internet and return normalized results.""" 35 | 36 | @abstractmethod 37 | def fetch_url(self, url: str, **kwargs) -> InternetPageContent: 38 | """Fetch and parse the contents of a specific URL.""" 39 | 40 | def close(self) -> None: 41 | """Cleanup resources (override when necessary).""" 42 | return None 43 | 44 | 45 | _PROVIDER_REGISTRY: Dict[str, type[InternetAccessProvider]] = {} 46 | 47 | 48 | def register_provider(name: str, provider_cls: type[InternetAccessProvider]) -> None: 49 | """Register an internet access provider by name.""" 50 | _PROVIDER_REGISTRY[name.lower()] = provider_cls 51 | 52 | 53 | def get_provider_class(name: str) -> Optional[type[InternetAccessProvider]]: 54 | """Return the provider class for a given name.""" 55 | if not name: 56 | return None 57 | return _PROVIDER_REGISTRY.get(name.lower()) 58 | 59 | 60 | def create_internet_access_provider( 61 | name: str, config: Optional[Dict[str, Any]] = None 62 | ) -> Optional[InternetAccessProvider]: 63 | """Instantiate a provider from the registry.""" 64 | provider_cls = get_provider_class(name) 65 | if not provider_cls: 66 | logger.warning("Unknown internet access provider: %s", name) 67 | return None 68 | 69 | config = config or {} 70 | try: 71 | return provider_cls(**config) 72 | except TypeError: 73 | try: 74 | return provider_cls(config=config) # type: ignore[arg-type] 75 | except TypeError as exc: 76 | logger.error( 77 | "Failed to initialize provider '%s' with config keys: %s", 78 | name, 79 | list(config.keys()), 80 | ) 81 | raise exc 82 | -------------------------------------------------------------------------------- /docs/getting-started/concepts.md: -------------------------------------------------------------------------------- 1 | # Concepts 2 | 3 | MemoRizz models agent cognition around a handful of composable building blocks. Understanding these types makes it easier to reason about what your application mode actually enables. 
4 | 5 | ## Memory Types 6 | 7 | | Enum | Purpose | Realization | 8 | |------|---------|-------------| 9 | | `MemoryType.LONG_TERM_MEMORY` | Semantic knowledge base | Namespaces, personas, entity memory | 10 | | `MemoryType.ENTITY_MEMORY` | Structured profile data tied to entities | Attribute/value store with provenance | 11 | | `MemoryType.TOOLBOX` + `MemoryType.WORKFLOW_MEMORY` | Toolbox and workflow behaviors | `long_term_memory/procedural/` | 12 | | `MemoryType.CONVERSATION_MEMORY` | Episodic timeline of interactions | `long_term_memory/episodic/` 13 | | `MemoryType.SUMMARIES` | Cached digests of long conversations | `long_term_memory/episodic/summaries.py` 14 | | `MemoryType.SHORT_TERM_MEMORY` | Working context window | `short_term_memory/working_memory/` 15 | | `MemoryType.SEMANTIC_CACHE` | Fast, short-lived fact lookups | `short_term_memory/semantic_cache/` 16 | | `MemoryType.SHARED_MEMORY` | Coordination between multiple agents | `coordination/shared_memory/` 17 | 18 | !!! note 19 | The `MemoryType` enum lives in `src/memorizz/enums/memory_type.py`. Extending it is the first step when you want to introduce a new storage primitive. 20 | 21 | ## Memories vs. Providers 22 | 23 | - **Memory types** describe *what* your agent can recall. 24 | - **Memory providers** describe *where* the data lives (Oracle, MongoDB, local experiment, etc.). 25 | - **Application modes** (see `src/memorizz/enums/application_mode.py`) simply select the right combination of memories for a task. For example `ASSISTANT` activates conversation history, long-term facts, personas, and summaries; `DEEP_RESEARCH` focuses on toolbox access and shared memory. 26 | 27 | ## Lifecycle 28 | 29 | 1. **Capture** – Agents persist facts by calling methods on the active memory types (e.g., saving a persona or upserting entity attributes). 30 | 2. **Index** – Providers embed relevant fields using your configured embedding provider. 31 | 3. **Retrieve** – During a run, the `MemAgent` orchestrator fetches relevant rows from each memory and mixes them into the prompt stack. 32 | 4. **Summarize** – Episodic memory periodically compacts older interactions into summary memories that keep the context window manageable while preserving detail. 33 | 34 | ## How to Explore Further 35 | 36 | - Inspect `src/memorizz/MEMORY_ARCHITECTURE.md` for the full architecture notes that ship with the codebase. 37 | - Use `mkdocstrings` directives inside any doc page to render live API reference blocks, e.g. 38 | 39 | ```markdown 40 | ::: memorizz.memagent.builders.MemAgentBuilder 41 | handler: python 42 | ``` 43 | 44 | That directive renders directly from the Python source, so your docs always match the SDK version in the repository. 
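To make the mapping above concrete, the following sketch lists the `MemoryType` members and builds a `MemAgentModel` with an explicit memory stack instead of the `assistant` defaults. It assumes the enum values are plain strings (the storage bucket names), which is what `MemAgentModel.memory_types` expects:

```python
# Hedged sketch: choosing memory types explicitly rather than relying on the
# application_mode defaults. Assumes MemoryType values are plain strings.
from memorizz import MemoryType
from memorizz.memagent.models import MemAgentModel

print([m.name for m in MemoryType])  # e.g. CONVERSATION_MEMORY, LONG_TERM_MEMORY, ...

agent_spec = MemAgentModel(
    application_mode="assistant",
    memory_types=[
        MemoryType.CONVERSATION_MEMORY.value,
        MemoryType.LONG_TERM_MEMORY.value,
        MemoryType.SUMMARIES.value,
    ],
)
print(agent_spec.memory_types)
```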
45 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: MemoRizz 2 | site_description: Documentation for the MemoRizz memory framework for AI agents 3 | site_url: https://richmondalake.github.io/memorizz 4 | repo_name: RichmondAlake/memorizz 5 | repo_url: https://github.com/RichmondAlake/memorizz 6 | edit_uri: edit/main/docs/ 7 | 8 | nav: 9 | - Getting Started: 10 | - Overview: getting-started/overview.md 11 | - Concepts: getting-started/concepts.md 12 | - Python SDK Quickstart: getting-started/python-sdk-quickstart.md 13 | - Memory Types: 14 | - Semantic Memory: memory-types/semantic.md 15 | - Episodic Memory: memory-types/episodic.md 16 | - Procedural Memory: memory-types/procedural.md 17 | - Short-Term Memory: memory-types/short-term.md 18 | - Shared Memory: memory-types/shared.md 19 | - Memory Providers: 20 | - Oracle Provider: memory-providers/oracle.md 21 | - MongoDB Provider: memory-providers/mongodb.md 22 | - Filesystem Provider: memory-providers/filesystem.md 23 | - Bring Your Own Provider: memory-providers/custom.md 24 | - Use Cases: 25 | - Assistant Mode: use-cases/assistant-mode.md 26 | - Workflow Mode: use-cases/workflow-mode.md 27 | - Deep Research Mode: use-cases/deep-research-mode.md 28 | - Internet Access Providers: internet-access/providers.md 29 | - Utilities: 30 | - Context Window Stats: utilities/context_window_stats.md 31 | 32 | theme: 33 | name: material 34 | language: en 35 | features: 36 | - navigation.instant 37 | - navigation.sections 38 | - navigation.tabs 39 | - navigation.top 40 | - navigation.footer 41 | - toc.integrate 42 | - search.suggest 43 | - search.highlight 44 | - content.code.copy 45 | - content.code.annotate 46 | palette: 47 | - scheme: default 48 | primary: deep purple 49 | accent: indigo 50 | - scheme: slate 51 | primary: deep purple 52 | accent: lime 53 | font: 54 | text: "Inter" 55 | code: "JetBrains Mono" 56 | 57 | markdown_extensions: 58 | - admonition 59 | - footnotes 60 | - toc: 61 | permalink: true 62 | - pymdownx.details 63 | - pymdownx.superfences 64 | - pymdownx.tabbed: 65 | alternate_style: true 66 | - pymdownx.snippets 67 | - pymdownx.highlight: 68 | anchor_linenums: true 69 | - pymdownx.inlinehilite 70 | - pymdownx.keys 71 | 72 | plugins: 73 | - search 74 | - git-revision-date-localized: 75 | fallback_to_build_date: true 76 | enable_creation_date: true 77 | - mkdocstrings: 78 | handlers: 79 | python: 80 | paths: [src] 81 | options: 82 | docstring_style: google 83 | show_if_no_docstring: false 84 | filters: [] 85 | heading_level: 2 86 | show_category_heading: true 87 | show_source: true 88 | 89 | extra: 90 | social: 91 | - icon: fontawesome/brands/github 92 | link: https://github.com/RichmondAlake/memorizz 93 | -------------------------------------------------------------------------------- /src/memorizz/internet_access/__init__.py: -------------------------------------------------------------------------------- 1 | """Internet access provider interfaces and implementations.""" 2 | 3 | import logging 4 | import os 5 | 6 | from .base import ( 7 | InternetAccessProvider, 8 | create_internet_access_provider, 9 | get_provider_class, 10 | register_provider, 11 | ) 12 | from .models import InternetPageContent, InternetSearchResult 13 | from .providers.firecrawl import FirecrawlProvider 14 | from .providers.offline import OfflineInternetProvider 15 | from .providers.tavily import TavilyProvider 16 | 17 | 
logger = logging.getLogger(__name__) 18 | 19 | DEFAULT_PROVIDER_ENV = "MEMORIZZ_DEFAULT_INTERNET_PROVIDER" 20 | DEFAULT_PROVIDER_API_KEY_ENV = "MEMORIZZ_DEFAULT_INTERNET_PROVIDER_API_KEY" 21 | 22 | __all__ = [ 23 | "InternetAccessProvider", 24 | "InternetPageContent", 25 | "InternetSearchResult", 26 | "FirecrawlProvider", 27 | "TavilyProvider", 28 | "OfflineInternetProvider", 29 | "create_internet_access_provider", 30 | "register_provider", 31 | "get_provider_class", 32 | "get_default_internet_access_provider", 33 | ] 34 | 35 | 36 | def get_default_internet_access_provider() -> InternetAccessProvider: 37 | """ 38 | Return a usable internet provider for Deep Research agents. 39 | 40 | Preference order: 41 | 1. Explicit provider via MEMORIZZ_DEFAULT_INTERNET_PROVIDER. 42 | 2. Tavily (TAVILY_API_KEY). 43 | 3. Firecrawl (FIRECRAWL_API_KEY). 44 | 4. Offline provider placeholder so the tool still responds. 45 | """ 46 | 47 | provider_name = os.getenv(DEFAULT_PROVIDER_ENV) 48 | provider_config = {} 49 | if provider_name: 50 | api_key = os.getenv(DEFAULT_PROVIDER_API_KEY_ENV) 51 | if api_key: 52 | provider_config["api_key"] = api_key 53 | try: 54 | provider = create_internet_access_provider(provider_name, provider_config) 55 | if provider: 56 | return provider 57 | except Exception as exc: # pragma: no cover - best effort fallback 58 | logger.warning( 59 | "Failed to initialize provider '%s' from env: %s", provider_name, exc 60 | ) 61 | 62 | tavily_key = os.getenv("TAVILY_API_KEY") 63 | if tavily_key: 64 | try: 65 | return TavilyProvider(api_key=tavily_key) 66 | except Exception as exc: # pragma: no cover - best effort fallback 67 | logger.warning("Failed to initialize Tavily provider: %s", exc) 68 | 69 | firecrawl_key = os.getenv("FIRECRAWL_API_KEY") 70 | if firecrawl_key: 71 | try: 72 | return FirecrawlProvider(api_key=firecrawl_key) 73 | except Exception as exc: # pragma: no cover - best effort fallback 74 | logger.warning("Failed to initialize Firecrawl provider: %s", exc) 75 | 76 | reason = ( 77 | "Set TAVILY_API_KEY, FIRECRAWL_API_KEY, or MEMORIZZ_DEFAULT_INTERNET_PROVIDER " 78 | "to enable live internet access." 79 | ) 80 | return OfflineInternetProvider(reason=reason) 81 | -------------------------------------------------------------------------------- /docs/memory-providers/filesystem.md: -------------------------------------------------------------------------------- 1 | # Filesystem Provider 2 | 3 | The filesystem provider persists every MemoRizz memory type as JSON files on disk and uses FAISS for vector similarity search. It is ideal for local development, CI runs, or lightweight deployments where running MongoDB/Oracle would be overkill. 4 | 5 | ## Highlights 6 | 7 | - No external database required—everything lives under the configured root directory. 8 | - Works with the exact same `MemoryProvider` API as Oracle/MongoDB, so agents can swap providers without code changes. 9 | - Optional FAISS acceleration for semantic queries with automatic fallbacks to cosine or keyword search when embeddings are missing. 10 | 11 | ## Installation 12 | 13 | ```bash 14 | pip install memorizz[filesystem] 15 | ``` 16 | 17 | This installs `faiss-cpu`. If you skip the extra, the provider still works but falls back to keyword search until FAISS (and an embedding provider) are available. 
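If you are unsure whether the optional dependency made it into your environment, a quick import check shows which search path the provider will use. This assumes only that the extra installs the `faiss` module, which `faiss-cpu` provides.

```python
try:
    import faiss  # noqa: F401 - installed by the memorizz[filesystem] extra
    print("FAISS available: vector similarity search can be used")
except ImportError:
    print("FAISS missing: the provider falls back to cosine/keyword search")
```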
18 | 19 | ## Configuration 20 | 21 | ```python 22 | from pathlib import Path 23 | from memorizz.memory_provider import FileSystemConfig, FileSystemProvider 24 | 25 | config = FileSystemConfig( 26 | root_path=Path("~/.memorizz").expanduser(), # Each MemoryType gets its own folder 27 | lazy_vector_indexes=True, # Build FAISS indexes on demand 28 | embedding_provider="openai", # Optional, enables semantic search 29 | embedding_config={"model": "text-embedding-3-small"}, 30 | ) 31 | 32 | provider = FileSystemProvider(config) 33 | ``` 34 | 35 | - `root_path` is the only required field. The provider creates subdirectories named after each `MemoryType`. 36 | - Set `lazy_vector_indexes=True` to skip vector index builds until a semantic query hits a store. 37 | - You can also pass a fully constructed `EmbeddingManager` instance via `embedding_provider` for complete control. 38 | 39 | ## Storage Layout 40 | 41 | ``` 42 | ~/.memorizz/ 43 | ├── conversation_memory/ 44 | │ ├── index.json # Lightweight metadata for quick lookups 45 | │ ├── 4c1d9a2f.json # Individual memory documents 46 | │ └── vector.index (optional) # Saved FAISS index when embeddings are enabled 47 | ├── long_term_memory/ 48 | │ └── … 49 | └── agents/ # Stored MemAgent configurations 50 | ``` 51 | 52 | Each JSON file contains the raw document plus MemoRizz metadata (`_id`, `memory_id`, timestamps, embeddings, etc.). When FAISS is installed, the provider builds an in-memory index and snapshots it to `vector.index` for fast restarts. 53 | 54 | ## Usage Tips 55 | 56 | - **Embeddings optional**: If you only need deterministic lookups (ID/name filters), skip embedding configuration and the provider will stick to metadata filtering/keyword search. 57 | - **Backups**: Because everything is plain JSON, standard tools (`tar`, `rsync`, cloud sync) can back up or relocate memory stores easily. 58 | - **Cleanup**: Call `delete_memagent(..., cascade=True)` to remove all memories tied to an agent (the provider deletes the related JSON files). 59 | 60 | For in-depth details, see `src/memorizz/memory_provider/filesystem/provider.py`. 
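The cascade cleanup mentioned in the usage tips looks like this in practice. The sketch reuses the `provider` built in the configuration example above and mirrors `tests/unit/test_filesystem_provider.py`; the field values are illustrative.

```python
from memorizz.enums import MemoryType
from memorizz.memagent import MemAgentModel

# Store a memory and an agent configuration that references it
provider.store(
    {"content": "greeting", "memory_id": "shared-memory"},
    memory_store_type=MemoryType.CONVERSATION_MEMORY,
)
agent = MemAgentModel(
    instruction="cascade demo",
    memory_ids=["shared-memory"],
    application_mode="assistant",
)
agent_id = provider.store_memagent(agent)

# Cascade delete removes the agent and the JSON files for its memories
provider.delete_memagent(agent_id, cascade=True)
assert provider.list_all(MemoryType.CONVERSATION_MEMORY) == []
```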
61 | -------------------------------------------------------------------------------- /src/memorizz/memagent/managers/internet_access_manager.py: -------------------------------------------------------------------------------- 1 | """Manager responsible for routing internet access actions to providers.""" 2 | 3 | from __future__ import annotations 4 | 5 | import logging 6 | from typing import Any, Dict, List, Optional 7 | 8 | from ...internet_access import InternetAccessProvider 9 | from ...internet_access.models import InternetPageContent, InternetSearchResult 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | 14 | class InternetAccessManager: 15 | """Wrapper over InternetAccessProvider implementations.""" 16 | 17 | def __init__(self, provider: Optional[InternetAccessProvider] = None): 18 | self.provider = provider 19 | 20 | def set_provider( 21 | self, provider: Optional[InternetAccessProvider] 22 | ) -> Optional[InternetAccessProvider]: 23 | """Attach or detach an internet provider.""" 24 | previous = self.provider 25 | if previous and previous is not provider: 26 | try: 27 | previous.close() 28 | except Exception as exc: 29 | logger.debug("Failed to close previous internet provider: %s", exc) 30 | self.provider = provider 31 | return previous 32 | 33 | def is_enabled(self) -> bool: 34 | """Return True if provider is available.""" 35 | return self.provider is not None 36 | 37 | def get_provider_name(self) -> Optional[str]: 38 | if not self.provider: 39 | return None 40 | return self.provider.get_provider_name() 41 | 42 | def get_provider_config(self) -> Optional[Dict[str, Any]]: 43 | if not self.provider: 44 | return None 45 | return self.provider.get_config() 46 | 47 | def search( 48 | self, query: str, max_results: int = 5, **kwargs 49 | ) -> List[Dict[str, Any]]: 50 | """Execute a search query using the provider.""" 51 | if not self.provider: 52 | raise ValueError("Internet access provider is not configured") 53 | results = self.provider.search(query=query, max_results=max_results, **kwargs) 54 | return [self._result_to_dict(item) for item in results] 55 | 56 | def fetch_url(self, url: str, **kwargs) -> Dict[str, Any]: 57 | """Fetch a URL using the provider.""" 58 | if not self.provider: 59 | raise ValueError("Internet access provider is not configured") 60 | page = self.provider.fetch_url(url=url, **kwargs) 61 | return self._page_to_dict(page) 62 | 63 | # Serialization helpers ------------------------------------------------- 64 | def _result_to_dict(self, result: Any) -> Dict[str, Any]: 65 | if isinstance(result, InternetSearchResult): 66 | return result.to_dict() 67 | if isinstance(result, dict): 68 | return result 69 | return {"value": result} 70 | 71 | def _page_to_dict(self, page: Any) -> Dict[str, Any]: 72 | if isinstance(page, InternetPageContent): 73 | return page.to_dict() 74 | if isinstance(page, dict): 75 | return page 76 | return {"content": page} 77 | -------------------------------------------------------------------------------- /src/memorizz/short_term_memory/working_memory/cwm.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | # from ..memagent import MemAgent 4 | from ...enums.memory_type import MemoryType 5 | 6 | 7 | # Can take in an agent and then return a prompt that informs the agent on how to manage the context window 8 | class CWM: 9 | # def __init__(self, agent: MemAgent): 10 | # self.agent = agent 11 | 12 | @staticmethod 13 | def get_prompt_from_memory_types(memory_types: 
List[MemoryType]):
14 |         prompt = "You are an AI Agent endowed with a powerful, multi-tiered memory augmentation system. Your mission is to use all available memory modalities to deliver consistent, accurate, and context-rich responses. The aim is to ensure that through augmented memory, you become believable, capable, and reliable."
15 | 
16 |         for memory_type in memory_types:
17 |             prompt += CWM._generate_prompt_for_memory_type(memory_type)
18 | 
19 |         return prompt
20 | 
21 |     @staticmethod
22 |     def _generate_prompt_for_memory_type(memory_type: MemoryType):
23 |         # Define memory type prompts in a dictionary for better maintainability
24 |         memory_prompts = {
25 |             MemoryType.CONVERSATION_MEMORY: {
26 |                 "description": "This is a memory type that stores the conversation history between the agent and the user.",
27 |                 "usage": "Use this to provide continuity, avoid repeating yourself, and reference prior turns.",
28 |             },
29 |             MemoryType.WORKFLOW_MEMORY: {
30 |                 "description": "This is a memory type that stores the workflow history between the agent and the user.",
31 |                 "usage": "Use this to provide continuity, avoid repeating yourself, and reference prior turns.",
32 |             },
33 |             MemoryType.SHARED_MEMORY: {
34 |                 "description": "This is a memory type that stores shared blackboard information for multi-agent coordination.",
35 |                 "usage": "Use this to coordinate with other agents, understand your role in the agent hierarchy, and access shared coordination activities and context.",
36 |             },
37 |             MemoryType.SUMMARIES: {
38 |                 "description": "This is a memory type that stores compressed summaries of past conversations and interactions to preserve important context while managing memory efficiently.",
39 |                 "usage": "Use these summaries to understand the broader context of your interactions with the user, recall important topics, preferences, and past decisions. This helps you provide more personalized and context-aware responses even when specific conversations are no longer in active memory.",
40 |             },
41 |         }
42 | 
43 |         # Get the prompt configuration for this memory type
44 |         prompt_config = memory_prompts.get(memory_type)
45 | 
46 |         if prompt_config:
47 |             prompt = f"\n\nMemory Type: {memory_type.value}\n"
48 |             prompt += f"Memory Type Description: {prompt_config['description']}\n"
49 |             prompt += f"Memory Type Usage: {prompt_config['usage']}\n"
50 |             return prompt
51 |         else:
52 |             # Handle unknown memory types gracefully
53 |             return f"\n\nMemory Type: {memory_type.value}\n"
54 | 
55 | 
56 | # Can take in an array of memory stores and then return a prompt that informs the agent on how to manage the context window
57 | 
--------------------------------------------------------------------------------
/docs/utilities/context_window_stats.md:
--------------------------------------------------------------------------------
1 | # Context Window Stats Utility
2 | 
3 | Track how much of the model's context window a MemAgent has consumed at any point during a conversation. The agent records usage every time it calls the underlying LLM and makes the latest snapshot accessible through logs and code.
4 | 5 | ## What the Agent Records 6 | 7 | Each snapshot includes: 8 | 9 | - `timestamp`: ISO-8601 timestamp for the measurement 10 | - `stage`: the agent stage that triggered the measurement (e.g., `iteration_1`, `memory_compression`) 11 | - `prompt_tokens`: number of tokens sent to the model 12 | - `completion_tokens`: tokens generated by the model 13 | - `total_tokens`: sum of prompt + completion tokens 14 | - `context_window_tokens`: configured or inferred window size 15 | - `percentage_used`: total usage / context window × 100 16 | 17 | If the provider does not return usage information, MemoRizz falls back to `None` so downstream code can handle missing values gracefully. 18 | 19 | ## Logging 20 | 21 | By default the agent logs each measurement at `INFO` level: 22 | 23 | ``` 24 | Context window usage (iteration_2): 2,350/128,000 tokens (1.84%) | prompt=2,100 completion=250 25 | ``` 26 | 27 | Monitor your existing log stream (e.g., `tail -f app.log`) to watch the token budget drain in real time. 28 | 29 | ## Programmatic Access 30 | 31 | Use `memagent.get_context_window_stats()` to retrieve the most recent snapshot after an interaction: 32 | 33 | ```python 34 | response = agent.run("Summarize the workshop agenda we discussed yesterday.") 35 | 36 | stats = agent.get_context_window_stats() 37 | if stats: 38 | print( 39 | f"Total tokens: {stats['total_tokens']}" 40 | f" ({stats['percentage_used']:.2f}% of {stats['context_window_tokens']})" 41 | ) 42 | else: 43 | print("Provider did not return usage information.") 44 | ``` 45 | 46 | Snapshots are ordinary dictionaries, so you can emit them to observability pipelines, dashboards, or audits. 47 | 48 | ## Configuring the Context Window 49 | 50 | MemoRizz tries to detect the context window automatically: 51 | 52 | 1. Use the explicit `context_window_tokens` argument passed to `MemAgent` or `MemAgentBuilder`. 53 | 2. If not provided, look for `context_window_tokens` / `max_context_tokens` / `context_window` inside `llm_config`. 54 | 3. Fall back to the provider's built-in knowledge (OpenAI & Azure expose known limits; Hugging Face derives the tokenizer limit). 55 | 56 | You can override the inferred value at any time: 57 | 58 | ```python 59 | agent = (MemAgentBuilder() 60 | .with_llm_config({"provider": "openai", "model": "gpt-4o-mini"}) 61 | .with_memory_provider(provider) 62 | .build() 63 | ) 64 | 65 | # Later, adjust for a custom fine-tuned model 66 | agent._context_window_tokens = 32_000 67 | ``` 68 | 69 | > **Tip:** When you know the exact budget (e.g., for a fine-tuned or local model) always pass it explicitly so percentage calculations remain accurate. 70 | 71 | ## Provider Support 72 | 73 | - **OpenAI / Azure OpenAI:** Token usage comes directly from the API response (`response.usage`). 74 | - **Hugging Face:** The provider counts tokens via the active tokenizer (falling back to whitespace splitting when needed). 75 | - **Custom Providers:** Implement the `LLMProvider` protocol’s `get_last_usage()` and `get_context_window_tokens()` methods to plug into the same reporting pipeline. 76 | 77 | With these hooks in place, every MemAgent—single or multi-step—can report how close it is to the model’s context limit, helping you catch runaway prompts before they overflow the window. 
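For custom providers, a minimal sketch of the two reporting hooks is shown below. Only `get_last_usage()` and `get_context_window_tokens()` are named in this guide; the shape of the usage dictionary, the recording helper, and the rest of the `LLMProvider` protocol are assumptions here, so treat `src/memorizz/llms/llm_provider.py` as the authoritative interface.

```python
from typing import Dict, Optional


class MyLocalProvider:
    """Partial sketch: only the context-window reporting hooks are shown."""

    def __init__(self, context_window_tokens: int = 32_000):
        self._context_window_tokens = context_window_tokens
        self._last_usage: Optional[Dict[str, int]] = None

    def get_context_window_tokens(self) -> int:
        # Budget used for the percentage_used calculation
        return self._context_window_tokens

    def get_last_usage(self) -> Optional[Dict[str, int]]:
        # Return None when the backend reports no usage for a call
        return self._last_usage

    def _record_usage(self, prompt_tokens: int, completion_tokens: int) -> None:
        # Hypothetical helper: call this after each completion so the agent can snapshot usage
        self._last_usage = {
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": prompt_tokens + completion_tokens,
        }
```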
78 | -------------------------------------------------------------------------------- /src/memorizz/cli.py: -------------------------------------------------------------------------------- 1 | """CLI commands for Memorizz.""" 2 | 3 | import os 4 | import subprocess 5 | import sys 6 | from pathlib import Path 7 | 8 | 9 | def install_oracle(): 10 | """Install Oracle database using install_oracle.sh script.""" 11 | # Try to find install_oracle.sh script 12 | # Check multiple possible locations 13 | possible_paths = [ 14 | # Current directory (for local development) 15 | Path("install_oracle.sh"), 16 | # Package scripts directory (when installed from PyPI) 17 | Path(__file__).parent / "scripts" / "install_oracle.sh", 18 | # Repository root (if installed in editable mode or running from repo) 19 | Path(__file__).parent.parent.parent / "install_oracle.sh", 20 | # Alternative repository root path 21 | Path(__file__).parent.parent.parent.parent / "install_oracle.sh", 22 | ] 23 | 24 | script_path = None 25 | for path in possible_paths: 26 | if path.exists() and path.is_file(): 27 | script_path = path 28 | break 29 | 30 | if not script_path: 31 | print("✗ install_oracle.sh script not found") 32 | print("\nThe install_oracle.sh script is only available when:") 33 | print(" 1. You've cloned the repository, or") 34 | print(" 2. You're running from the repository directory") 35 | print("\nAlternative: Install Oracle manually with Docker:") 36 | print(" docker run -d --name oracle-memorizz -p 1521:1521 \\") 37 | print(" -e ORACLE_PWD=MyPassword123! \\") 38 | print(" container-registry.oracle.com/database/free:latest-lite") 39 | print("\nOr use the script directly if you have it:") 40 | print(" ./install_oracle.sh") 41 | return False 42 | 43 | # Make script executable 44 | os.chmod(script_path, 0o755) 45 | 46 | # Execute the script 47 | try: 48 | result = subprocess.run( 49 | [str(script_path)], 50 | check=False, # Don't raise exception on non-zero exit 51 | capture_output=False, # Show output in real-time 52 | ) 53 | return result.returncode == 0 54 | except Exception as e: 55 | print(f"✗ Failed to execute install_oracle.sh: {e}") 56 | return False 57 | 58 | 59 | def setup_oracle(): 60 | """Run Oracle database setup.""" 61 | try: 62 | from memorizz.memory_provider.oracle import setup_oracle_user 63 | 64 | return setup_oracle_user() 65 | except ImportError as e: 66 | print(f"✗ Failed to import setup module: {e}") 67 | print("\nPlease ensure memorizz[oracle] is installed:") 68 | print(" pip install memorizz[oracle]") 69 | return False 70 | 71 | 72 | def main(): 73 | """Main CLI entry point.""" 74 | if len(sys.argv) < 2: 75 | print("Memorizz CLI") 76 | print("\nAvailable commands:") 77 | print(" install-oracle Install Oracle database container") 78 | print(" setup-oracle Set up Oracle database schema") 79 | print("\nUsage:") 80 | print(" memorizz install-oracle") 81 | print(" memorizz setup-oracle") 82 | print(" python -m memorizz.cli ") 83 | sys.exit(1) 84 | 85 | command = sys.argv[1] 86 | 87 | if command == "install-oracle": 88 | success = install_oracle() 89 | sys.exit(0 if success else 1) 90 | elif command == "setup-oracle": 91 | success = setup_oracle() 92 | sys.exit(0 if success else 1) 93 | else: 94 | print(f"✗ Unknown command: {command}") 95 | print("Run 'memorizz' or 'python -m memorizz.cli' for help") 96 | sys.exit(1) 97 | 98 | 99 | if __name__ == "__main__": 100 | main() 101 | -------------------------------------------------------------------------------- 
/src/memorizz/coordination/shared_memory/messages.py: -------------------------------------------------------------------------------- 1 | """Typed message helpers for shared memory coordination.""" 2 | 3 | from __future__ import annotations 4 | 5 | import uuid 6 | from dataclasses import dataclass, field 7 | from datetime import datetime 8 | from enum import Enum 9 | from typing import Any, Dict, List, Optional 10 | 11 | 12 | class SharedMemoryMessageType(str, Enum): 13 | """Supported shared memory message types.""" 14 | 15 | COMMAND = "COMMAND" 16 | STATUS = "STATUS" 17 | REPORT = "REPORT" 18 | QUESTION = "QUESTION" 19 | 20 | 21 | @dataclass 22 | class SharedMemoryMessage: 23 | """Base shared memory message.""" 24 | 25 | message_type: SharedMemoryMessageType 26 | payload: Dict[str, Any] 27 | message_id: str = field(default_factory=lambda: str(uuid.uuid4())) 28 | created_at: str = field(default_factory=lambda: datetime.utcnow().isoformat()) 29 | 30 | def to_dict(self) -> Dict[str, Any]: 31 | """Return serializable payload.""" 32 | return { 33 | "message_id": self.message_id, 34 | "message_type": self.message_type.value, 35 | "created_at": self.created_at, 36 | "payload": self.payload, 37 | } 38 | 39 | 40 | def _validate_fields(data: Dict[str, Any], fields: List[str], message_type: str): 41 | missing = [field for field in fields if not data.get(field)] 42 | if missing: 43 | raise ValueError( 44 | f"{message_type} message missing required fields: {', '.join(missing)}" 45 | ) 46 | 47 | 48 | def create_command_message( 49 | command_id: str, 50 | target_agent_id: str, 51 | instructions: str, 52 | priority: int = 3, 53 | dependencies: Optional[List[str]] = None, 54 | metadata: Optional[Dict[str, Any]] = None, 55 | ) -> SharedMemoryMessage: 56 | """Build a validated COMMAND message payload.""" 57 | payload = { 58 | "command_id": command_id, 59 | "target_agent_id": target_agent_id, 60 | "instructions": instructions, 61 | "priority": priority, 62 | "dependencies": dependencies or [], 63 | "metadata": metadata or {}, 64 | } 65 | _validate_fields( 66 | payload, ["command_id", "target_agent_id", "instructions"], "COMMAND" 67 | ) 68 | return SharedMemoryMessage(SharedMemoryMessageType.COMMAND, payload) 69 | 70 | 71 | def create_status_message( 72 | command_id: str, 73 | agent_id: str, 74 | status: str, 75 | progress: int, 76 | blockers: Optional[str] = None, 77 | summary_ids: Optional[List[str]] = None, 78 | ) -> SharedMemoryMessage: 79 | """Build a validated STATUS message payload.""" 80 | payload = { 81 | "command_id": command_id, 82 | "agent_id": agent_id, 83 | "status": status, 84 | "progress": max(0, min(progress, 100)), 85 | "blockers": blockers, 86 | "summary_ids": summary_ids or [], 87 | } 88 | _validate_fields(payload, ["command_id", "agent_id", "status"], "STATUS") 89 | return SharedMemoryMessage(SharedMemoryMessageType.STATUS, payload) 90 | 91 | 92 | def create_report_message( 93 | command_id: str, 94 | agent_id: str, 95 | findings: str, 96 | citations: Optional[List[str]] = None, 97 | gaps: Optional[List[str]] = None, 98 | summary_ids: Optional[List[str]] = None, 99 | ) -> SharedMemoryMessage: 100 | """Build a validated REPORT message payload.""" 101 | payload = { 102 | "command_id": command_id, 103 | "agent_id": agent_id, 104 | "findings": findings, 105 | "citations": citations or [], 106 | "gaps": gaps or [], 107 | "summary_ids": summary_ids or [], 108 | } 109 | _validate_fields(payload, ["command_id", "agent_id", "findings"], "REPORT") 110 | return 
SharedMemoryMessage(SharedMemoryMessageType.REPORT, payload) 111 | -------------------------------------------------------------------------------- /tests/unit/test_internet_access.py: -------------------------------------------------------------------------------- 1 | """Tests for internet access integration.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import Any, Dict, List 6 | from unittest.mock import MagicMock 7 | 8 | import pytest 9 | 10 | from memorizz.internet_access import ( 11 | InternetAccessProvider, 12 | InternetPageContent, 13 | InternetSearchResult, 14 | register_provider, 15 | ) 16 | from memorizz.memagent.core import MemAgent 17 | from memorizz.memagent.managers.internet_access_manager import InternetAccessManager 18 | from memorizz.memagent.models import MemAgentModel 19 | 20 | 21 | class _DummyProvider(InternetAccessProvider): 22 | provider_name = "dummy-provider" 23 | 24 | def __init__(self, **kwargs): 25 | super().__init__(kwargs) 26 | 27 | def search( 28 | self, query: str, max_results: int = 5, **kwargs 29 | ) -> List[InternetSearchResult]: 30 | return [ 31 | InternetSearchResult( 32 | url=f"https://example.com/{idx}", 33 | title=f"Result {idx}", 34 | snippet=query, 35 | score=1.0, 36 | ) 37 | for idx in range(max_results) 38 | ] 39 | 40 | def fetch_url(self, url: str, **kwargs) -> InternetPageContent: 41 | return InternetPageContent(url=url, title="Example", content="Example body") 42 | 43 | 44 | register_provider(_DummyProvider.provider_name, _DummyProvider) 45 | 46 | 47 | @pytest.mark.unit 48 | def test_internet_access_manager_serializes_results(): 49 | provider = _DummyProvider() 50 | manager = InternetAccessManager(provider) 51 | 52 | results = manager.search("memorizz", max_results=2) 53 | assert len(results) == 2 54 | assert results[0]["url"].startswith("https://example.com/") 55 | 56 | page = manager.fetch_url("https://memorizz.ai") 57 | assert page["content"] == "Example body" 58 | 59 | 60 | @pytest.mark.unit 61 | def test_memagent_registers_internet_tools(): 62 | provider = MagicMock() 63 | provider.get_provider_name.return_value = "dummy" 64 | provider.get_config.return_value = {"api_key": "test"} 65 | provider.search.return_value = [{"url": "https://example.com"}] 66 | provider.fetch_url.return_value = { 67 | "url": "https://example.com", 68 | "content": "Body", 69 | } 70 | 71 | agent = MemAgent(instruction="Internet agent", internet_access_provider=provider) 72 | 73 | assert agent.has_internet_access() is True 74 | assert "internet_search" in agent.tool_manager.tools 75 | assert agent.search_internet("python") 76 | provider.search.assert_called_once() 77 | 78 | 79 | @pytest.mark.unit 80 | def test_memagent_disables_internet_access(): 81 | provider = MagicMock() 82 | provider.get_provider_name.return_value = "dummy" 83 | provider.get_config.return_value = {} 84 | provider.search.return_value = [] 85 | provider.fetch_url.return_value = {} 86 | 87 | agent = MemAgent(instruction="toggle agent", internet_access_provider=provider) 88 | assert agent.has_internet_access() is True 89 | 90 | agent.with_internet_access_provider(None) 91 | assert agent.has_internet_access() is False 92 | assert "internet_search" not in agent.tool_manager.tools 93 | 94 | 95 | @pytest.mark.unit 96 | def test_memagent_load_rehydrated_provider(monkeypatch): 97 | memory_provider = MagicMock() 98 | saved = MemAgentModel( 99 | instruction="Load", 100 | internet_access_provider=_DummyProvider.provider_name, 101 | internet_access_config={"custom": "value"}, 102 | ) 103 
| memory_provider.retrieve_memagent.return_value = saved 104 | 105 | agent = MemAgent.load( 106 | agent_id="agent-123", 107 | memory_provider=memory_provider, 108 | ) 109 | 110 | assert agent.has_internet_access() is True 111 | assert agent.get_internet_access_provider_name() == _DummyProvider.provider_name 112 | -------------------------------------------------------------------------------- /src/memorizz/long_term_memory/procedural/persona/README.md: -------------------------------------------------------------------------------- 1 | # Persona Module 2 | 3 | The Persona module provides a framework for creating and managing AI agent personas with specific roles, goals, and backgrounds. This module is part of the Memorizz library, which handles memory management for AI agents. 4 | 5 | ## Features 6 | 7 | - Create personas with predefined or custom roles 8 | - Automatically generate embeddings for semantic search 9 | - Store and retrieve personas from memory providers 10 | - Find similar personas based on semantic similarity 11 | - Generate system prompts based on persona attributes 12 | 13 | ## Usage 14 | 15 | ### Creating a Persona 16 | 17 | ```python 18 | from src.memorizz.long_term_memory.semantic.persona import Persona 19 | from src.memorizz.memory_provider import MemoryProvider 20 | 21 | # Initialize a memory provider 22 | memory_provider = MemoryProvider() 23 | 24 | # Create a new persona 25 | tech_expert = Persona( 26 | name="TechExpert", 27 | role="Technical Support Specialist", 28 | goals="Help users troubleshoot technical issues. Provide clear explanations for complex problems.", 29 | background="An experienced technical support engineer with expertise in software development, networking, and system administration." 30 | ) 31 | 32 | # Create a persona with more personality traits 33 | sarcastic_assistant = Persona( 34 | name="Monday", 35 | role="General", 36 | goals="Provide versatile support with a sarcastic tone. Add humor to interactions.", 37 | background="A cynical but helpful assistant who uses dry wit and gentle teasing while delivering high-quality information." 
38 | ) 39 | ``` 40 | 41 | ### Storing Personas 42 | 43 | Once created, personas can be stored in the memory provider for future use: 44 | 45 | ```python 46 | # Store the persona in the memory provider 47 | persona_id = tech_expert.store_persona(memory_provider) 48 | print(f"Stored persona with ID: {persona_id}") 49 | ``` 50 | 51 | ### Generating Persona Prompts 52 | 53 | Personas can generate system prompts for language models: 54 | 55 | ```python 56 | # Generate a prompt that can be used with LLMs 57 | system_prompt = tech_expert.generate_system_prompt_input() 58 | print(system_prompt) 59 | ``` 60 | 61 | ### Retrieving Personas 62 | 63 | Personas can be retrieved by ID: 64 | 65 | ```python 66 | # Retrieve a persona using its ID 67 | retrieved_persona = Persona.retrieve_persona(persona_id, memory_provider) 68 | print(retrieved_persona) 69 | ``` 70 | 71 | Or by semantic similarity to a query: 72 | 73 | ```python 74 | # Find personas matching a specific need 75 | similar_personas = Persona.get_most_similar_persona( 76 | "I need a technical expert who can explain complex concepts simply", 77 | memory_provider, 78 | limit=1 79 | ) 80 | ``` 81 | 82 | ### Using Personas with MemAgents 83 | 84 | Personas can be assigned to MemAgents to control their behavior: 85 | 86 | ```python 87 | from src.memorizz.memagent import MemAgent 88 | 89 | # Create an agent with a specific persona 90 | agent = MemAgent( 91 | model=None, # Will use default model 92 | persona=tech_expert, 93 | instruction="Help users with their technical questions", 94 | memory_provider=memory_provider 95 | ) 96 | 97 | # Or set/change a persona later 98 | agent.set_persona(sarcastic_assistant) 99 | 100 | # Run the agent with its persona influencing responses 101 | response = agent.run("Can you help me fix my computer?") 102 | ``` 103 | 104 | ### Persona Persistence 105 | 106 | Personas are stored with vector embeddings for efficient retrieval: 107 | 108 | ```python 109 | # List all available personas 110 | all_personas = memory_provider.list_all(memory_type=MemoryType.PERSONA) 111 | 112 | # Delete a persona 113 | memory_provider.delete_by_id(persona_id, memory_type=MemoryType.PERSONA) 114 | ``` 115 | 116 | ## Implementation Notes 117 | 118 | - Persona embeddings are generated from their attributes for semantic search 119 | - The system automatically converts personas to appropriate prompts for language models 120 | - Personas can be used across multiple agents for consistent behavior 121 | - Custom persona attributes can be added beyond the basic required fields 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /src/memorizz/long_term_memory/semantic/persona/README.md: -------------------------------------------------------------------------------- 1 | # Persona Module 2 | 3 | The Persona module provides a framework for creating and managing AI agent personas with specific roles, goals, and backgrounds. This module is part of the Memorizz library, which handles memory management for AI agents. 
4 | 
5 | ## Features
6 | 
7 | - Create personas with predefined or custom roles
8 | - Automatically generate embeddings for semantic search
9 | - Store and retrieve personas from memory providers
10 | - Find similar personas based on semantic similarity
11 | - Generate system prompts based on persona attributes
12 | 
13 | ## Usage
14 | 
15 | ### Creating a Persona
16 | 
17 | ```python
18 | from src.memorizz.long_term_memory.semantic.persona import Persona
19 | from src.memorizz.memory_provider import MemoryProvider
20 | 
21 | # Initialize a memory provider
22 | memory_provider = MemoryProvider()
23 | 
24 | # Create a new persona
25 | tech_expert = Persona(
26 |     name="TechExpert",
27 |     role="Technical Support Specialist",
28 |     goals="Help users troubleshoot technical issues. Provide clear explanations for complex problems.",
29 |     background="An experienced technical support engineer with expertise in software development, networking, and system administration."
30 | )
31 | 
32 | # Create a persona with more personality traits
33 | sarcastic_assistant = Persona(
34 |     name="Monday",
35 |     role="General",
36 |     goals="Provide versatile support with a sarcastic tone. Add humor to interactions.",
37 |     background="A cynical but helpful assistant who uses dry wit and gentle teasing while delivering high-quality information."
38 | )
39 | ```
40 | 
41 | ### Storing Personas
42 | 
43 | Once created, personas can be stored in the memory provider for future use:
44 | 
45 | ```python
46 | # Store the persona in the memory provider
47 | persona_id = tech_expert.store_persona(memory_provider)
48 | print(f"Stored persona with ID: {persona_id}")
49 | ```
50 | 
51 | ### Generating Persona Prompts
52 | 
53 | Personas can generate system prompts for language models:
54 | 
55 | ```python
56 | # Generate a prompt that can be used with LLMs
57 | system_prompt = tech_expert.generate_system_prompt_input()
58 | print(system_prompt)
59 | ```
60 | 
61 | ### Retrieving Personas
62 | 
63 | Personas can be retrieved by ID:
64 | 
65 | ```python
66 | # Retrieve a persona using its ID
67 | retrieved_persona = Persona.retrieve_persona(persona_id, memory_provider)
68 | print(retrieved_persona)
69 | ```
70 | 
71 | Or by semantic similarity to a query:
72 | 
73 | ```python
74 | # Find personas matching a specific need
75 | similar_personas = Persona.get_most_similar_persona(
76 |     "I need a technical expert who can explain complex concepts simply",
77 |     memory_provider,
78 |     limit=1
79 | )
80 | ```
81 | 
82 | ### Using Personas with MemAgents
83 | 
84 | Personas can be assigned to MemAgents to control their behavior:
85 | 
86 | ```python
87 | from src.memorizz.memagent import MemAgent
88 | 
89 | # Create an agent with a specific persona
90 | agent = MemAgent(
91 |     model=None, # Will use default model
92 |     persona=tech_expert,
93 |     instruction="Help users with their technical questions",
94 |     memory_provider=memory_provider
95 | )
96 | 
97 | # Or set/change a persona later
98 | agent.set_persona(sarcastic_assistant)
99 | 
100 | # Run the agent with its persona influencing responses
101 | response = agent.run("Can you help me fix my computer?")
102 | ```
103 | 
104 | ### Persona Persistence
105 | 
106 | Personas are stored with vector embeddings for efficient retrieval:
107 | 
108 | ```python
109 | # List all available personas
110 | all_personas = memory_provider.list_all(memory_type=MemoryType.PERSONA)
111 | 
112 | # Delete a persona
113 | memory_provider.delete_by_id(persona_id, memory_type=MemoryType.PERSONA)
114 | ```
115 | 
116 | ## Implementation Notes
117 | 118 | - Persona embeddings are generated from their attributes for semantic search 119 | - The system automatically converts personas to appropriate prompts for language models 120 | - Personas can be used across multiple agents for consistent behavior 121 | - Custom persona attributes can be added beyond the basic required fields 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /.claude.md: -------------------------------------------------------------------------------- 1 | # Claude Instructions for Memorizz Project 2 | 3 | This file contains specific instructions for Claude Code to help with common development tasks for the Memorizz project. 4 | 5 | ## Project Overview 6 | 7 | Memorizz is a Python library for AI agent memory management with MongoDB integration and semantic caching capabilities. The project uses semantic versioning and is published to PyPI. 8 | 9 | ## Development Commands 10 | 11 | ### Linting and Type Checking 12 | When making code changes, always run these commands before committing: 13 | ```bash 14 | # Add the appropriate linting commands here once identified 15 | # Example: flake8, black, mypy, etc. 16 | ``` 17 | 18 | ### Testing 19 | ```bash 20 | # Add test commands here once test framework is identified 21 | # Example: pytest, python -m unittest, etc. 22 | ``` 23 | 24 | ## PyPI Deployment Process 25 | 26 | Use this process when deploying a new version to PyPI: 27 | 28 | ### 1. Version Update 29 | Update the version number in `pyproject.toml`: 30 | ```toml 31 | [project] 32 | name = "memorizz" 33 | version = "X.X.X" # Update this version number 34 | ``` 35 | 36 | ### 2. Clean and Build 37 | ```bash 38 | # Clean previous builds 39 | rm -rf dist/ 40 | 41 | # Install build dependencies 42 | pip install build twine 43 | 44 | # Build source distribution and wheel 45 | python -m build 46 | ``` 47 | 48 | ### 3. Git Operations 49 | ```bash 50 | # Commit version changes 51 | git add pyproject.toml 52 | git commit -m "Bump version to X.X.X for PyPI release" 53 | 54 | # Create and push annotated tag 55 | git tag -a vX.X.X -m "Release version X.X.X" 56 | git push origin vX.X.X 57 | git push origin main 58 | ``` 59 | 60 | ### 4. 
PyPI Upload 61 | ```bash 62 | # Upload to production PyPI (requires API token) 63 | twine upload dist/* 64 | ``` 65 | 66 | **Note**: You'll need to provide your PyPI API token when prompted, or set up `~/.pypirc` with your credentials: 67 | ```ini 68 | [pypi] 69 | username = __token__ 70 | password = your-api-token-here 71 | ``` 72 | 73 | ## Project Structure Notes 74 | 75 | - **Main package**: Located in `src/memorizz/` 76 | - **Examples**: Located in `examples/` directory with Jupyter notebooks 77 | - **Memory types**: Defined in `src/memorizz/enums/memory_type.py` 78 | - **MongoDB provider**: Located in `src/memorizz/memory_provider/mongodb/` 79 | - **Semantic cache**: Located in `src/memorizz/short_term_memory/semantic_cache.py` 80 | 81 | ## Key Features to Remember 82 | 83 | - **Semantic Cache**: Vector-based query-response caching with configurable similarity thresholds 84 | - **Memory Types**: CONVERSATION_MEMORY, WORKFLOW_MEMORY, LONG_TERM_MEMORY, SHORT_TERM_MEMORY, PERSONAS, TOOLBOX, SHARED_MEMORY, MEMAGENT, SUMMARIES, SEMANTIC_CACHE 85 | - **Scoping**: Supports LOCAL (agent-specific) and GLOBAL (cross-agent) cache scopes 86 | - **MongoDB Integration**: Uses MongoDB Atlas with vector search capabilities 87 | 88 | ## Semantic Cache Configuration Example 89 | 90 | ```python 91 | from memorizz.short_term_memory.semantic_cache import SemanticCacheConfig 92 | from memorizz.enums import SemanticCacheScope 93 | 94 | config = SemanticCacheConfig( 95 | similarity_threshold=0.85, # 0.0-1.0 scale 96 | max_cache_size=1000, # Maximum entries 97 | ttl_hours=24.0, # Time-to-live 98 | scope=SemanticCacheScope.LOCAL, # LOCAL or GLOBAL 99 | enable_memory_provider_sync=True 100 | ) 101 | ``` 102 | 103 | ## Important Reminders 104 | 105 | - Always test changes before deploying 106 | - Semantic versioning: MAJOR.MINOR.PATCH 107 | - Check that all dependencies are properly listed in `pyproject.toml` 108 | - Ensure MongoDB configurations are properly handled 109 | - Semantic cache requires embedding providers (OpenAI, VoyageAI, etc.) 110 | 111 | ## Common Issues 112 | 113 | - **Embedding Provider**: Make sure embedding providers are configured correctly 114 | - **MongoDB Atlas**: Vector search indexes must be created for semantic functionality 115 | - **Dependencies**: Ensure all required packages are installed and compatible 116 | 117 | ## Repository Information 118 | 119 | - **GitHub**: https://github.com/RichmondAlake/memorizz 120 | - **PyPI**: https://pypi.org/project/memorizz/ 121 | - **Main Branch**: `main` 122 | - **License**: MIT -------------------------------------------------------------------------------- /eval/longmemeval/README.md: -------------------------------------------------------------------------------- 1 | # LongMemEval Evaluation for Memorizz 2 | 3 | This directory contains the evaluation script for testing Memorizz's long-term memory capabilities using the LongMemEval benchmark. 4 | 5 | ## Setup 6 | 7 | ### 1. Download the Dataset 8 | 9 | The LongMemEval dataset needs to be downloaded manually from the official repository: 10 | 11 | ```bash 12 | # Run the download helper script 13 | python download_dataset.py 14 | ``` 15 | 16 | This will provide instructions for downloading the dataset files. You need to: 17 | 18 | 1. Visit https://github.com/xiaowu0162/LongMemEval 19 | 2. Follow their setup instructions 20 | 3. Download the dataset files: 21 | - `longmemeval_oracle.json` 22 | - `longmemeval_s.json` 23 | - `longmemeval_m.json` 24 | 4. 
Place these files in the `data/` directory 25 | 26 | ### 2. Install Dependencies 27 | 28 | Make sure you have the required packages installed: 29 | 30 | ```bash 31 | pip install datasets transformers 32 | ``` 33 | 34 | ### 3. Configure Environment Variables 35 | 36 | The script requires OpenAI API access for evaluation. Set your API key: 37 | 38 | ```bash 39 | export OPENAI_API_KEY="your-openai-api-key" 40 | ``` 41 | 42 | Optionally, configure MongoDB for memory storage: 43 | 44 | ```bash 45 | export MONGODB_URI="your-mongodb-connection-string" 46 | ``` 47 | 48 | ## Usage 49 | 50 | ### Basic Evaluation 51 | 52 | Run the evaluation with default settings (oracle variant, 50 samples): 53 | 54 | ```bash 55 | python evaluate_memorizz.py 56 | ``` 57 | 58 | ### Custom Configuration 59 | 60 | ```bash 61 | python evaluate_memorizz.py \ 62 | --dataset_variant oracle \ 63 | --num_samples 100 \ 64 | --application_mode general \ 65 | --output_dir ./results \ 66 | --verbose 67 | ``` 68 | 69 | ### Parameters 70 | 71 | - `--dataset_variant`: Choose from "oracle", "s", or "m" (default: "oracle") 72 | - `--num_samples`: Number of samples to evaluate (default: 50) 73 | - `--application_mode`: Memorizz application mode to use (default: "general") 74 | - `--output_dir`: Directory to save results (default: "./results") 75 | - `--verbose`: Enable verbose logging 76 | 77 | ### Dataset Variants 78 | 79 | - **oracle**: Contains only the evidence sessions (easier, for testing) 80 | - **s**: Short version with ~40 history sessions (~115k tokens) 81 | - **m**: Medium version with ~500 history sessions (much longer) 82 | 83 | ## Output 84 | 85 | The evaluation script will: 86 | 87 | 1. Load the specified dataset variant 88 | 2. Create fresh Memorizz agents for each sample 89 | 3. Process conversation histories to build memory 90 | 4. Ask evaluation questions and collect responses 91 | 5. Use GPT-4 to evaluate response quality 92 | 6. Save detailed results to JSON files 93 | 94 | Results include: 95 | - Overall accuracy and scores 96 | - Performance by category (IE, MR, KU, TR, ABS) 97 | - Detailed per-sample results 98 | - Processing time statistics 99 | 100 | ## Example Output 101 | 102 | ``` 103 | EVALUATION SUMMARY 104 | ================================================== 105 | Dataset Variant: oracle 106 | Application Mode: general 107 | Samples Evaluated: 50 108 | Overall Accuracy: 0.720 109 | Overall Score: 0.756 110 | Processing Time: 245.67s 111 | 112 | Category Performance: 113 | information_extraction: 0.850 (12 samples) 114 | multi_session_reasoning: 0.667 (15 samples) 115 | knowledge_updates: 0.700 (10 samples) 116 | temporal_reasoning: 0.600 (8 samples) 117 | abstention: 0.800 (5 samples) 118 | 119 | Detailed results saved to: ./results/longmemeval_oracle_general_20241201_143022.json 120 | ``` 121 | 122 | ## Troubleshooting 123 | 124 | ### Dataset Not Found 125 | 126 | If you get a "Dataset file not found" error: 127 | 1. Make sure you've downloaded the dataset files 128 | 2. Check that they're in the correct `data/` directory 129 | 3. Verify the filenames match exactly 130 | 131 | ### Memory Provider Issues 132 | 133 | If MongoDB connection fails, the script will fall back to the default memory provider. For best results, configure a proper MongoDB instance. 134 | 135 | ### API Rate Limits 136 | 137 | The evaluation uses GPT-4 for scoring, which may hit rate limits with large evaluations. 
Consider: 138 | - Using smaller `num_samples` values 139 | - Adding delays between API calls 140 | - Using a higher-tier OpenAI account -------------------------------------------------------------------------------- /src/memorizz/memagent/builders/config_builder.py: -------------------------------------------------------------------------------- 1 | """Configuration builder for MemAgent.""" 2 | 3 | import logging 4 | from typing import Any, Dict 5 | 6 | from ..models import MemAgentConfig 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | class ConfigBuilder: 12 | """ 13 | Builder for MemAgent configuration objects. 14 | 15 | This provides a fluent interface for building complex configurations 16 | that can be reused across multiple agent instances. 17 | """ 18 | 19 | def __init__(self): 20 | """Initialize the config builder.""" 21 | self._config_dict = {} 22 | 23 | def instruction(self, text: str) -> "ConfigBuilder": 24 | """Set the instruction.""" 25 | self._config_dict["instruction"] = text 26 | return self 27 | 28 | def max_steps(self, steps: int) -> "ConfigBuilder": 29 | """Set maximum steps.""" 30 | self._config_dict["max_steps"] = steps 31 | return self 32 | 33 | def tool_access(self, access: str) -> "ConfigBuilder": 34 | """Set tool access level.""" 35 | self._config_dict["tool_access"] = access 36 | return self 37 | 38 | def semantic_cache(self, enabled: bool) -> "ConfigBuilder": 39 | """Enable/disable semantic cache.""" 40 | self._config_dict["semantic_cache"] = enabled 41 | return self 42 | 43 | def application_mode(self, mode: str) -> "ConfigBuilder": 44 | """Set application mode.""" 45 | self._config_dict["application_mode"] = mode 46 | return self 47 | 48 | def verbose(self, enabled: bool) -> "ConfigBuilder": 49 | """Enable/disable verbose logging.""" 50 | self._config_dict["verbose"] = enabled 51 | return self 52 | 53 | def custom(self, key: str, value: Any) -> "ConfigBuilder": 54 | """Add custom configuration parameter.""" 55 | self._config_dict[key] = value 56 | return self 57 | 58 | def build(self) -> MemAgentConfig: 59 | """ 60 | Build the configuration object. 61 | 62 | Returns: 63 | Configured MemAgentConfig instance. 64 | """ 65 | return MemAgentConfig(**self._config_dict) 66 | 67 | def to_dict(self) -> Dict[str, Any]: 68 | """ 69 | Export configuration as dictionary. 70 | 71 | Returns: 72 | Dictionary representation of the configuration. 73 | """ 74 | return self._config_dict.copy() 75 | 76 | 77 | # Preset configurations 78 | class ConfigPresets: 79 | """Predefined configuration presets for common use cases.""" 80 | 81 | @staticmethod 82 | def assistant() -> MemAgentConfig: 83 | """Configuration for general assistant.""" 84 | return ( 85 | ConfigBuilder() 86 | .instruction("You are a helpful AI assistant.") 87 | .max_steps(20) 88 | .application_mode("assistant") 89 | .semantic_cache(False) 90 | .build() 91 | ) 92 | 93 | @staticmethod 94 | def chatbot() -> MemAgentConfig: 95 | """Configuration for conversational chatbot.""" 96 | return ( 97 | ConfigBuilder() 98 | .instruction("You are a friendly conversational chatbot.") 99 | .max_steps(15) 100 | .application_mode("chatbot") 101 | .semantic_cache(True) 102 | .build() 103 | ) 104 | 105 | @staticmethod 106 | def task_agent() -> MemAgentConfig: 107 | """Configuration for task-oriented agent.""" 108 | return ( 109 | ConfigBuilder() 110 | .instruction( 111 | "You are a task-oriented agent focused on completing specific objectives." 
112 | ) 113 | .max_steps(30) 114 | .application_mode("agent") 115 | .tool_access("private") 116 | .semantic_cache(False) 117 | .build() 118 | ) 119 | 120 | @staticmethod 121 | def research_agent() -> MemAgentConfig: 122 | """Configuration for research and analysis agent.""" 123 | return ( 124 | ConfigBuilder() 125 | .instruction( 126 | "You are a research agent specialized in information gathering and analysis." 127 | ) 128 | .max_steps(25) 129 | .application_mode("agent") 130 | .semantic_cache(True) 131 | .verbose(True) 132 | .build() 133 | ) 134 | -------------------------------------------------------------------------------- /=0.26.0: -------------------------------------------------------------------------------- 1 | Requirement already satisfied: sentence-transformers in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (5.1.2) 2 | Requirement already satisfied: transformers in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (4.57.1) 3 | Collecting accelerate 4 | Using cached accelerate-1.10.1-py3-none-any.whl (374 kB) 5 | Requirement already satisfied: huggingface-hub>=0.20.0 in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from sentence-transformers) (0.35.0) 6 | Requirement already satisfied: typing_extensions>=4.5.0 in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from sentence-transformers) (4.15.0) 7 | Requirement already satisfied: scikit-learn in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from sentence-transformers) (1.0.2) 8 | Requirement already satisfied: Pillow in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from sentence-transformers) (9.0.1) 9 | Requirement already satisfied: tqdm in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from sentence-transformers) (4.67.1) 10 | Requirement already satisfied: torch>=1.11.0 in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from sentence-transformers) (2.2.2) 11 | Requirement already satisfied: scipy in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from sentence-transformers) (1.7.3) 12 | Requirement already satisfied: filelock in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from transformers) (3.6.0) 13 | Requirement already satisfied: packaging>=20.0 in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from transformers) (25.0) 14 | Requirement already satisfied: regex!=2019.12.17 in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from transformers) (2022.3.15) 15 | Requirement already satisfied: numpy>=1.17 in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from transformers) (1.21.5) 16 | Requirement already satisfied: tokenizers<=0.23.0,>=0.22.0 in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from transformers) (0.22.1) 17 | Requirement already satisfied: pyyaml>=5.1 in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from transformers) (6.0.2) 18 | Requirement already satisfied: requests in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from transformers) (2.32.5) 19 | Requirement already satisfied: safetensors>=0.4.3 in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from transformers) (0.6.2) 20 | Requirement already satisfied: psutil in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from accelerate) (5.9.4) 21 | Requirement already satisfied: hf-xet<2.0.0,>=1.1.3 in 
/Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from huggingface-hub>=0.20.0->sentence-transformers) (1.1.10) 22 | Requirement already satisfied: fsspec>=2023.5.0 in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from huggingface-hub>=0.20.0->sentence-transformers) (2025.9.0) 23 | Requirement already satisfied: sympy in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from torch>=1.11.0->sentence-transformers) (1.10.1) 24 | Requirement already satisfied: networkx in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from torch>=1.11.0->sentence-transformers) (3.2.1) 25 | Requirement already satisfied: jinja2 in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from torch>=1.11.0->sentence-transformers) (3.1.6) 26 | Requirement already satisfied: MarkupSafe>=2.0 in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from jinja2->torch>=1.11.0->sentence-transformers) (3.0.2) 27 | Requirement already satisfied: idna<4,>=2.5 in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from requests->transformers) (3.3) 28 | Requirement already satisfied: charset_normalizer<4,>=2 in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from requests->transformers) (2.0.4) 29 | Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from requests->transformers) (2.5.0) 30 | Requirement already satisfied: certifi>=2017.4.17 in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from requests->transformers) (2025.8.3) 31 | Requirement already satisfied: threadpoolctl>=2.0.0 in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from scikit-learn->sentence-transformers) (2.2.0) 32 | Requirement already satisfied: joblib>=0.11 in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from scikit-learn->sentence-transformers) (1.1.0) 33 | Requirement already satisfied: mpmath>=0.19 in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from sympy->torch>=1.11.0->sentence-transformers) (1.2.1) 34 | Installing collected packages: accelerate 35 | Successfully installed accelerate-1.10.1 36 | -------------------------------------------------------------------------------- /tests/unit/test_filesystem_provider.py: -------------------------------------------------------------------------------- 1 | """Tests for the filesystem memory provider.""" 2 | 3 | from pathlib import Path 4 | from typing import List 5 | 6 | import pytest 7 | 8 | from memorizz.enums import MemoryType 9 | from memorizz.memagent import MemAgentModel 10 | from memorizz.memory_provider import FileSystemConfig, FileSystemProvider 11 | 12 | 13 | class DummyEmbeddingProvider: 14 | """Minimal embedding provider used to avoid network calls.""" 15 | 16 | def __init__(self) -> None: 17 | self.calls: List[str] = [] 18 | 19 | def get_embedding(self, text: str) -> List[float]: 20 | self.calls.append(text) 21 | seed = float(sum(ord(ch) for ch in text)) 22 | return [seed, float(len(text) or 1), 0.0] 23 | 24 | def get_provider_info(self) -> str: 25 | return "dummy" 26 | 27 | 28 | def _make_provider(tmp_path, embedding_provider=None) -> FileSystemProvider: 29 | root = Path(tmp_path) / "fs-memory" 30 | config = FileSystemConfig( 31 | root_path=root, embedding_provider=embedding_provider, lazy_vector_indexes=True 32 | ) 33 | return FileSystemProvider(config) 34 | 35 | 36 | def test_store_and_query_documents(tmp_path): 37 | provider = 
_make_provider(tmp_path) 38 | 39 | doc_id = provider.store( 40 | { 41 | "name": "demo", 42 | "content": "hello filesystem memory", 43 | "memory_id": "memory-123", 44 | }, 45 | memory_store_type=MemoryType.LONG_TERM_MEMORY, 46 | ) 47 | 48 | retrieved = provider.retrieve_by_id(doc_id, MemoryType.LONG_TERM_MEMORY) 49 | assert retrieved["content"] == "hello filesystem memory" 50 | 51 | results = provider.retrieve_by_query( 52 | {"memory_id": "memory-123"}, 53 | memory_type=MemoryType.LONG_TERM_MEMORY, 54 | limit=1, 55 | ) 56 | assert results and results[0]["id"] == doc_id 57 | 58 | provider.delete_by_id(doc_id, MemoryType.LONG_TERM_MEMORY) 59 | assert provider.list_all(MemoryType.LONG_TERM_MEMORY) == [] 60 | 61 | 62 | def test_memagent_round_trip(tmp_path): 63 | provider = _make_provider(tmp_path) 64 | 65 | agent = MemAgentModel( 66 | instruction="test agent", 67 | memory_ids=["mem-1"], 68 | application_mode="assistant", 69 | ) 70 | agent_id = provider.store_memagent(agent) 71 | 72 | loaded = provider.retrieve_memagent(agent_id) 73 | assert loaded is not None 74 | assert loaded.memory_ids == ["mem-1"] 75 | 76 | provider.delete_memagent(agent_id) 77 | assert provider.retrieve_memagent(agent_id) is None 78 | 79 | 80 | def test_semantic_query_uses_embedding_provider(tmp_path): 81 | dummy = DummyEmbeddingProvider() 82 | provider = _make_provider(tmp_path, embedding_provider=dummy) 83 | 84 | provider.store( 85 | { 86 | "content": "alpha memory block", 87 | "memory_id": "alpha", 88 | "embedding": dummy.get_embedding("alpha memory block"), 89 | }, 90 | memory_store_type=MemoryType.LONG_TERM_MEMORY, 91 | ) 92 | provider.store( 93 | { 94 | "content": "beta unrelated record", 95 | "memory_id": "beta", 96 | "embedding": dummy.get_embedding("beta unrelated record"), 97 | }, 98 | memory_store_type=MemoryType.LONG_TERM_MEMORY, 99 | ) 100 | 101 | results = provider.retrieve_by_query( 102 | "alpha memory block", 103 | memory_type=MemoryType.LONG_TERM_MEMORY, 104 | limit=1, 105 | memory_id="alpha", 106 | ) 107 | assert results and results[0]["memory_id"] == "alpha" 108 | assert "alpha memory block" in dummy.calls 109 | 110 | 111 | def test_keyword_search_without_embeddings(tmp_path): 112 | provider = _make_provider(tmp_path) 113 | provider.store( 114 | {"content": "remember keyword fallback", "memory_id": "k1"}, 115 | memory_store_type=MemoryType.LONG_TERM_MEMORY, 116 | ) 117 | 118 | # Force keyword path by disabling embedding lookups 119 | provider._embedding_provider = None 120 | provider._get_embedding_provider = lambda: None 121 | 122 | results = provider.retrieve_by_query( 123 | "keyword fallback", memory_type=MemoryType.LONG_TERM_MEMORY, limit=1 124 | ) 125 | assert results and results[0]["memory_id"] == "k1" 126 | 127 | 128 | def test_delete_memagent_cascade_removes_memories(tmp_path): 129 | provider = _make_provider(tmp_path) 130 | 131 | memory_id = "shared-memory" 132 | provider.store( 133 | {"content": "greeting", "memory_id": memory_id}, 134 | memory_store_type=MemoryType.CONVERSATION_MEMORY, 135 | ) 136 | 137 | agent = MemAgentModel( 138 | instruction="cascade", 139 | memory_ids=[memory_id], 140 | application_mode="assistant", 141 | ) 142 | agent_id = provider.store_memagent(agent) 143 | 144 | provider.delete_memagent(agent_id, cascade=True) 145 | assert provider.list_all(MemoryType.CONVERSATION_MEMORY) == [] 146 | -------------------------------------------------------------------------------- /eval/longmemeval/README_evaluation_architectures.md: 
-------------------------------------------------------------------------------- 1 | # LongMemEval Multi-Architecture Evaluation 2 | 3 | This directory contains three evaluation scripts for testing Memorizz's long-term memory capabilities using the LongMemEval benchmark across different agentic architectures. 4 | 5 | ## Available Evaluation Scripts 6 | 7 | ### 1. Single Agent Evaluation (`evaluate_memorizz.py`) 8 | **Architecture**: Single Agent 9 | **Description**: Evaluates a single MemAgent's memory capabilities using traditional single-agent architecture. 10 | 11 | **Key Features**: 12 | - Single agent handles all memory tasks 13 | - Direct conversation processing 14 | - Baseline performance measurement 15 | - Simple architecture for comparison 16 | 17 | **Usage**: 18 | ```bash 19 | python evaluate_memorizz.py --variant oracle --samples 50 --verbose 20 | ``` 21 | 22 | ### 2. Delegate Pattern Evaluation (`evaluate_delegate_pattern.py`) 23 | **Architecture**: Multi-Agent Delegate Pattern 24 | **Description**: Evaluates multi-agent architecture where a root agent delegates tasks to specialized agents working in parallel. 25 | 26 | **Key Features**: 27 | - **Root Agent**: Coordinates and delegates tasks 28 | - **Memory Specialist**: Focuses on memory retrieval and organization 29 | - **Temporal Specialist**: Handles time-based queries and sequencing 30 | - **Context Integrator**: Manages cross-session analysis and patterns 31 | - Parallel task execution 32 | - Flat delegation structure 33 | 34 | **Agent Structure**: 35 | ``` 36 | Root Agent (Coordinator) 37 | ├── Memory Specialist 38 | ├── Temporal Specialist 39 | └── Context Integrator 40 | ``` 41 | 42 | **Usage**: 43 | ```bash 44 | python evaluate_delegate_pattern.py --variant oracle --samples 50 --verbose 45 | ``` 46 | 47 | ### 3. Hierarchical Pattern Evaluation (`evaluate_hierarchical_pattern.py`) 48 | **Architecture**: Multi-Agent Hierarchical Pattern 49 | **Description**: Evaluates hierarchical multi-agent architecture with multiple organizational levels and specialized branches. 
50 | 51 | **Key Features**: 52 | - **Executive Agent**: Top-level strategic coordination 53 | - **Branch Coordinators**: Middle management for specific domains 54 | - **Specialist Agents**: Bottom-level task execution 55 | - Hierarchical task distribution 56 | - Structured command chain 57 | 58 | **Agent Hierarchy**: 59 | ``` 60 | Executive Coordinator (Top Level) 61 | ├── Memory Branch 62 | │ ├── Memory Coordinator (Middle Level) 63 | │ └── Memory Retrieval Specialist (Bottom Level) 64 | └── Analysis Branch 65 | ├── Analysis Coordinator (Middle Level) 66 | ├── Temporal Analysis Specialist (Bottom Level) 67 | └── Context Extraction Specialist (Bottom Level) 68 | ``` 69 | 70 | **Usage**: 71 | ```bash 72 | python evaluate_hierarchical_pattern.py --variant oracle --samples 50 --verbose 73 | ``` 74 | 75 | ## Architecture Comparison 76 | 77 | | Feature | Single Agent | Delegate Pattern | Hierarchical Pattern | 78 | |---------|-------------|------------------|---------------------| 79 | | **Complexity** | Low | Medium | High | 80 | | **Specialization** | None | High | Very High | 81 | | **Coordination** | N/A | Flat | Multi-level | 82 | | **Scalability** | Limited | Good | Excellent | 83 | | **Task Distribution** | None | Parallel | Hierarchical | 84 | | **Command Structure** | Direct | Delegate | Chain of Command | 85 | 86 | ## Evaluation Metrics 87 | 88 | All evaluation scripts measure: 89 | - Response accuracy against ground truth 90 | - Response time performance 91 | - Memory utilization effectiveness 92 | - Architecture-specific metrics 93 | 94 | ## Expected Use Cases 95 | 96 | ### Single Agent 97 | - Baseline performance measurement 98 | - Simple memory tasks 99 | - Resource-constrained environments 100 | 101 | ### Delegate Pattern 102 | - Parallel processing requirements 103 | - Specialized task domains 104 | - Medium complexity scenarios 105 | 106 | ### Hierarchical Pattern 107 | - Complex organizational tasks 108 | - Large-scale coordination 109 | - Enterprise-level scenarios 110 | 111 | ## Running Comparative Analysis 112 | 113 | To compare all three architectures: 114 | 115 | ```bash 116 | # Run all evaluations 117 | python evaluate_memorizz.py --variant oracle --samples 50 --output-dir ./results/single 118 | python evaluate_delegate_pattern.py --variant oracle --samples 50 --output-dir ./results/delegate 119 | python evaluate_hierarchical_pattern.py --variant oracle --samples 50 --output-dir ./results/hierarchical 120 | 121 | # Results will be saved with architecture identifiers for comparison 122 | ``` 123 | 124 | ## Dataset Variants 125 | 126 | All scripts support three LongMemEval variants: 127 | - `oracle`: Full dataset with ground truth 128 | - `s`: Short conversation variant 129 | - `m`: Medium conversation variant 130 | 131 | ## Output Format 132 | 133 | Each evaluation produces JSON results with: 134 | - Architecture identification 135 | - Detailed sample results 136 | - Aggregate performance metrics 137 | - Timestamp and configuration info 138 | 139 | Results are saved in the format: `longmemeval_{architecture}_results_{variant}_{timestamp}.json` -------------------------------------------------------------------------------- /eval/longmemeval/download_dataset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Download script for LongMemEval dataset 4 | 5 | This script downloads the LongMemEval dataset from the official Google Drive source 6 | and extracts it to the correct location for the evaluation 
script. 7 | """ 8 | 9 | import os 10 | import sys 11 | import json 12 | from pathlib import Path 13 | import tarfile 14 | 15 | def install_gdown(): 16 | """Install gdown if not available.""" 17 | try: 18 | import gdown 19 | return gdown 20 | except ImportError: 21 | print("Installing gdown...") 22 | import subprocess 23 | subprocess.check_call([sys.executable, "-m", "pip", "install", "gdown"]) 24 | import gdown 25 | return gdown 26 | 27 | def main(): 28 | """Download LongMemEval dataset.""" 29 | # Get the data directory 30 | script_dir = Path(__file__).parent 31 | data_dir = script_dir / "data" 32 | data_dir.mkdir(exist_ok=True) 33 | 34 | print("LongMemEval Dataset Downloader") 35 | print("=" * 40) 36 | 37 | # Install gdown if needed 38 | try: 39 | gdown = install_gdown() 40 | except Exception as e: 41 | print(f"❌ Failed to install gdown: {e}") 42 | print("Please install manually: pip install gdown") 43 | return 44 | 45 | # Official Google Drive download link 46 | file_id = '1zJgtYRFhOh5zDQzzatiddfjYhFSnyQ80' 47 | url = f'https://drive.google.com/uc?id={file_id}' 48 | file_path = data_dir / 'longmemeval_data.tar.gz' 49 | 50 | print("📥 DOWNLOADING DATASET:") 51 | print(f"Source: Official Google Drive") 52 | print(f"URL: {url}") 53 | print(f"Destination: {file_path}") 54 | print() 55 | 56 | # Download the compressed dataset 57 | if not file_path.exists(): 58 | try: 59 | print("Downloading longmemeval_data.tar.gz...") 60 | gdown.download(url, str(file_path), quiet=False) 61 | print("✅ Download completed!") 62 | except Exception as e: 63 | print(f"❌ Download failed: {e}") 64 | print("You can try downloading manually from:") 65 | print(f"https://drive.google.com/file/d/{file_id}/view") 66 | return 67 | else: 68 | print(f"✅ '{file_path.name}' already exists, skipping download.") 69 | 70 | print() 71 | print("📦 EXTRACTING DATASET:") 72 | 73 | # Check if files already exist 74 | expected_files = [ 75 | 'longmemeval_oracle.json', 76 | 'longmemeval_s.json', 77 | 'longmemeval_m.json' 78 | ] 79 | 80 | files_exist = all((data_dir / filename).exists() for filename in expected_files) 81 | 82 | if not files_exist: 83 | try: 84 | print("Extracting tar.gz file...") 85 | with tarfile.open(file_path, 'r:gz') as tar: 86 | # Extract to data directory 87 | tar.extractall(path=data_dir) 88 | print("✅ Extraction completed!") 89 | except Exception as e: 90 | print(f"❌ Extraction failed: {e}") 91 | return 92 | else: 93 | print("✅ Dataset files already exist, skipping extraction.") 94 | 95 | print() 96 | print("📋 VERIFYING FILES:") 97 | 98 | all_found = True 99 | total_size = 0 100 | 101 | for filename in expected_files: 102 | filepath = data_dir / filename 103 | if filepath.exists(): 104 | size_mb = filepath.stat().st_size / (1024 * 1024) 105 | total_size += size_mb 106 | print(f"✅ {filename} - Found ({size_mb:.1f} MB)") 107 | else: 108 | print(f"❌ {filename} - Not found") 109 | all_found = False 110 | 111 | print() 112 | if all_found: 113 | print(f"🎉 SUCCESS! 
All dataset files downloaded and extracted ({total_size:.1f} MB total)") 114 | print() 115 | print("📊 DATASET VARIANTS:") 116 | print("• longmemeval_oracle.json - Oracle retrieval (easiest, for testing)") 117 | print("• longmemeval_s.json - Short version (~115k tokens, ~40 sessions)") 118 | print("• longmemeval_m.json - Medium version (~500 sessions)") 119 | print() 120 | print("🚀 READY TO RUN EVALUATION:") 121 | print("cd eval/longmemeval") 122 | print("python evaluate_memorizz.py --dataset_variant oracle") 123 | print("python evaluate_memorizz.py --dataset_variant s") 124 | print("python evaluate_memorizz.py --dataset_variant m") 125 | else: 126 | print("⚠️ Some dataset files are missing after extraction.") 127 | print("Please check the extracted files or try downloading again.") 128 | 129 | # Clean up compressed file (optional) 130 | if file_path.exists() and all_found: 131 | try: 132 | file_path.unlink() 133 | print(f"🗑️ Cleaned up compressed file: {file_path.name}") 134 | except: 135 | pass # Don't fail if cleanup doesn't work 136 | 137 | print(f"\n📂 Data directory: {data_dir}") 138 | print("📄 Dataset paper: https://arxiv.org/abs/2410.10813") 139 | 140 | if __name__ == "__main__": 141 | main() -------------------------------------------------------------------------------- /src/memorizz/embeddings/openai/provider.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import Any, Dict, List 3 | 4 | import openai 5 | 6 | from .. import BaseEmbeddingProvider 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | # Suppress httpx logs to reduce noise from API requests 11 | logging.getLogger("httpx").setLevel(logging.WARNING) 12 | 13 | 14 | class OpenAIEmbeddingProvider(BaseEmbeddingProvider): 15 | """OpenAI embedding provider implementation.""" 16 | 17 | # Model configuration with their dimensions 18 | MODEL_DIMENSIONS = { 19 | "text-embedding-3-small": 1536, # Default for 3-small is 1536, but can be reduced 20 | "text-embedding-3-large": 3072, # Default for 3-large is 3072, but can be reduced 21 | "text-embedding-ada-002": 1536, # Fixed dimensions 22 | } 23 | 24 | def __init__(self, config: Dict[str, Any] = None): 25 | """ 26 | Initialize OpenAI embedding provider. 27 | 28 | Parameters: 29 | ----------- 30 | config : Dict[str, Any] 31 | Configuration dictionary with keys: 32 | - model: str (default: "text-embedding-3-small") 33 | - dimensions: int (default: 256, only for text-embedding-3-* models) 34 | - api_key: str (optional, uses env var if not provided) 35 | - base_url: str (optional, for custom endpoints) 36 | """ 37 | super().__init__(config) 38 | 39 | # Set default configuration 40 | self.model = self.config.get("model", "text-embedding-3-small") 41 | self.dimensions = self.config.get("dimensions", 256) 42 | 43 | # Validate model and dimensions 44 | if self.model not in self.MODEL_DIMENSIONS: 45 | raise ValueError( 46 | f"Unsupported OpenAI model: {self.model}. Supported models: {list(self.MODEL_DIMENSIONS.keys())}" 47 | ) 48 | 49 | # For ada-002, dimensions cannot be customized 50 | if self.model == "text-embedding-ada-002" and self.dimensions != 1536: 51 | logger.warning( 52 | f"Model {self.model} has fixed dimensions of 1536. Ignoring custom dimensions parameter." 
53 | ) 54 | self.dimensions = 1536 55 | 56 | # For 3-small and 3-large, validate dimensions are within allowed range 57 | if self.model in ["text-embedding-3-small", "text-embedding-3-large"]: 58 | max_dims = self.MODEL_DIMENSIONS[self.model] 59 | if self.dimensions > max_dims: 60 | raise ValueError( 61 | f"Dimensions {self.dimensions} exceed maximum {max_dims} for model {self.model}" 62 | ) 63 | 64 | # Initialize OpenAI client 65 | client_kwargs = {} 66 | if "api_key" in self.config: 67 | client_kwargs["api_key"] = self.config["api_key"] 68 | if "base_url" in self.config: 69 | client_kwargs["base_url"] = self.config["base_url"] 70 | 71 | self.client = openai.OpenAI(**client_kwargs) 72 | 73 | logger.info( 74 | f"Initialized OpenAI provider with model={self.model}, dimensions={self.dimensions}" 75 | ) 76 | 77 | def get_embedding(self, text: str, **kwargs) -> List[float]: 78 | """ 79 | Generate embedding using OpenAI's API. 80 | 81 | Parameters: 82 | ----------- 83 | text : str 84 | The text to embed 85 | **kwargs 86 | Additional parameters: 87 | - model: str (override default model) 88 | - dimensions: int (override default dimensions) 89 | 90 | Returns: 91 | -------- 92 | List[float] 93 | The embedding vector 94 | """ 95 | # Allow per-call overrides 96 | model = kwargs.get("model", self.model) 97 | dimensions = kwargs.get("dimensions", self.dimensions) 98 | 99 | # Clean the text 100 | text = text.replace("\n", " ") 101 | 102 | try: 103 | # For ada-002, don't pass dimensions parameter 104 | if model == "text-embedding-ada-002": 105 | response = self.client.embeddings.create(input=[text], model=model) 106 | else: 107 | response = self.client.embeddings.create( 108 | input=[text], model=model, dimensions=dimensions 109 | ) 110 | 111 | return response.data[0].embedding 112 | except Exception as e: 113 | logger.error(f"Error generating OpenAI embedding: {str(e)}") 114 | raise 115 | 116 | def get_dimensions(self) -> int: 117 | """Get the dimensionality of embeddings produced by this provider.""" 118 | return self.dimensions 119 | 120 | def get_default_model(self) -> str: 121 | """Get the default model name for this provider.""" 122 | return self.model 123 | 124 | @classmethod 125 | def get_available_models(cls) -> List[str]: 126 | """Get list of available OpenAI embedding models.""" 127 | return list(cls.MODEL_DIMENSIONS.keys()) 128 | 129 | @classmethod 130 | def get_model_max_dimensions(cls, model: str) -> int: 131 | """Get maximum dimensions for a specific model.""" 132 | return cls.MODEL_DIMENSIONS.get(model, 1536) 133 | -------------------------------------------------------------------------------- /src/memorizz/enums/application_mode.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | from typing import List 3 | 4 | from .memory_type import MemoryType 5 | 6 | 7 | class ApplicationMode(Enum): 8 | """ 9 | Application modes define the environment and context the agent operates within, 10 | automatically configuring the appropriate memory types for each scenario. 11 | """ 12 | 13 | # Available application modes 14 | WORKFLOW = "workflow" 15 | DEEP_RESEARCH = "deep_research" 16 | ASSISTANT = "assistant" 17 | 18 | # Default mode 19 | DEFAULT = ASSISTANT 20 | 21 | 22 | class ApplicationModeConfig: 23 | """ 24 | Configuration class that maps application modes to their associated memory types 25 | and provides additional configuration for each mode. 
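    Example (illustrative sketch using only the helpers defined below):

        ApplicationModeConfig.get_memory_types(ApplicationMode.WORKFLOW)
        # -> workflow, toolbox, long-term and short-term memory types

        ApplicationModeConfig.validate_mode("assistant")
        # -> ApplicationMode.ASSISTANT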
26 | """ 27 | 28 | # Memory type mappings for each application mode 29 | MEMORY_TYPE_MAPPINGS = { 30 | ApplicationMode.WORKFLOW: [ 31 | MemoryType.WORKFLOW_MEMORY, 32 | MemoryType.TOOLBOX, 33 | MemoryType.LONG_TERM_MEMORY, # Knowledge base 34 | MemoryType.SHORT_TERM_MEMORY, # For intermediate results 35 | ], 36 | ApplicationMode.DEEP_RESEARCH: [ 37 | MemoryType.TOOLBOX, 38 | MemoryType.SHARED_MEMORY, 39 | MemoryType.LONG_TERM_MEMORY, # Research knowledge base 40 | MemoryType.SHORT_TERM_MEMORY, # For research sessions 41 | MemoryType.SUMMARIES, # For context compression 42 | ], 43 | ApplicationMode.ASSISTANT: [ 44 | MemoryType.CONVERSATION_MEMORY, 45 | MemoryType.LONG_TERM_MEMORY, # Knowledge base 46 | MemoryType.PERSONAS, # For personalization 47 | MemoryType.ENTITY_MEMORY, # Structured entity facts 48 | MemoryType.SHORT_TERM_MEMORY, # For context 49 | MemoryType.SUMMARIES, # For memory compression 50 | ], 51 | } 52 | 53 | # Description for each application mode 54 | MODE_DESCRIPTIONS = { 55 | ApplicationMode.WORKFLOW: "Optimized for structured task execution and process automation", 56 | ApplicationMode.DEEP_RESEARCH: "Designed for intensive research with collaboration capabilities", 57 | ApplicationMode.ASSISTANT: "General-purpose conversational assistant with personalization", 58 | } 59 | 60 | @classmethod 61 | def get_memory_types(cls, mode: ApplicationMode) -> List[MemoryType]: 62 | """ 63 | Get the memory types associated with an application mode. 64 | 65 | Parameters: 66 | ----------- 67 | mode : ApplicationMode 68 | The application mode to get memory types for. 69 | 70 | Returns: 71 | -------- 72 | List[MemoryType] 73 | List of memory types for the specified mode. 74 | """ 75 | return cls.MEMORY_TYPE_MAPPINGS.get( 76 | mode, cls.MEMORY_TYPE_MAPPINGS[ApplicationMode.DEFAULT] 77 | ) 78 | 79 | @classmethod 80 | def get_description(cls, mode: ApplicationMode) -> str: 81 | """ 82 | Get the description for an application mode. 83 | 84 | Parameters: 85 | ----------- 86 | mode : ApplicationMode 87 | The application mode to get description for. 88 | 89 | Returns: 90 | -------- 91 | str 92 | Description of the application mode. 93 | """ 94 | return cls.MODE_DESCRIPTIONS.get(mode, "General-purpose application mode") 95 | 96 | @classmethod 97 | def list_all_modes(cls) -> List[tuple]: 98 | """ 99 | List all available application modes with their descriptions. 100 | 101 | Returns: 102 | -------- 103 | List[tuple] 104 | List of (mode, description) tuples. 105 | """ 106 | return [(mode, cls.get_description(mode)) for mode in ApplicationMode] 107 | 108 | @classmethod 109 | def validate_mode(cls, mode_input) -> ApplicationMode: 110 | """ 111 | Validate and convert a string or enum to ApplicationMode enum. 112 | 113 | Parameters: 114 | ----------- 115 | mode_input : str | ApplicationMode 116 | String representation or enum of the application mode. 117 | 118 | Returns: 119 | -------- 120 | ApplicationMode 121 | The corresponding ApplicationMode enum. 122 | 123 | Raises: 124 | ------- 125 | ValueError 126 | If the mode input is not valid. 127 | """ 128 | # If it's already an ApplicationMode enum, return it directly 129 | if isinstance(mode_input, ApplicationMode): 130 | return mode_input 131 | 132 | # If it's a string, convert it 133 | if isinstance(mode_input, str): 134 | try: 135 | return ApplicationMode(mode_input.lower()) 136 | except ValueError: 137 | valid_modes = [mode.value for mode in ApplicationMode] 138 | raise ValueError( 139 | f"Invalid application mode: '{mode_input}'. 
Valid modes: {valid_modes}" 140 | ) 141 | 142 | # If it's neither string nor enum, raise an error 143 | raise ValueError( 144 | f"Application mode must be a string or ApplicationMode enum, got {type(mode_input)}" 145 | ) 146 | -------------------------------------------------------------------------------- /src/memorizz/memory_provider/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import TYPE_CHECKING, Any, Dict, List, Optional 3 | 4 | # Use TYPE_CHECKING for forward references to avoid circular imports 5 | if TYPE_CHECKING: 6 | from memorizz.memagent import MemAgent 7 | 8 | 9 | class MemoryProvider(ABC): 10 | """Abstract base class for memory providers.""" 11 | 12 | @abstractmethod 13 | def __init__(self, config: Dict[str, Any]): 14 | """Initialize the memory provider with configuration settings.""" 15 | 16 | @abstractmethod 17 | def store( 18 | self, 19 | data: Dict[str, Any] = None, 20 | memory_store_type: str = None, 21 | memory_id: str = None, 22 | memory_unit: Any = None, 23 | ) -> str: 24 | """ 25 | Store data in the memory provider. 26 | 27 | Parameters: 28 | ----------- 29 | data : Dict[str, Any], optional 30 | Data dictionary to store (legacy parameter) 31 | memory_store_type : str, optional 32 | Type of memory store (legacy parameter) 33 | memory_id : str, optional 34 | Memory ID to associate with (new parameter) 35 | memory_unit : MemoryUnit, optional 36 | Memory unit object to store (new parameter) 37 | """ 38 | 39 | @abstractmethod 40 | def retrieve_by_query( 41 | self, 42 | query: Dict[str, Any], 43 | memory_store_type: str = None, 44 | limit: int = 1, 45 | memory_id: str = None, 46 | memory_type: str = None, 47 | **kwargs, 48 | ) -> Optional[Dict[str, Any]]: 49 | """ 50 | Retrieve a document from the memory provider. 
51 | 52 | Parameters: 53 | ----------- 54 | query : Dict[str, Any] or str 55 | Search query (dict for filter queries, str for semantic search) 56 | memory_store_type : str, optional 57 | Type of memory store (legacy parameter name) 58 | memory_type : str or MemoryType, optional 59 | Type of memory store (new parameter name, takes precedence over memory_store_type) 60 | memory_id : str, optional 61 | Filter results to specific memory_id 62 | limit : int 63 | Maximum number of results to return 64 | **kwargs 65 | Additional provider-specific parameters 66 | """ 67 | 68 | @abstractmethod 69 | def retrieve_by_id( 70 | self, id: str, memory_store_type: str 71 | ) -> Optional[Dict[str, Any]]: 72 | """Retrieve a document from the memory provider by id.""" 73 | 74 | @abstractmethod 75 | def retrieve_by_name( 76 | self, name: str, memory_store_type: str 77 | ) -> Optional[Dict[str, Any]]: 78 | """Retrieve a document from the memory provider by name.""" 79 | 80 | @abstractmethod 81 | def delete_by_id(self, id: str, memory_store_type: str) -> bool: 82 | """Delete a document from the memory provider by id.""" 83 | 84 | @abstractmethod 85 | def delete_by_name(self, name: str, memory_store_type: str) -> bool: 86 | """Delete a document from the memory provider by name.""" 87 | 88 | @abstractmethod 89 | def delete_all(self, memory_store_type: str) -> bool: 90 | """Delete all documents within a memory store type in the memory provider.""" 91 | 92 | @abstractmethod 93 | def list_all(self, memory_store_type: str) -> List[Dict[str, Any]]: 94 | """List all documents within a memory store type in the memory provider.""" 95 | 96 | @abstractmethod 97 | def retrieve_conversation_history_ordered_by_timestamp( 98 | self, memory_id: str, memory_type: str = None, limit: int = None 99 | ) -> List[Dict[str, Any]]: 100 | """ 101 | Retrieve the conversation history ordered by timestamp. 
102 | 103 | Parameters: 104 | ----------- 105 | memory_id : str 106 | The memory ID to retrieve history for 107 | memory_type : str or MemoryType, optional 108 | Type of memory (typically CONVERSATION_MEMORY) 109 | limit : int, optional 110 | Maximum number of entries to return 111 | """ 112 | 113 | @abstractmethod 114 | def update_by_id( 115 | self, id: str, data: Dict[str, Any], memory_store_type: str 116 | ) -> bool: 117 | """Update a document in a memory store type in the memory provider by id.""" 118 | 119 | @abstractmethod 120 | def close(self) -> None: 121 | """Close the connection to the memory provider.""" 122 | 123 | @abstractmethod 124 | def store_memagent(self, memagent: "MemAgent") -> str: 125 | """Store a memagent in the memory provider.""" 126 | 127 | @abstractmethod 128 | def delete_memagent(self, agent_id: str, cascade: bool = False) -> bool: 129 | """Delete a memagent from the memory provider.""" 130 | 131 | @abstractmethod 132 | def update_memagent_memory_ids(self, agent_id: str, memory_ids: List[str]) -> bool: 133 | """Update the memory_ids of a memagent in the memory provider.""" 134 | 135 | @abstractmethod 136 | def delete_memagent_memory_ids(self, agent_id: str) -> bool: 137 | """Delete the memory_ids of a memagent in the memory provider.""" 138 | 139 | @abstractmethod 140 | def list_memagents(self) -> List[Dict[str, Any]]: 141 | """List all memagents in the memory provider.""" 142 | -------------------------------------------------------------------------------- /src/memorizz/memagent/managers/workflow_manager.py: -------------------------------------------------------------------------------- 1 | """Workflow management functionality for MemAgent.""" 2 | 3 | import logging 4 | from typing import Any, Dict, List, Optional 5 | 6 | from ...long_term_memory.procedural.workflow.workflow import Workflow, WorkflowOutcome 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | class WorkflowManager: 12 | """ 13 | Manages workflow execution and orchestration for MemAgent. 14 | 15 | This class encapsulates workflow-related functionality that was 16 | previously embedded in the main MemAgent class. 17 | """ 18 | 19 | def __init__(self): 20 | """Initialize the workflow manager.""" 21 | self.active_workflows = {} 22 | self.workflow_history = [] 23 | self._workflow_cache = {} 24 | 25 | def execute_workflow( 26 | self, workflow: Workflow, context: Dict[str, Any] 27 | ) -> WorkflowOutcome: 28 | """ 29 | Execute a workflow. 30 | 31 | Args: 32 | workflow: The Workflow instance to execute. 33 | context: Context dictionary for the workflow. 34 | 35 | Returns: 36 | WorkflowOutcome containing the result. 
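        Example (illustrative; ``my_workflow`` stands in for any constructed Workflow):
            manager = WorkflowManager()
            outcome = manager.execute_workflow(my_workflow, {"topic": "research"})
            # On failure a WorkflowOutcome with status="failed" is returned instead of raising.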
37 | """ 38 | try: 39 | logger.info( 40 | f"Executing workflow: {workflow.name if hasattr(workflow, 'name') else 'unnamed'}" 41 | ) 42 | 43 | # Track active workflow 44 | workflow_id = self._generate_workflow_id() 45 | self.active_workflows[workflow_id] = { 46 | "workflow": workflow, 47 | "context": context, 48 | "status": "running", 49 | } 50 | 51 | # Execute the workflow 52 | outcome = workflow.execute(context) 53 | 54 | # Update tracking 55 | self.active_workflows[workflow_id]["status"] = "completed" 56 | self.active_workflows[workflow_id]["outcome"] = outcome 57 | 58 | # Add to history 59 | self._add_to_history(workflow_id, workflow, context, outcome) 60 | 61 | # Clean up active workflow 62 | del self.active_workflows[workflow_id] 63 | 64 | logger.info( 65 | f"Workflow completed with status: {outcome.status if hasattr(outcome, 'status') else 'unknown'}" 66 | ) 67 | return outcome 68 | 69 | except Exception as e: 70 | logger.error(f"Workflow execution failed: {e}") 71 | return WorkflowOutcome(result=f"Error: {str(e)}", status="failed") 72 | 73 | def get_active_workflows(self) -> Dict[str, Dict[str, Any]]: 74 | """ 75 | Get currently active workflows. 76 | 77 | Returns: 78 | Dictionary of active workflows. 79 | """ 80 | return self.active_workflows.copy() 81 | 82 | def cancel_workflow(self, workflow_id: str) -> bool: 83 | """ 84 | Cancel an active workflow. 85 | 86 | Args: 87 | workflow_id: ID of the workflow to cancel. 88 | 89 | Returns: 90 | True if cancelled, False otherwise. 91 | """ 92 | try: 93 | if workflow_id in self.active_workflows: 94 | self.active_workflows[workflow_id]["status"] = "cancelled" 95 | del self.active_workflows[workflow_id] 96 | logger.info(f"Cancelled workflow: {workflow_id}") 97 | return True 98 | else: 99 | logger.warning(f"Workflow not found for cancellation: {workflow_id}") 100 | return False 101 | 102 | except Exception as e: 103 | logger.error(f"Failed to cancel workflow: {e}") 104 | return False 105 | 106 | def get_workflow_history(self, limit: Optional[int] = None) -> List[Dict[str, Any]]: 107 | """ 108 | Get workflow execution history. 109 | 110 | Args: 111 | limit: Maximum number of history entries to return. 112 | 113 | Returns: 114 | List of workflow history entries. 
115 | """ 116 | if limit: 117 | return self.workflow_history[-limit:] 118 | return self.workflow_history.copy() 119 | 120 | def clear_history(self): 121 | """Clear the workflow execution history.""" 122 | self.workflow_history.clear() 123 | logger.debug("Cleared workflow history") 124 | 125 | def _generate_workflow_id(self) -> str: 126 | """Generate a unique workflow ID.""" 127 | import uuid 128 | 129 | return f"workflow_{uuid.uuid4().hex[:8]}" 130 | 131 | def _add_to_history( 132 | self, 133 | workflow_id: str, 134 | workflow: Workflow, 135 | context: Dict[str, Any], 136 | outcome: WorkflowOutcome, 137 | ): 138 | """Add a workflow execution to history.""" 139 | from datetime import datetime 140 | 141 | history_entry = { 142 | "id": workflow_id, 143 | "workflow_name": getattr(workflow, "name", "unnamed"), 144 | "timestamp": datetime.now().isoformat(), 145 | "context": context, 146 | "outcome": { 147 | "result": outcome.result if hasattr(outcome, "result") else None, 148 | "status": outcome.status if hasattr(outcome, "status") else "unknown", 149 | }, 150 | } 151 | 152 | self.workflow_history.append(history_entry) 153 | 154 | # Limit history size 155 | max_history = 100 156 | if len(self.workflow_history) > max_history: 157 | self.workflow_history = self.workflow_history[-max_history:] 158 | -------------------------------------------------------------------------------- /src/memorizz/memory_unit/summary_component.py: -------------------------------------------------------------------------------- 1 | """ 2 | Summary Component for MemAgent 3 | 4 | Provides a structured approach to working with memory summaries that compress 5 | multiple memory components into emotionally and situationally relevant content. 6 | """ 7 | 8 | import time 9 | from datetime import datetime 10 | from typing import List, Optional 11 | 12 | from pydantic import BaseModel 13 | 14 | 15 | class SummaryComponent(BaseModel): 16 | """ 17 | A structured representation of a memory summary. 18 | 19 | Summaries compress multiple memory components from a time period into 20 | emotionally and situationally relevant content using an LLM. 
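    Example (illustrative values only):

        summary = SummaryComponent(
            memory_id="mem-1",
            agent_id="agent-1",
            summary_content="Weekly recap of customer conversations.",
            period_start=1_700_000_000.0,
            period_end=1_700_604_800.0,  # one week later
            memory_units_count=42,
        )
        summary.period_duration_hours  # -> 168.0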
21 | """ 22 | 23 | memory_id: str 24 | agent_id: str 25 | summary_content: str 26 | period_start: float 27 | period_end: float 28 | memory_units_count: int 29 | created_at: float 30 | embedding: Optional[List[float]] = None 31 | 32 | # Optional metadata 33 | summary_type: str = "automatic" # automatic, manual, scheduled 34 | compression_ratio: Optional[float] = None # original_count / summarized_count 35 | emotional_tags: Optional[List[str]] = None # emotional themes identified 36 | situational_tags: Optional[List[str]] = None # situational contexts 37 | importance_score: Optional[float] = None # 0.0 to 1.0 relevance score 38 | 39 | def __init__(self, **data): 40 | """Initialize summary component with current timestamp if not provided.""" 41 | if "created_at" not in data: 42 | data["created_at"] = time.time() 43 | super().__init__(**data) 44 | 45 | @property 46 | def period_start_datetime(self) -> datetime: 47 | """Get period start as a datetime object.""" 48 | return datetime.fromtimestamp(self.period_start) 49 | 50 | @property 51 | def period_end_datetime(self) -> datetime: 52 | """Get period end as a datetime object.""" 53 | return datetime.fromtimestamp(self.period_end) 54 | 55 | @property 56 | def created_datetime(self) -> datetime: 57 | """Get creation time as a datetime object.""" 58 | return datetime.fromtimestamp(self.created_at) 59 | 60 | @property 61 | def period_duration_hours(self) -> float: 62 | """Get the duration of the summarized period in hours.""" 63 | return (self.period_end - self.period_start) / 3600 64 | 65 | def to_dict(self) -> dict: 66 | """Convert to dictionary for storage.""" 67 | return self.model_dump() 68 | 69 | @classmethod 70 | def from_dict(cls, data: dict) -> "SummaryComponent": 71 | """Create from dictionary loaded from storage.""" 72 | return cls(**data) 73 | 74 | def get_short_preview(self, max_length: int = 100) -> str: 75 | """Get a short preview of the summary content.""" 76 | if len(self.summary_content) <= max_length: 77 | return self.summary_content 78 | return self.summary_content[:max_length] + "..." 79 | 80 | def add_emotional_tag(self, tag: str): 81 | """Add an emotional tag to the summary.""" 82 | if self.emotional_tags is None: 83 | self.emotional_tags = [] 84 | if tag not in self.emotional_tags: 85 | self.emotional_tags.append(tag) 86 | 87 | def add_situational_tag(self, tag: str): 88 | """Add a situational tag to the summary.""" 89 | if self.situational_tags is None: 90 | self.situational_tags = [] 91 | if tag not in self.situational_tags: 92 | self.situational_tags.append(tag) 93 | 94 | def calculate_compression_ratio(self, original_memory_count: int): 95 | """Calculate and set the compression ratio.""" 96 | if original_memory_count > 0: 97 | self.compression_ratio = ( 98 | original_memory_count / 1 99 | ) # Summary is 1 compressed item 100 | 101 | def __str__(self) -> str: 102 | """String representation of the summary.""" 103 | return f"Summary({self.memory_id}, {self.period_start_datetime.strftime('%Y-%m-%d')} to {self.period_end_datetime.strftime('%Y-%m-%d')}, {self.memory_units_count} memories)" 104 | 105 | def __repr__(self) -> str: 106 | """Detailed string representation.""" 107 | return f"SummaryComponent(memory_id='{self.memory_id}', agent_id='{self.agent_id}', period='{self.period_start_datetime}' to '{self.period_end_datetime}', memories={self.memory_units_count})" 108 | 109 | 110 | class SummaryMetrics(BaseModel): 111 | """ 112 | Metrics and analytics for summary generation and usage. 
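    Example (illustrative):

        metrics = SummaryMetrics()
        metrics.add_summary(summary)            # any SummaryComponent instance
        metrics.get_compression_efficiency()    # memories compressed per summary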
113 | """ 114 | 115 | total_summaries: int = 0 116 | total_memories_compressed: int = 0 117 | average_compression_ratio: float = 0.0 118 | most_common_emotional_tags: List[str] = [] 119 | most_common_situational_tags: List[str] = [] 120 | persona_updates_triggered: int = 0 121 | 122 | def add_summary(self, summary: SummaryComponent): 123 | """Add a summary to the metrics.""" 124 | self.total_summaries += 1 125 | self.total_memories_compressed += summary.memory_units_count 126 | 127 | if summary.compression_ratio: 128 | current_total = self.average_compression_ratio * (self.total_summaries - 1) 129 | self.average_compression_ratio = ( 130 | current_total + summary.compression_ratio 131 | ) / self.total_summaries 132 | 133 | def get_compression_efficiency(self) -> float: 134 | """Get overall compression efficiency.""" 135 | if self.total_summaries == 0: 136 | return 0.0 137 | return self.total_memories_compressed / self.total_summaries 138 | -------------------------------------------------------------------------------- /src/memorizz/long_term_memory/episodic/summary_component.py: -------------------------------------------------------------------------------- 1 | """ 2 | Summary Component for MemAgent 3 | 4 | Provides a structured approach to working with memory summaries that compress 5 | multiple memory components into emotionally and situationally relevant content. 6 | """ 7 | 8 | import time 9 | from datetime import datetime 10 | from typing import List, Optional 11 | 12 | from pydantic import BaseModel 13 | 14 | 15 | class SummaryComponent(BaseModel): 16 | """ 17 | A structured representation of a memory summary. 18 | 19 | Summaries compress multiple memory components from a time period into 20 | emotionally and situationally relevant content using an LLM. 
21 | """ 22 | 23 | memory_id: str 24 | agent_id: str 25 | summary_content: str 26 | period_start: float 27 | period_end: float 28 | memory_units_count: int 29 | created_at: float 30 | embedding: Optional[List[float]] = None 31 | 32 | # Optional metadata 33 | summary_type: str = "automatic" # automatic, manual, scheduled 34 | compression_ratio: Optional[float] = None # original_count / summarized_count 35 | emotional_tags: Optional[List[str]] = None # emotional themes identified 36 | situational_tags: Optional[List[str]] = None # situational contexts 37 | importance_score: Optional[float] = None # 0.0 to 1.0 relevance score 38 | 39 | def __init__(self, **data): 40 | """Initialize summary component with current timestamp if not provided.""" 41 | if "created_at" not in data: 42 | data["created_at"] = time.time() 43 | super().__init__(**data) 44 | 45 | @property 46 | def period_start_datetime(self) -> datetime: 47 | """Get period start as a datetime object.""" 48 | return datetime.fromtimestamp(self.period_start) 49 | 50 | @property 51 | def period_end_datetime(self) -> datetime: 52 | """Get period end as a datetime object.""" 53 | return datetime.fromtimestamp(self.period_end) 54 | 55 | @property 56 | def created_datetime(self) -> datetime: 57 | """Get creation time as a datetime object.""" 58 | return datetime.fromtimestamp(self.created_at) 59 | 60 | @property 61 | def period_duration_hours(self) -> float: 62 | """Get the duration of the summarized period in hours.""" 63 | return (self.period_end - self.period_start) / 3600 64 | 65 | def to_dict(self) -> dict: 66 | """Convert to dictionary for storage.""" 67 | return self.model_dump() 68 | 69 | @classmethod 70 | def from_dict(cls, data: dict) -> "SummaryComponent": 71 | """Create from dictionary loaded from storage.""" 72 | return cls(**data) 73 | 74 | def get_short_preview(self, max_length: int = 100) -> str: 75 | """Get a short preview of the summary content.""" 76 | if len(self.summary_content) <= max_length: 77 | return self.summary_content 78 | return self.summary_content[:max_length] + "..." 79 | 80 | def add_emotional_tag(self, tag: str): 81 | """Add an emotional tag to the summary.""" 82 | if self.emotional_tags is None: 83 | self.emotional_tags = [] 84 | if tag not in self.emotional_tags: 85 | self.emotional_tags.append(tag) 86 | 87 | def add_situational_tag(self, tag: str): 88 | """Add a situational tag to the summary.""" 89 | if self.situational_tags is None: 90 | self.situational_tags = [] 91 | if tag not in self.situational_tags: 92 | self.situational_tags.append(tag) 93 | 94 | def calculate_compression_ratio(self, original_memory_count: int): 95 | """Calculate and set the compression ratio.""" 96 | if original_memory_count > 0: 97 | self.compression_ratio = ( 98 | original_memory_count / 1 99 | ) # Summary is 1 compressed item 100 | 101 | def __str__(self) -> str: 102 | """String representation of the summary.""" 103 | return f"Summary({self.memory_id}, {self.period_start_datetime.strftime('%Y-%m-%d')} to {self.period_end_datetime.strftime('%Y-%m-%d')}, {self.memory_units_count} memories)" 104 | 105 | def __repr__(self) -> str: 106 | """Detailed string representation.""" 107 | return f"SummaryComponent(memory_id='{self.memory_id}', agent_id='{self.agent_id}', period='{self.period_start_datetime}' to '{self.period_end_datetime}', memories={self.memory_units_count})" 108 | 109 | 110 | class SummaryMetrics(BaseModel): 111 | """ 112 | Metrics and analytics for summary generation and usage. 
113 | """ 114 | 115 | total_summaries: int = 0 116 | total_memories_compressed: int = 0 117 | average_compression_ratio: float = 0.0 118 | most_common_emotional_tags: List[str] = [] 119 | most_common_situational_tags: List[str] = [] 120 | persona_updates_triggered: int = 0 121 | 122 | def add_summary(self, summary: SummaryComponent): 123 | """Add a summary to the metrics.""" 124 | self.total_summaries += 1 125 | self.total_memories_compressed += summary.memory_units_count 126 | 127 | if summary.compression_ratio: 128 | current_total = self.average_compression_ratio * (self.total_summaries - 1) 129 | self.average_compression_ratio = ( 130 | current_total + summary.compression_ratio 131 | ) / self.total_summaries 132 | 133 | def get_compression_efficiency(self) -> float: 134 | """Get overall compression efficiency.""" 135 | if self.total_summaries == 0: 136 | return 0.0 137 | return self.total_memories_compressed / self.total_summaries 138 | -------------------------------------------------------------------------------- /docs/internet-access/providers.md: -------------------------------------------------------------------------------- 1 | # Internet Access Providers 2 | 3 | MemoRizz treats internet tooling as a first‑class capability for Deep Research agents. This page explains how providers are discovered, how to configure them, and what to expect from the built-in integrations. 4 | 5 | ## Provider Discovery Order 6 | 7 | When a MemAgent is created with `ApplicationMode.DEEP_RESEARCH`, MemoRizz automatically attempts to attach an internet provider: 8 | 9 | 1. **Explicit override** – Any provider passed via `.with_internet_access_provider(...)` takes priority. 10 | 2. **Environment hint** – If `MEMORIZZ_DEFAULT_INTERNET_PROVIDER` is set, MemoRizz instantiates that provider (optionally with `MEMORIZZ_DEFAULT_INTERNET_PROVIDER_API_KEY`). 11 | 3. **Tavily default** – If no override exists but `TAVILY_API_KEY` is present, MemoRizz prefers Tavily for its speed and structured research output. 12 | 4. **Firecrawl fallback** – If Tavily is unavailable yet `FIRECRAWL_API_KEY` exists, MemoRizz creates a Firecrawl provider. 13 | 5. **Offline provider** – When none of the above are configured, MemoRizz falls back to the built-in `offline` provider so the `internet_search` and `open_web_page` tools still exist and inform the agent/user how to enable real access. 14 | 15 | Regardless of provider, every Deep Research agent (root, delegates, synthesis) registers two tools: 16 | 17 | - `internet_search(query: str, max_results: int = 5)` – returns a list of normalized search results (`title`, `snippet`, `url`, `score`, optional metadata + raw payload). 18 | - `open_web_page(url: str)` – fetches a URL and returns parsed content plus metadata (word count, truncation info, raw body when available). 19 | 20 | You can also call `agent.search_internet(...)` or `agent.fetch_url(...)` directly from Python for the same behavior. 21 | 22 | ## Configuring Providers 23 | 24 | | Setting | Purpose | 25 | | --- | --- | 26 | | `MEMORIZZ_DEFAULT_INTERNET_PROVIDER` | Name registered via `register_provider` (e.g., `tavily`, `firecrawl`). | 27 | | `MEMORIZZ_DEFAULT_INTERNET_PROVIDER_API_KEY` | API key passed to the provider constructed from the env hint. | 28 | | `TAVILY_API_KEY` | Shortcut specifically for the Tavily provider (preferred). | 29 | | `FIRECRAWL_API_KEY` | Shortcut specifically for the Firecrawl provider. 
| 30 | 31 | To force a provider in code (e.g., for tests), build it manually and pass it to the builder: 32 | 33 | ```python 34 | from memorizz.internet_access.providers.tavily import TavilyProvider 35 | from memorizz.memagent.builders import create_deep_research_agent 36 | 37 | tavily = TavilyProvider(api_key="sk-...") 38 | agent = (create_deep_research_agent("Web scout", internet_provider=tavily) 39 | .with_memory_provider(memory_provider) 40 | .build()) 41 | ``` 42 | 43 | You can also swap providers on an existing agent via `agent.with_internet_access_provider(new_provider)`. 44 | 45 | ## Tavily Provider (Preferred) 46 | 47 | The Tavily integration is the recommended default. It balances speed and extraction quality, and MemoRizz automatically wires it up for Deep Research agents whenever `TAVILY_API_KEY` exists. 48 | 49 | 1. Export `TAVILY_API_KEY=""`. 50 | 2. Optionally configure `MEMORIZZ_DEFAULT_INTERNET_PROVIDER=tavily` to make every Deep Research agent pick it explicitly. 51 | 3. (Optional) Pass a config dict to tune options such as `search_depth`, `default_max_results`, and `max_content_chars`. 52 | 53 | ```python 54 | TavilyProvider( 55 | api_key="sk-...", 56 | config={ 57 | "search_depth": "advanced", 58 | "default_max_results": 8, 59 | "max_content_chars": 10_000, 60 | "include_raw_page": False, 61 | }, 62 | ) 63 | ``` 64 | 65 | Responses include truncation metadata whenever `max_content_chars` shortens an extracted page so downstream prompts can adapt. 66 | 67 | ## Firecrawl Provider 68 | 69 | The Firecrawl integration gives you search + crawl in a single dependency: 70 | 71 | 1. Install the `firecrawl` extra in your environment (if needed). 72 | 2. Export `FIRECRAWL_API_KEY=""`. 73 | 3. Optionally configure `MEMORIZZ_DEFAULT_INTERNET_PROVIDER=firecrawl` to ensure every Deep Research agent uses Firecrawl by default. 74 | 75 | ### Advanced Configuration 76 | 77 | The provider accepts extra keyword arguments via the config dict: 78 | 79 | ```python 80 | FirecrawlProvider( 81 | api_key="sk-...", 82 | base_url="https://api.firecrawl.dev/v1", 83 | timeout=45, 84 | config={ 85 | "max_content_chars": 12_000, 86 | "max_raw_chars": 2_000, 87 | "include_raw_response": True, 88 | }, 89 | ) 90 | ``` 91 | 92 | When run via env variables, you can set the matching `MEMORIZZ_DEFAULT_INTERNET_PROVIDER_*` keys (or edit the config you pass to `create_internet_access_provider`) to tweak timeouts or base URLs. 93 | 94 | ### Response Shape 95 | 96 | - `internet_search` returns a list of objects containing `url`, `title`, `snippet`, optional `score`, and a `metadata` dict with provider-specific fields (`source`, `published_at`, etc.). 97 | - `open_web_page` returns `title`, parsed `content` (Markdown), `metadata` describing truncation, and a `raw` dict with the full provider payload if `include_raw_response` is enabled. 98 | 99 | MemoRizz automatically trims long documents to keep responses within the model’s context window, flagging truncated responses via `metadata["content_truncated"]`. 100 | 101 | ## Offline Provider 102 | 103 | If neither `MEMORIZZ_DEFAULT_INTERNET_PROVIDER` nor `TAVILY_API_KEY`/`FIRECRAWL_API_KEY` is configured, the `offline` provider keeps the internet tools available but responds with helpful error messages. This ensures Deep Research prompts remain stable even on air-gapped machines, while clearly signaling that live browsing is disabled. 
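As a quick sanity check, the minimal sketch below uses only the calls shown earlier on this page (`create_deep_research_agent`, the builder chain, and `agent.search_internet`); `memory_provider` is assumed to be whichever memory provider you already configured, as in the earlier example. With no search keys in the environment, discovery falls through to the offline provider, so the call should return guidance on enabling live access rather than real results.

```python
import os

from memorizz.memagent.builders import create_deep_research_agent

# Ensure no provider hints are set, so discovery falls through to the offline provider.
for var in (
    "MEMORIZZ_DEFAULT_INTERNET_PROVIDER",
    "MEMORIZZ_DEFAULT_INTERNET_PROVIDER_API_KEY",
    "TAVILY_API_KEY",
    "FIRECRAWL_API_KEY",
):
    os.environ.pop(var, None)

agent = (create_deep_research_agent("Web scout")
         .with_memory_provider(memory_provider)  # assumed configured as in the earlier example
         .build())

# The internet tools still exist; the offline provider answers with instructions
# for enabling Tavily or Firecrawl instead of live search results.
print(agent.search_internet("memorizz memory providers"))
```

Once a `TAVILY_API_KEY` or `FIRECRAWL_API_KEY` is exported, re-running the same snippet picks up the corresponding provider via the discovery order above without any code changes.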
104 | -------------------------------------------------------------------------------- /tests/unit/test_entity_memory.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | from typing import Any, Dict, List, Optional 3 | 4 | import pytest 5 | 6 | from memorizz.enums.memory_type import MemoryType 7 | from memorizz.long_term_memory.semantic.entity_memory import EntityMemory 8 | from memorizz.memagent.managers.entity_memory_manager import EntityMemoryManager 9 | 10 | 11 | class InMemoryEntityProvider: 12 | """Minimal provider that mimics the entity-memory interface.""" 13 | 14 | def __init__(self): 15 | self.records: Dict[str, Dict[str, Any]] = {} 16 | 17 | def supports_entity_memory(self) -> bool: 18 | return True 19 | 20 | def store(self, data: Dict[str, Any], memory_store_type: MemoryType, **_) -> str: 21 | assert memory_store_type == MemoryType.ENTITY_MEMORY 22 | record = dict(data) 23 | record.setdefault("_id", record.get("entity_id", str(uuid.uuid4()))) 24 | entity_id = record["entity_id"] 25 | 26 | self.records[entity_id] = record 27 | 28 | return record["_id"] 29 | 30 | def retrieve_by_query( 31 | self, 32 | query: Any, 33 | memory_type: MemoryType, 34 | limit: int = 5, 35 | memory_id: Optional[str] = None, 36 | **__, 37 | ) -> List[Dict[str, Any]]: 38 | assert memory_type == MemoryType.ENTITY_MEMORY 39 | 40 | if isinstance(query, dict): 41 | candidates = [ 42 | rec 43 | for rec in self.records.values() 44 | if all(rec.get(key) == value for key, value in query.items()) 45 | and (memory_id is None or rec.get("memory_id") == memory_id) 46 | ] 47 | else: 48 | candidates = [ 49 | rec 50 | for rec in self.records.values() 51 | if memory_id is None or rec.get("memory_id") == memory_id 52 | ] 53 | return candidates[:limit] 54 | 55 | def list_all(self, memory_store_type: MemoryType) -> List[Dict[str, Any]]: 56 | assert memory_store_type == MemoryType.ENTITY_MEMORY 57 | return [dict(rec) for rec in self.records.values()] 58 | 59 | 60 | @pytest.fixture() 61 | def provider() -> InMemoryEntityProvider: 62 | return InMemoryEntityProvider() 63 | 64 | 65 | @pytest.fixture(autouse=True) 66 | def mock_embeddings(monkeypatch): 67 | """Use deterministic embeddings so tests don't hit external services.""" 68 | 69 | def _fake_embedding(text: str) -> List[float]: 70 | return [float(len(text or ""))] 71 | 72 | monkeypatch.setattr( 73 | "memorizz.long_term_memory.semantic.entity_memory.entity_memory.get_embedding", 74 | _fake_embedding, 75 | ) 76 | 77 | 78 | @pytest.fixture() 79 | def entity_store(provider: InMemoryEntityProvider) -> EntityMemory: 80 | return EntityMemory(provider) 81 | 82 | 83 | def test_upsert_merges_attributes( 84 | provider: InMemoryEntityProvider, entity_store: EntityMemory 85 | ): 86 | entity_id = entity_store.upsert_entity( 87 | name="Avery", 88 | entity_type="customer", 89 | attributes=[{"name": "language", "value": "English"}], 90 | memory_id="tenant-1", 91 | ) 92 | 93 | entity_store.upsert_entity( 94 | entity_id=entity_id, 95 | attributes=[{"name": "timezone", "value": "PST"}], 96 | memory_id="tenant-1", 97 | ) 98 | 99 | assert len(provider.records) == 1 100 | first_record = next(iter(provider.records.values())) 101 | stored_attrs = {attr["name"]: attr["value"] for attr in first_record["attributes"]} 102 | assert stored_attrs == {"language": "English", "timezone": "PST"} 103 | 104 | 105 | def test_record_attribute_creates_entity( 106 | provider: InMemoryEntityProvider, entity_store: EntityMemory 107 | ): 108 | entity_id = 
entity_store.record_attribute( 109 | entity_name="Nova", 110 | attribute_name="favorite_product", 111 | attribute_value="Nebula Drone", 112 | memory_id="tenant-2", 113 | ) 114 | 115 | stored = next(iter(provider.records.values())) 116 | assert stored["entity_id"] == entity_id 117 | assert stored["attributes"][0]["name"] == "favorite_product" 118 | assert stored["attributes"][0]["value"] == "Nebula Drone" 119 | 120 | 121 | def test_manager_build_context_returns_profiles( 122 | provider: InMemoryEntityProvider, entity_store: EntityMemory 123 | ): 124 | entity_store.upsert_entity( 125 | name="Taylor", 126 | entity_type="analyst", 127 | attributes=[{"name": "role", "value": "Analyst"}], 128 | memory_id="team-7", 129 | ) 130 | 131 | manager = EntityMemoryManager(provider) 132 | profiles = manager.build_context("analyst", memory_id="team-7") 133 | 134 | assert profiles and profiles[0]["attributes"]["role"] == "Analyst" 135 | summary = manager.summarize_for_prompt(profiles) 136 | assert "Taylor" in summary 137 | assert "role: Analyst" in summary 138 | 139 | 140 | def test_manager_lookup_filters_by_memory_id( 141 | provider: InMemoryEntityProvider, entity_store: EntityMemory 142 | ): 143 | entity_store.upsert_entity( 144 | name="Jordan", 145 | entity_type="user", 146 | attributes=[{"name": "tier", "value": "gold"}], 147 | memory_id="org-a", 148 | ) 149 | entity_store.upsert_entity( 150 | name="Riley", 151 | entity_type="user", 152 | attributes=[{"name": "tier", "value": "silver"}], 153 | memory_id="org-b", 154 | ) 155 | 156 | assert len(provider.records) == 2 157 | manager = EntityMemoryManager(provider) 158 | raw_matches = provider.retrieve_by_query( 159 | "user", memory_type=MemoryType.ENTITY_MEMORY, memory_id="org-a" 160 | ) 161 | assert len(raw_matches) == 1 162 | matches = manager.lookup_entities(query="user", memory_id="org-a") 163 | 164 | assert len(matches) == 1 165 | assert matches[0]["name"] == "Jordan" 166 | --------------------------------------------------------------------------------