├── =2.5.0
├── =4.36.0
├── .venv
│   ├── bin
│   │   ├── python
│   │   ├── python3
│   │   ├── pip
│   │   ├── pip3
│   │   ├── pip3.10
│   │   ├── pip3.7
│   │   ├── easy_install
│   │   ├── easy_install-3.7
│   │   ├── activate.csh
│   │   ├── activate
│   │   └── activate.fish
│   └── pyvenv.cfg
├── install_oracle.sh
├── src
│   └── memorizz
│       ├── short_term_memory
│       │   ├── working_memory
│       │   │   ├── __init__.py
│       │   │   └── cwm.py
│       │   └── __init__.py
│       ├── long_term_memory
│       │   ├── procedural
│       │   │   ├── toolbox
│       │   │   │   ├── __init__.py
│       │   │   │   └── tool_schema.py
│       │   │   ├── workflow
│       │   │   │   └── __init__.py
│       │   │   ├── __init__.py
│       │   │   └── persona
│       │   │       └── README.md
│       │   ├── semantic
│       │   │   ├── persona
│       │   │   │   ├── __init__.py
│       │   │   │   ├── role_type.py
│       │   │   │   └── README.md
│       │   │   ├── entity_memory
│       │   │   │   ├── __init__.py
│       │   │   │   └── README.md
│       │   │   └── __init__.py
│       │   ├── episodic
│       │   │   ├── __init__.py
│       │   │   ├── conversational_memory_unit.py
│       │   │   └── summary_component.py
│       │   └── __init__.py
│       ├── coordination
│       │   ├── __init__.py
│       │   └── shared_memory
│       │       ├── __init__.py
│       │       └── messages.py
│       ├── memory_provider
│       │   ├── mongodb
│       │   │   └── __init__.py
│       │   ├── filesystem
│       │   │   └── __init__.py
│       │   ├── oracle
│       │   │   ├── __init__.py
│       │   │   └── requirements.txt
│       │   ├── __init__.py
│       │   └── base.py
│       ├── llms
│       │   ├── __init__.py
│       │   ├── llm_factory.py
│       │   └── llm_provider.py
│       ├── internet_access
│       │   ├── providers
│       │   │   ├── __init__.py
│       │   │   └── offline.py
│       │   ├── models.py
│       │   ├── base.py
│       │   └── __init__.py
│       ├── database
│       │   └── __init__.py
│       ├── enums
│       │   ├── role.py
│       │   ├── semantic_cache_scope.py
│       │   ├── __init__.py
│       │   ├── memory_type.py
│       │   └── application_mode.py
│       ├── memagent
│       │   ├── orchestrators
│       │   │   ├── multi_agent_orchestrator.py
│       │   │   └── __init__.py
│       │   ├── handlers
│       │   │   └── __init__.py
│       │   ├── utils
│       │   │   └── __init__.py
│       │   ├── builders
│       │   │   ├── __init__.py
│       │   │   └── config_builder.py
│       │   ├── constants.py
│       │   ├── managers
│       │   │   ├── __init__.py
│       │   │   ├── internet_access_manager.py
│       │   │   └── workflow_manager.py
│       │   ├── __init__.py
│       │   └── models.py
│       ├── embeddings
│       │   ├── openai
│       │   │   ├── __init__.py
│       │   │   └── provider.py
│       │   ├── azure
│       │   │   └── __init__.py
│       │   ├── huggingface
│       │   │   └── __init__.py
│       │   ├── ollama
│       │   │   └── __init__.py
│       │   └── voyageai
│       │       └── __init__.py
│       ├── memory_unit
│       │   ├── __init__.py
│       │   ├── conversational_memory_unit.py
│       │   ├── semantic_cache_entry.py
│       │   └── summary_component.py
│       ├── memagent.py
│       ├── __init__.py
│       ├── tests
│       │   └── test_vegetarian_recipe_agent.py
│       └── cli.py
├── tests
│   ├── unit
│   │   ├── __init__.py
│   │   ├── test_firecrawl_provider.py
│   │   ├── test_tavily_provider.py
│   │   ├── test_internet_access.py
│   │   ├── test_filesystem_provider.py
│   │   └── test_entity_memory.py
│   ├── __init__.py
│   ├── integration
│   │   └── __init__.py
│   ├── mocks
│   │   └── __init__.py
│   └── performance
│       └── __init__.py
├── examples
│   ├── images
│   │   └── memorizz_script_output.png
│   └── setup_oracle_user.py
├── .gitignore
├── .env.example
├── docs
│   ├── use-cases
│   │   ├── workflow-mode.md
│   │   └── assistant-mode.md
│   ├── memory-types
│   │   ├── episodic.md
│   │   ├── short-term.md
│   │   ├── shared.md
│   │   ├── semantic.md
│   │   └── procedural.md
│   ├── memory-providers
│   │   ├── custom.md
│   │   ├── mongodb.md
│   │   ├── oracle.md
│   │   └── filesystem.md
│   ├── index.md
│   ├── getting-started
│   │   ├── overview.md
│   │   ├── python-sdk-quickstart.md
│   │   └── concepts.md
│   ├── utilities
│   │   └── context_window_stats.md
│   └── internet-access
│       └── providers.md
├── .github
│   └── workflows
│       └── docs.yml
├── .pre-commit-config.yaml
├── pyproject.toml
├── Makefile
├── pytest.ini
├── setup_dev.sh
├── eval
│   ├── README.md
│   └── longmemeval
│       ├── README.md
│       ├── README_evaluation_architectures.md
│       └── download_dataset.py
├── install_oracle_client.sh
├── mkdocs.yml
├── .claude.md
└── =0.26.0

/=2.5.0:
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /=4.36.0: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.venv/bin/python: -------------------------------------------------------------------------------- 1 | python3 -------------------------------------------------------------------------------- /.venv/bin/python3: -------------------------------------------------------------------------------- 1 | /usr/local/bin/python3 -------------------------------------------------------------------------------- /install_oracle.sh: -------------------------------------------------------------------------------- 1 | src/memorizz/scripts/install_oracle.sh -------------------------------------------------------------------------------- /src/memorizz/short_term_memory/working_memory/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/unit/__init__.py: -------------------------------------------------------------------------------- 1 | """Unit tests for MemAgent components.""" 2 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Test package for MemAgent refactored architecture.""" 2 | -------------------------------------------------------------------------------- /tests/integration/__init__.py: -------------------------------------------------------------------------------- 1 | """Integration tests for MemAgent components.""" 2 | -------------------------------------------------------------------------------- /tests/mocks/__init__.py: -------------------------------------------------------------------------------- 1 | """Mock objects for testing MemAgent components.""" 2 | -------------------------------------------------------------------------------- /tests/performance/__init__.py: -------------------------------------------------------------------------------- 1 | """Performance and stress tests for MemAgent.""" 2 | -------------------------------------------------------------------------------- /.venv/pyvenv.cfg: -------------------------------------------------------------------------------- 1 | home = /usr/local/bin 2 | include-system-site-packages = false 3 | version = 3.7.9 4 | -------------------------------------------------------------------------------- /src/memorizz/long_term_memory/procedural/toolbox/__init__.py: -------------------------------------------------------------------------------- 1 | from .toolbox import Toolbox 2 | 3 | __all__ = ["Toolbox"] 4 | -------------------------------------------------------------------------------- /src/memorizz/coordination/__init__.py: -------------------------------------------------------------------------------- 1 | from .shared_memory.shared_memory import SharedMemory 2 | 3 | __all__ = ["SharedMemory"] 4 | -------------------------------------------------------------------------------- /examples/images/memorizz_script_output.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RichmondAlake/memorizz/HEAD/examples/images/memorizz_script_output.png 
-------------------------------------------------------------------------------- /src/memorizz/long_term_memory/procedural/workflow/__init__.py: -------------------------------------------------------------------------------- 1 | from .workflow import Workflow 2 | 3 | __all__ = ["Workflow", "WorkflowOutcome"] 4 | -------------------------------------------------------------------------------- /src/memorizz/long_term_memory/procedural/__init__.py: -------------------------------------------------------------------------------- 1 | from .toolbox import Toolbox 2 | from .workflow import Workflow 3 | 4 | __all__ = ["Toolbox", "Workflow"] 5 | -------------------------------------------------------------------------------- /src/memorizz/long_term_memory/semantic/persona/__init__.py: -------------------------------------------------------------------------------- 1 | from .persona import Persona 2 | from .role_type import RoleType 3 | 4 | __all__ = ["Persona", "RoleType"] 5 | -------------------------------------------------------------------------------- /src/memorizz/short_term_memory/__init__.py: -------------------------------------------------------------------------------- 1 | from .semantic_cache import SemanticCache 2 | from .working_memory.cwm import CWM 3 | 4 | __all__ = ["SemanticCache", "CWM"] 5 | -------------------------------------------------------------------------------- /src/memorizz/memory_provider/mongodb/__init__.py: -------------------------------------------------------------------------------- 1 | from .provider import MongoDBConfig, MongoDBProvider 2 | 3 | __all__ = [ 4 | "MongoDBProvider", 5 | "MongoDBConfig", 6 | ] 7 | -------------------------------------------------------------------------------- /src/memorizz/llms/__init__.py: -------------------------------------------------------------------------------- 1 | from .azure import AzureOpenAI 2 | from .huggingface import HuggingFaceLLM 3 | from .openai import OpenAI 4 | 5 | __all__ = ["OpenAI", "AzureOpenAI", "HuggingFaceLLM"] 6 | -------------------------------------------------------------------------------- /src/memorizz/memory_provider/filesystem/__init__.py: -------------------------------------------------------------------------------- 1 | """Filesystem-based memory provider.""" 2 | 3 | from .provider import FileSystemConfig, FileSystemProvider 4 | 5 | __all__ = ["FileSystemConfig", "FileSystemProvider"] 6 | -------------------------------------------------------------------------------- /src/memorizz/long_term_memory/episodic/__init__.py: -------------------------------------------------------------------------------- 1 | from .conversational_memory_unit import ConversationMemoryUnit 2 | from .summary_component import SummaryComponent 3 | 4 | __all__ = ["ConversationMemoryUnit", "SummaryComponent"] 5 | -------------------------------------------------------------------------------- /src/memorizz/internet_access/providers/__init__.py: -------------------------------------------------------------------------------- 1 | """Available internet access providers.""" 2 | 3 | from .firecrawl import FirecrawlProvider 4 | from .tavily import TavilyProvider 5 | 6 | __all__ = ["FirecrawlProvider", "TavilyProvider"] 7 | -------------------------------------------------------------------------------- /src/memorizz/memory_provider/oracle/__init__.py: -------------------------------------------------------------------------------- 1 | from .provider import OracleConfig, OracleProvider 2 | from .setup import setup_oracle_user 3 
| 4 | __all__ = [ 5 | "OracleProvider", 6 | "OracleConfig", 7 | "setup_oracle_user", 8 | ] 9 | -------------------------------------------------------------------------------- /src/memorizz/database/__init__.py: -------------------------------------------------------------------------------- 1 | from .mongodb.mongodb_tools import MongoDBTools, MongoDBToolsConfig, get_mongodb_toolbox 2 | 3 | __all__ = [ 4 | # MongoDB tools 5 | "MongoDBTools", 6 | "MongoDBToolsConfig", 7 | "get_mongodb_toolbox", 8 | ] 9 | -------------------------------------------------------------------------------- /src/memorizz/enums/role.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class Role(Enum): 5 | """Enum for different roles in a conversation.""" 6 | 7 | USER = "user" 8 | ASSISTANT = "assistant" 9 | DEVELOPER = "developer" 10 | TOOL = "tool" 11 | -------------------------------------------------------------------------------- /src/memorizz/memagent/orchestrators/multi_agent_orchestrator.py: -------------------------------------------------------------------------------- 1 | """Shim module to keep backwards compatibility for multi-agent orchestrator imports.""" 2 | 3 | from ...multi_agent_orchestrator import MultiAgentOrchestrator 4 | 5 | __all__ = ["MultiAgentOrchestrator"] 6 | -------------------------------------------------------------------------------- /src/memorizz/embeddings/openai/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | OpenAI Embedding Provider 3 | 4 | This package contains the OpenAI embedding provider implementation. 5 | """ 6 | 7 | from .provider import OpenAIEmbeddingProvider 8 | 9 | __all__ = ["OpenAIEmbeddingProvider"] 10 | -------------------------------------------------------------------------------- /src/memorizz/embeddings/azure/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | OpenAI Embedding Provider 3 | 4 | This package contains the OpenAI embedding provider implementation. 5 | """ 6 | 7 | from .provider import AzureOpenAIEmbeddingProvider 8 | 9 | __all__ = ["AzureOpenAIEmbeddingProvider"] 10 | -------------------------------------------------------------------------------- /src/memorizz/embeddings/huggingface/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Hugging Face Embedding Provider 3 | 4 | Exposes the HuggingFaceEmbeddingProvider implementation. 5 | """ 6 | 7 | from .provider import HuggingFaceEmbeddingProvider 8 | 9 | __all__ = ["HuggingFaceEmbeddingProvider"] 10 | -------------------------------------------------------------------------------- /src/memorizz/embeddings/ollama/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Ollama Embedding Provider 3 | 4 | This package contains the Ollama embedding provider implementation for local embeddings. 
5 | """ 6 | 7 | from .provider import OllamaEmbeddingProvider 8 | 9 | __all__ = ["OllamaEmbeddingProvider"] 10 | -------------------------------------------------------------------------------- /.venv/bin/pip: -------------------------------------------------------------------------------- 1 | #!/Users/richmondalake/Desktop/memorizz/.venv/bin/python3 2 | # -*- coding: utf-8 -*- 3 | import re 4 | import sys 5 | 6 | from pip._internal.cli.main import main 7 | 8 | if __name__ == "__main__": 9 | sys.argv[0] = re.sub(r"(-script\.pyw|\.exe)?$", "", sys.argv[0]) 10 | sys.exit(main()) 11 | -------------------------------------------------------------------------------- /.venv/bin/pip3: -------------------------------------------------------------------------------- 1 | #!/Users/richmondalake/Desktop/memorizz/.venv/bin/python3 2 | # -*- coding: utf-8 -*- 3 | import re 4 | import sys 5 | 6 | from pip._internal.cli.main import main 7 | 8 | if __name__ == "__main__": 9 | sys.argv[0] = re.sub(r"(-script\.pyw|\.exe)?$", "", sys.argv[0]) 10 | sys.exit(main()) 11 | -------------------------------------------------------------------------------- /.venv/bin/pip3.10: -------------------------------------------------------------------------------- 1 | #!/Users/richmondalake/Desktop/memorizz/.venv/bin/python3 2 | # -*- coding: utf-8 -*- 3 | import re 4 | import sys 5 | 6 | from pip._internal.cli.main import main 7 | 8 | if __name__ == "__main__": 9 | sys.argv[0] = re.sub(r"(-script\.pyw|\.exe)?$", "", sys.argv[0]) 10 | sys.exit(main()) 11 | -------------------------------------------------------------------------------- /.venv/bin/pip3.7: -------------------------------------------------------------------------------- 1 | #!/Users/richmondalake/Desktop/memorizz/.venv/bin/python3 2 | # -*- coding: utf-8 -*- 3 | import re 4 | import sys 5 | 6 | from pip._internal.cli.main import main 7 | 8 | if __name__ == "__main__": 9 | sys.argv[0] = re.sub(r"(-script\.pyw|\.exe)?$", "", sys.argv[0]) 10 | sys.exit(main()) 11 | -------------------------------------------------------------------------------- /src/memorizz/memagent/handlers/__init__.py: -------------------------------------------------------------------------------- 1 | """Handler components for MemAgent processing.""" 2 | 3 | from .conversation_handler import ConversationHandler 4 | from .prompt_handler import PromptHandler 5 | from .response_handler import ResponseHandler 6 | 7 | __all__ = ["ConversationHandler", "PromptHandler", "ResponseHandler"] 8 | -------------------------------------------------------------------------------- /src/memorizz/enums/semantic_cache_scope.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class SemanticCacheScope(Enum): 5 | """Scope for semantic cache searches.""" 6 | 7 | LOCAL = "local" # Search only this agent's cache entries (filtered by agent_id) 8 | GLOBAL = "global" # Search across all cache entries (no agent_id filter) 9 | -------------------------------------------------------------------------------- /.venv/bin/easy_install: -------------------------------------------------------------------------------- 1 | #!/Users/richmondalake/Desktop/memorizz/.venv/bin/python3 2 | # -*- coding: utf-8 -*- 3 | import re 4 | import sys 5 | 6 | from setuptools.command.easy_install import main 7 | 8 | if __name__ == "__main__": 9 | sys.argv[0] = re.sub(r"(-script\.pyw|\.exe)?$", "", sys.argv[0]) 10 | sys.exit(main()) 11 | 
-------------------------------------------------------------------------------- /.venv/bin/easy_install-3.7: -------------------------------------------------------------------------------- 1 | #!/Users/richmondalake/Desktop/memorizz/.venv/bin/python3 2 | # -*- coding: utf-8 -*- 3 | import re 4 | import sys 5 | 6 | from setuptools.command.easy_install import main 7 | 8 | if __name__ == "__main__": 9 | sys.argv[0] = re.sub(r"(-script\.pyw|\.exe)?$", "", sys.argv[0]) 10 | sys.exit(main()) 11 | -------------------------------------------------------------------------------- /src/memorizz/long_term_memory/semantic/entity_memory/__init__.py: -------------------------------------------------------------------------------- 1 | """Entity memory exports.""" 2 | 3 | from .entity_memory import ( 4 | EntityAttribute, 5 | EntityMemory, 6 | EntityMemoryRecord, 7 | EntityRelation, 8 | ) 9 | 10 | __all__ = [ 11 | "EntityMemory", 12 | "EntityMemoryRecord", 13 | "EntityAttribute", 14 | "EntityRelation", 15 | ] 16 | -------------------------------------------------------------------------------- /src/memorizz/enums/__init__.py: -------------------------------------------------------------------------------- 1 | from .application_mode import ApplicationMode, ApplicationModeConfig 2 | from .memory_type import MemoryType 3 | from .role import Role 4 | from .semantic_cache_scope import SemanticCacheScope 5 | 6 | __all__ = [ 7 | "Role", 8 | "ApplicationMode", 9 | "ApplicationModeConfig", 10 | "MemoryType", 11 | "SemanticCacheScope", 12 | ] 13 | -------------------------------------------------------------------------------- /src/memorizz/memagent/orchestrators/__init__.py: -------------------------------------------------------------------------------- 1 | """Orchestrator components for MemAgent coordination.""" 2 | 3 | from .deep_research import DeepResearchOrchestrator, DeepResearchWorkflow 4 | from .multi_agent_orchestrator import MultiAgentOrchestrator 5 | 6 | __all__ = [ 7 | "MultiAgentOrchestrator", 8 | "DeepResearchOrchestrator", 9 | "DeepResearchWorkflow", 10 | ] 11 | -------------------------------------------------------------------------------- /src/memorizz/memory_unit/__init__.py: -------------------------------------------------------------------------------- 1 | from .conversational_memory_unit import ConversationMemoryUnit 2 | from .memory_unit import MemoryUnit 3 | from .semantic_cache_entry import SemanticCacheEntry 4 | from .summary_component import SummaryComponent, SummaryMetrics 5 | 6 | __all__ = [ 7 | "MemoryUnit", 8 | "ConversationMemoryUnit", 9 | "SummaryComponent", 10 | "SummaryMetrics", 11 | "SemanticCacheEntry", 12 | ] 13 | -------------------------------------------------------------------------------- /src/memorizz/memory_unit/conversational_memory_unit.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from pydantic import BaseModel 4 | 5 | 6 | class ConversationMemoryUnit(BaseModel): 7 | role: str 8 | content: str 9 | timestamp: str 10 | memory_id: str 11 | conversation_id: str 12 | embedding: list[float] 13 | recall_recency: Optional[float] = None 14 | associated_conversation_ids: Optional[list[str]] = None 15 | -------------------------------------------------------------------------------- /src/memorizz/memagent/utils/__init__.py: -------------------------------------------------------------------------------- 1 | """Utility components for MemAgent.""" 2 | 3 | from .formatters import PromptFormatter, 
ResponseFormatter 4 | from .helpers import IDGenerator, TimestampHelper 5 | from .validators import ConfigValidator, InputValidator 6 | 7 | __all__ = [ 8 | "ConfigValidator", 9 | "InputValidator", 10 | "PromptFormatter", 11 | "ResponseFormatter", 12 | "IDGenerator", 13 | "TimestampHelper", 14 | ] 15 | -------------------------------------------------------------------------------- /src/memorizz/embeddings/voyageai/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | VoyageAI Embedding Provider 3 | 4 | This package contains the VoyageAI embedding provider implementation with support for: 5 | - Text embeddings with multiple models and configurable dimensions 6 | - Multimodal embeddings for text and images 7 | - Contextualized chunk embeddings for documents 8 | """ 9 | 10 | from .provider import VoyageAIEmbeddingProvider 11 | 12 | __all__ = ["VoyageAIEmbeddingProvider"] 13 | -------------------------------------------------------------------------------- /src/memorizz/long_term_memory/semantic/__init__.py: -------------------------------------------------------------------------------- 1 | from .entity_memory import ( 2 | EntityAttribute, 3 | EntityMemory, 4 | EntityMemoryRecord, 5 | EntityRelation, 6 | ) 7 | from .knowledge_base import KnowledgeBase 8 | from .persona import Persona, RoleType 9 | 10 | __all__ = [ 11 | "KnowledgeBase", 12 | "Persona", 13 | "RoleType", 14 | "EntityMemory", 15 | "EntityMemoryRecord", 16 | "EntityAttribute", 17 | "EntityRelation", 18 | ] 19 | -------------------------------------------------------------------------------- /src/memorizz/memagent/builders/__init__.py: -------------------------------------------------------------------------------- 1 | """Builder components for MemAgent.""" 2 | 3 | from .agent_builder import ( 4 | MemAgentBuilder, 5 | create_assistant, 6 | create_chatbot, 7 | create_deep_research_agent, 8 | create_task_agent, 9 | ) 10 | from .config_builder import ConfigBuilder 11 | 12 | __all__ = [ 13 | "MemAgentBuilder", 14 | "ConfigBuilder", 15 | "create_assistant", 16 | "create_chatbot", 17 | "create_task_agent", 18 | "create_deep_research_agent", 19 | ] 20 | -------------------------------------------------------------------------------- /src/memorizz/coordination/shared_memory/__init__.py: -------------------------------------------------------------------------------- 1 | from .messages import ( 2 | SharedMemoryMessage, 3 | SharedMemoryMessageType, 4 | create_command_message, 5 | create_report_message, 6 | create_status_message, 7 | ) 8 | from .shared_memory import BlackboardEntry, SharedMemory 9 | 10 | __all__ = [ 11 | "SharedMemory", 12 | "BlackboardEntry", 13 | "SharedMemoryMessage", 14 | "SharedMemoryMessageType", 15 | "create_command_message", 16 | "create_status_message", 17 | "create_report_message", 18 | ] 19 | -------------------------------------------------------------------------------- /src/memorizz/long_term_memory/episodic/conversational_memory_unit.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from pydantic import BaseModel 4 | 5 | 6 | class ConversationMemoryUnit(BaseModel): 7 | role: str 8 | content: str 9 | timestamp: str 10 | memory_id: str 11 | conversation_id: str 12 | embedding: Optional[ 13 | list[float] 14 | ] = None # Optional for Oracle VECTOR (NULL vs empty list) 15 | agent_id: Optional[str] = None 16 | recall_recency: Optional[float] = None 17 | associated_conversation_ids: 
Optional[list[str]] = None 18 | -------------------------------------------------------------------------------- /src/memorizz/enums/memory_type.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class MemoryType(Enum): 5 | """Enum for different types of memory stores.""" 6 | 7 | PERSONAS = "personas" 8 | TOOLBOX = "toolbox" 9 | ENTITY_MEMORY = "entity_memory" 10 | SHORT_TERM_MEMORY = "short_term_memory" 11 | LONG_TERM_MEMORY = "long_term_memory" 12 | CONVERSATION_MEMORY = "conversation_memory" 13 | WORKFLOW_MEMORY = "workflow_memory" 14 | MEMAGENT = "agents" 15 | SHARED_MEMORY = "shared_memory" 16 | SUMMARIES = "summaries" 17 | SEMANTIC_CACHE = "semantic_cache" 18 | -------------------------------------------------------------------------------- /src/memorizz/memagent/constants.py: -------------------------------------------------------------------------------- 1 | """Configuration constants for MemAgent.""" 2 | 3 | import os 4 | 5 | # Configuration constants 6 | DEFAULT_INSTRUCTION = "You are a helpful assistant." 7 | DEFAULT_MAX_STEPS = 20 8 | DEFAULT_TOOL_ACCESS = "private" 9 | 10 | # Logging configuration 11 | MEMORIZZ_LOG_LEVEL = os.getenv("MEMORIZZ_LOG_LEVEL", "DEBUG").upper() 12 | 13 | # Application modes 14 | APPLICATION_MODES = { 15 | "assistant": "General purpose assistant", 16 | "chatbot": "Conversational chatbot", 17 | "agent": "Task-oriented agent", 18 | } 19 | 20 | # Memory types 21 | DEFAULT_MEMORY_TYPES = ["conversation_memory", "semantic_memory"] 22 | -------------------------------------------------------------------------------- /src/memorizz/memagent/managers/__init__.py: -------------------------------------------------------------------------------- 1 | """Manager components for MemAgent functionality.""" 2 | 3 | from .cache_manager import CacheManager 4 | from .entity_memory_manager import EntityMemoryManager 5 | from .internet_access_manager import InternetAccessManager 6 | from .memory_manager import MemoryManager 7 | from .persona_manager import PersonaManager 8 | from .tool_manager import ToolManager 9 | from .workflow_manager import WorkflowManager 10 | 11 | __all__ = [ 12 | "MemoryManager", 13 | "ToolManager", 14 | "CacheManager", 15 | "PersonaManager", 16 | "WorkflowManager", 17 | "EntityMemoryManager", 18 | "InternetAccessManager", 19 | ] 20 | -------------------------------------------------------------------------------- /src/memorizz/memory_provider/oracle/requirements.txt: -------------------------------------------------------------------------------- 1 | # Oracle AI Database Memory Provider Requirements 2 | 3 | # Oracle Database Driver (official) 4 | # Supports Oracle Database 23ai and 26ai with VECTOR datatype 5 | oracledb>=2.0.0 6 | 7 | # Note: This is the modern replacement for cx_Oracle 8 | # It supports both "thick" and "thin" modes: 9 | # - Thin mode: Pure Python, no Oracle Client required (default) 10 | # - Thick mode: Uses Oracle Client libraries (optional, for advanced features) 11 | 12 | # Installation: 13 | # pip install oracledb 14 | 15 | # For thick mode (optional), you also need Oracle Instant Client: 16 | # https://www.oracle.com/database/technologies/instant-client.html 17 | -------------------------------------------------------------------------------- /src/memorizz/memagent/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | MemAgent module. 
3 | 4 | This module provides a maintainable structure while maintaining 5 | 100% backward compatibility with existing code. 6 | """ 7 | 8 | # Import core components 9 | from .core import MemAgent 10 | 11 | # Optional: Import managers for advanced users 12 | from .managers import ( 13 | CacheManager, 14 | MemoryManager, 15 | PersonaManager, 16 | ToolManager, 17 | WorkflowManager, 18 | ) 19 | from .models import MemAgentConfig, MemAgentModel 20 | 21 | # Export all public APIs 22 | __all__ = [ 23 | "MemAgent", 24 | "MemAgentModel", 25 | "MemAgentConfig", 26 | "MemoryManager", 27 | "ToolManager", 28 | "CacheManager", 29 | "PersonaManager", 30 | "WorkflowManager", 31 | ] 32 | -------------------------------------------------------------------------------- /src/memorizz/memagent.py: -------------------------------------------------------------------------------- 1 | """ 2 | MemAgent - Backward compatibility wrapper. 3 | 4 | This file maintains backward compatibility for existing code that imports 5 | from memorizz.memagent. 6 | 7 | Now uses the refactored memagent/ module with unified MemoryProvider interface. 8 | The original implementation is preserved in memagent_original_backup.py for reference. 9 | """ 10 | 11 | # Import from the refactored implementation 12 | from .memagent.core import MemAgent 13 | from .memagent.models import MemAgentModel 14 | 15 | # Re-export all public APIs to maintain backward compatibility 16 | __all__ = ["MemAgent", "MemAgentModel"] 17 | 18 | # This ensures that code like: 19 | # from memorizz.memagent import MemAgent 20 | # continues to work with both old and new calling conventions 21 | -------------------------------------------------------------------------------- /src/memorizz/long_term_memory/__init__.py: -------------------------------------------------------------------------------- 1 | from .episodic.conversational_memory_unit import ConversationMemoryUnit 2 | from .episodic.summary_component import SummaryComponent 3 | from .procedural.toolbox import Toolbox 4 | from .procedural.workflow import Workflow 5 | from .semantic.entity_memory import ( 6 | EntityAttribute, 7 | EntityMemory, 8 | EntityMemoryRecord, 9 | EntityRelation, 10 | ) 11 | from .semantic.knowledge_base import KnowledgeBase 12 | from .semantic.persona import Persona 13 | 14 | __all__ = [ 15 | "KnowledgeBase", 16 | "Persona", 17 | "EntityMemory", 18 | "EntityMemoryRecord", 19 | "EntityAttribute", 20 | "EntityRelation", 21 | "Toolbox", 22 | "Workflow", 23 | "ConversationMemoryUnit", 24 | "SummaryComponent", 25 | ] 26 | -------------------------------------------------------------------------------- /src/memorizz/long_term_memory/procedural/toolbox/tool_schema.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from pydantic import BaseModel 4 | 5 | 6 | class ParameterSchema(BaseModel): 7 | """ 8 | A schema for the parameter. 9 | """ 10 | 11 | name: str 12 | description: str 13 | type: str 14 | required: bool 15 | 16 | 17 | class FunctionSchema(BaseModel): 18 | """ 19 | A schema for the function. 20 | """ 21 | 22 | name: str 23 | description: str 24 | parameters: list[ParameterSchema] 25 | required: List[str] 26 | queries: List[str] 27 | 28 | 29 | class ToolSchemaType(BaseModel): 30 | """ 31 | A schema for the tool. 32 | This can be the OpenAI function calling schema or Google function calling schema. 
33 | """ 34 | 35 | type: str 36 | function: FunctionSchema 37 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Python 2 | __pycache__/ 3 | *.py[cod] 4 | *.so 5 | 6 | # Distribution / packaging 7 | .Python 8 | build/ 9 | develop-eggs/ 10 | dist/ 11 | downloads/ 12 | eggs/ 13 | .eggs/ 14 | lib/ 15 | lib64/ 16 | parts/ 17 | sdist/ 18 | var/ 19 | wheels/ 20 | *.egg-info/ 21 | .installed.cfg 22 | *.egg 23 | 24 | # Virtual environments 25 | venv/ 26 | ENV/ 27 | .venv/ 28 | 29 | # IDEs 30 | .vscode/ 31 | .idea/ 32 | 33 | # OS generated files 34 | .DS_Store 35 | Thumbs.db 36 | 37 | # Test Notebooks 38 | dev_test.ipynb 39 | 40 | # Environment variables and secrets 41 | .env 42 | .env.* 43 | *.env 44 | *.key 45 | *.pem 46 | *.p12 47 | *.pfx 48 | secrets.json 49 | credentials.json 50 | config.local.* 51 | *.secret 52 | 53 | # Examples folder (local testing only) 54 | src/memorizz/examples/ 55 | 56 | # Bug reports 57 | .bug_tracking/ 58 | 59 | # Build artifacts 60 | site/ 61 | *.map 62 | -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- 1 | # MemoRizz Environment Variables Configuration 2 | # Copy this file to .env and fill in your actual values 3 | # DO NOT commit .env to version control 4 | 5 | # OpenAI API Configuration (Required) 6 | # Get your API key from: https://platform.openai.com/api-keys 7 | OPENAI_API_KEY= 8 | 9 | # Oracle Database Configuration 10 | # Admin credentials (for setup only) 11 | ORACLE_ADMIN_USER=system 12 | ORACLE_ADMIN_PASSWORD=MyPassword123! 13 | 14 | # MemoRizz database user credentials 15 | ORACLE_USER=memorizz_user 16 | ORACLE_PASSWORD=SecurePass123! 17 | ORACLE_DSN=localhost:1521/FREEPDB1 18 | 19 | # Optional: Backend selection (oracle, mongodb) 20 | MEMORIZZ_BACKEND=oracle 21 | 22 | # Optional: Schema name (defaults to username) 23 | MEMORIZZ_SCHEMA=MEMORIZZ_USER 24 | 25 | # Optional: Docker platform flag for Apple Silicon 26 | # Uncomment and set if you're on Apple Silicon (M1/M2/M3) 27 | # PLATFORM_FLAG=--platform linux/amd64 28 | -------------------------------------------------------------------------------- /docs/use-cases/workflow-mode.md: -------------------------------------------------------------------------------- 1 | # Workflow Mode 2 | 3 | Workflow mode targets deterministic task execution (think onboarding checklists, ticket triage, or knowledge-base upkeep). It favors procedural memory and tools over conversational depth. 4 | 5 | ## Memory Stack 6 | 7 | - `MemoryType.WORKFLOW_MEMORY` 8 | - `MemoryType.TOOLBOX` 9 | - `MemoryType.LONG_TERM_MEMORY` 10 | - `MemoryType.SHORT_TERM_MEMORY` 11 | 12 | ## Sample Flow 13 | 14 | ```python 15 | from memorizz.enums import ApplicationMode 16 | 17 | agent = (MemAgentBuilder() 18 | .with_application_mode(ApplicationMode.WORKFLOW) 19 | .with_memory_provider(provider) 20 | .with_tool(module_path="memorizz.tools.workflow") 21 | .build()) 22 | 23 | agent.run("Process ticket 12491 and update the changelog") 24 | ``` 25 | 26 | Workflow mode keeps episodic memory minimal so the agent can stay focused on the currently executing process. Pair it with shared memory if you need a supervisor agent to inspect progress. 
27 | -------------------------------------------------------------------------------- /docs/use-cases/assistant-mode.md: -------------------------------------------------------------------------------- 1 | # Assistant Mode 2 | 3 | Assistant mode is the default conversational setup for MemoRizz. It prioritizes continuity, personalization, and a rich memory stack so users feel like they're chatting with the same agent every time. 4 | 5 | ## Memory Stack 6 | 7 | - `MemoryType.CONVERSATION_MEMORY` 8 | - `MemoryType.LONG_TERM_MEMORY` + `MemoryType.ENTITY_MEMORY` 9 | - `MemoryType.PERSONAS` 10 | - `MemoryType.SHORT_TERM_MEMORY` 11 | - `MemoryType.SUMMARIES` 12 | 13 | ## Configuration 14 | 15 | ```python 16 | from memorizz.enums import ApplicationMode 17 | 18 | agent = (MemAgentBuilder() 19 | .with_application_mode(ApplicationMode.ASSISTANT) 20 | .with_memory_provider(provider) 21 | ... 22 | .build()) 23 | ``` 24 | 25 | ## Tips 26 | 27 | - Seed personas with voice/tone guidelines and safety rails. 28 | - Use entity memory to store user preferences (e.g., "prefers dark mode UI"). 29 | - Enable semantic cache for repeated Q&A answers to cut LLM costs. 30 | 31 | Assistant mode is ideal for customer support, onboarding companions, or internal help desks. 32 | -------------------------------------------------------------------------------- /src/memorizz/memory_unit/semantic_cache_entry.py: -------------------------------------------------------------------------------- 1 | """ 2 | Semantic Cache Entry for MemAgent 3 | 4 | Represents a cached query-response pair with metadata for semantic similarity matching. 5 | """ 6 | 7 | from typing import Any, Dict, List, Optional 8 | 9 | from pydantic import BaseModel 10 | 11 | 12 | class SemanticCacheEntry(BaseModel): 13 | """ 14 | Represents a cached query-response pair with metadata. 15 | 16 | This memory unit stores semantic cache entries that enable fast retrieval 17 | of similar queries through vector similarity matching. 
18 | """ 19 | 20 | query: str 21 | response: str 22 | embedding: List[float] 23 | timestamp: float 24 | session_id: Optional[str] = None 25 | memory_id: Optional[str] = None 26 | agent_id: Optional[str] = None 27 | usage_count: int = 0 28 | last_accessed: Optional[float] = None 29 | metadata: Optional[Dict[str, Any]] = None 30 | cache_key: Optional[str] = None 31 | 32 | def model_post_init(self, __context) -> None: 33 | """Initialize last_accessed if not provided.""" 34 | if self.last_accessed is None: 35 | self.last_accessed = self.timestamp 36 | -------------------------------------------------------------------------------- /.github/workflows/docs.yml: -------------------------------------------------------------------------------- 1 | name: Docs 2 | 3 | on: 4 | push: 5 | branches: ["main"] 6 | workflow_dispatch: 7 | 8 | permissions: 9 | contents: read 10 | pages: write 11 | id-token: write 12 | 13 | concurrency: 14 | group: "pages" 15 | cancel-in-progress: true 16 | 17 | jobs: 18 | build: 19 | runs-on: ubuntu-latest 20 | steps: 21 | - name: Checkout 22 | uses: actions/checkout@v4 23 | - name: Setup Python 24 | uses: actions/setup-python@v5 25 | with: 26 | python-version: "3.11" 27 | - name: Install dependencies 28 | run: | 29 | python -m pip install --upgrade pip 30 | pip install -e .[docs] 31 | - name: Build site 32 | run: mkdocs build --strict 33 | - name: Upload artifact 34 | uses: actions/upload-pages-artifact@v3 35 | with: 36 | path: site/ 37 | 38 | deploy: 39 | needs: build 40 | runs-on: ubuntu-latest 41 | environment: 42 | name: github-pages 43 | url: ${{ steps.deployment.outputs.page_url }} 44 | steps: 45 | - name: Deploy to GitHub Pages 46 | id: deployment 47 | uses: actions/deploy-pages@v4 48 | -------------------------------------------------------------------------------- /docs/memory-types/episodic.md: -------------------------------------------------------------------------------- 1 | # Episodic Memory 2 | 3 | Episodic memory chronicles every interaction an agent has with users, teammates, or tools. It lives under `src/memorizz/long_term_memory/episodic/` and fulfills both `MemoryType.CONVERSATION_MEMORY` and `MemoryType.SUMMARIES`. 4 | 5 | ## Structure 6 | 7 | - **Conversation Memory Units** – Raw transcripts with timestamps, speaker metadata, and embeddings for semantic retrieval. 8 | - **Summaries** – Periodic rollups that compress older chunks to keep prompts small while retaining context. 9 | 10 | ## Example 11 | 12 | ```python 13 | agent.memory.conversation_memory.add_message( 14 | role="user", 15 | content="Can you remind me of the Oracle setup steps?", 16 | ) 17 | 18 | agent.memory.summaries.create_or_update( 19 | topic="setup", 20 | content="User configured Oracle last week and is stuck on connection pooling.", 21 | ) 22 | ``` 23 | 24 | ## Use Cases 25 | 26 | - Long-running assistants that must reference previous sessions 27 | - Relationship and preference tracking for customer success bots 28 | - Auditable records of how multi-agent systems reached a decision 29 | 30 | Pair episodic memory with semantic cache or working memory to prioritize the most relevant snippets for a given prompt window. 
31 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # Pre-commit hooks for code quality 2 | repos: 3 | - repo: https://github.com/pre-commit/pre-commit-hooks 4 | rev: v4.5.0 5 | hooks: 6 | - id: trailing-whitespace 7 | - id: end-of-file-fixer 8 | - id: check-yaml 9 | - id: check-added-large-files 10 | - id: check-ast # Check Python syntax 11 | - id: check-merge-conflict 12 | 13 | - repo: https://github.com/psf/black 14 | rev: 23.12.1 15 | hooks: 16 | - id: black 17 | language_version: python3.11 18 | 19 | - repo: https://github.com/pycqa/flake8 20 | rev: 7.0.0 21 | hooks: 22 | - id: flake8 23 | args: [ 24 | '--max-line-length=120', 25 | '--extend-ignore=E203,E501,E712,F401,F541,F841' 26 | ] 27 | exclude: | 28 | (?x)^( 29 | .*_backup\.py| 30 | memagent_original_backup\.py| 31 | provider_backup\.py| 32 | eval/.*| 33 | tests/.* 34 | )$ 35 | 36 | - repo: https://github.com/pycqa/isort 37 | rev: 5.13.2 38 | hooks: 39 | - id: isort 40 | args: ['--profile', 'black'] 41 | 42 | - repo: https://github.com/kynan/nbstripout 43 | rev: 0.6.1 44 | hooks: 45 | - id: nbstripout 46 | args: ['--extra-keys', 'metadata.collapsed,metadata.scrolled'] 47 | -------------------------------------------------------------------------------- /docs/memory-types/short-term.md: -------------------------------------------------------------------------------- 1 | # Short-Term Memory 2 | 3 | Short-term memory is the agent's active workspace. MemoRizz separates it into a semantic cache and a working-memory controller located under `src/memorizz/short_term_memory/`. 4 | 5 | ## Semantic Cache (`MemoryType.SEMANTIC_CACHE`) 6 | 7 | - Stores short-lived key/value pairs with embeddings for fast similarity matches. 8 | - Ideal for caching expensive LLM responses, transient API payloads, or session-only facts. 9 | - Automatically expires or can be explicitly cleared when you rotate sessions. 10 | 11 | ```python 12 | agent.memory.semantic_cache.save( 13 | key="oracle_setup_docs", 14 | value={"answer": "Install client, run memorizz setup-oracle"}, 15 | ) 16 | ``` 17 | 18 | ## Working Memory (`MemoryType.SHORT_TERM_MEMORY`) 19 | 20 | - Tracks the active conversation window across all memory sources. 21 | - Manages token budgets by summarizing or truncating inputs before they reach the LLM. 22 | - Responsible for stitching retrieved semantic, episodic, and procedural memories into a cohesive prompt. 23 | 24 | ```python 25 | window = agent.memory.short_term.window_for(agent_id=agent.id) 26 | window.push_user_message("Give me the highlights from yesterday's sync.") 27 | ``` 28 | 29 | Short-term memory keeps the agent grounded in the current turn while semantic + episodic stores provide long-term continuity. 30 | -------------------------------------------------------------------------------- /docs/memory-types/shared.md: -------------------------------------------------------------------------------- 1 | # Shared Memory 2 | 3 | Shared memory powers coordination between multiple agents. It sits in `src/memorizz/coordination/shared_memory/` and corresponds to `MemoryType.SHARED_MEMORY`. 4 | 5 | ## Why It Exists 6 | 7 | Complex workflows often split responsibilities across researcher, analyst, and writer agents. Shared memory provides a blackboard-like store where agents can exchange artifacts, delegate tasks, and keep track of global progress. 
8 | 9 | ## Creating a Session 10 | 11 | ```python 12 | from memorizz.coordination.shared_memory import SharedMemory 13 | 14 | shared = SharedMemory(memory_provider) 15 | session_id = shared.create_shared_session( 16 | root_agent_id="orchestrator", 17 | delegate_agent_ids=["researcher", "writer"], 18 | ) 19 | ``` 20 | 21 | Each session keeps: 22 | 23 | - Participants and roles 24 | - Messages and artifacts exchanged between agents 25 | - Links to the originating episodic/semantic records for traceability 26 | 27 | ## Patterns 28 | 29 | - Orchestrator + delegate setups (research, summarization, QA) 30 | - Human-in-the-loop review queues where both agents and operators inspect shared state 31 | - Multi-modal agents handing off voice, vision, or text data through a common buffer 32 | 33 | Shared memory complements the per-agent stores so everyone observes the same document trail without duplicating data. 34 | -------------------------------------------------------------------------------- /.venv/bin/activate.csh: -------------------------------------------------------------------------------- 1 | # This file must be used with "source bin/activate.csh" *from csh*. 2 | # You cannot run it directly. 3 | # Created by Davide Di Blasi . 4 | # Ported to Python 3.3 venv by Andrew Svetlov 5 | 6 | alias deactivate 'test $?_OLD_VIRTUAL_PATH != 0 && setenv PATH "$_OLD_VIRTUAL_PATH" && unset _OLD_VIRTUAL_PATH; rehash; test $?_OLD_VIRTUAL_PROMPT != 0 && set prompt="$_OLD_VIRTUAL_PROMPT" && unset _OLD_VIRTUAL_PROMPT; unsetenv VIRTUAL_ENV; test "\!:*" != "nondestructive" && unalias deactivate' 7 | 8 | # Unset irrelevant variables. 9 | deactivate nondestructive 10 | 11 | setenv VIRTUAL_ENV "/Users/richmondalake/Desktop/memorizz/.venv" 12 | 13 | set _OLD_VIRTUAL_PATH="$PATH" 14 | setenv PATH "$VIRTUAL_ENV/bin:$PATH" 15 | 16 | 17 | set _OLD_VIRTUAL_PROMPT="$prompt" 18 | 19 | if (! "$?VIRTUAL_ENV_DISABLE_PROMPT") then 20 | if (".venv" != "") then 21 | set env_name = ".venv" 22 | else 23 | if (`basename "VIRTUAL_ENV"` == "__") then 24 | # special case for Aspen magic directories 25 | # see http://www.zetadev.com/software/aspen/ 26 | set env_name = `basename \`dirname "$VIRTUAL_ENV"\`` 27 | else 28 | set env_name = `basename "$VIRTUAL_ENV"` 29 | endif 30 | endif 31 | set prompt = "[$env_name] $prompt" 32 | unset env_name 33 | endif 34 | 35 | alias pydoc python -m pydoc 36 | 37 | rehash 38 | -------------------------------------------------------------------------------- /docs/memory-types/semantic.md: -------------------------------------------------------------------------------- 1 | # Semantic Memory 2 | 3 | Semantic memory stores canonical facts, personas, and entity attributes that rarely change. In MemoRizz, this maps to `src/memorizz/long_term_memory/semantic/` and is backed by the `MemoryType.LONG_TERM_MEMORY` and `MemoryType.ENTITY_MEMORY` enums. 4 | 5 | ## Components 6 | 7 | - **Knowledge Base** – Vectorized documents segmented by namespace or topic. 8 | - **Personas** – Behavioral instructions, tone, and guardrails that shape agent responses. 9 | - **Entity Memory** – Structured attributes for people, organizations, or devices. The `entity_memory` module exposes helper methods to upsert and query profile fields. 
10 | 11 | ## Typical Operations 12 | 13 | ```python 14 | kb_id = agent.memory.long_term.save_document( 15 | namespace="support", 16 | content="The premium plan includes unlimited vector storage.", 17 | ) 18 | 19 | agent.memory.entity_memory.upsert( 20 | entity_id="company_acme", 21 | attributes={"plan": "premium"}, 22 | ) 23 | ``` 24 | 25 | The provider automatically embeds the document, stores metadata, and tags the record with the owning agent or namespace. 26 | 27 | ## When to Use 28 | 29 | - Product catalogs and policy manuals 30 | - Persona systems for specialized assistants (support, researcher, interviewer) 31 | - Entity profiles that must persist across sessions and devices 32 | 33 | Semantic memory powers long-lived recall. Pair it with episodic memory when you also care about interaction history. 34 | -------------------------------------------------------------------------------- /docs/memory-types/procedural.md: -------------------------------------------------------------------------------- 1 | # Procedural Memory 2 | 3 | Procedural memory captures *how* an agent should act. It bundles tool registration, workflows, and scripted behaviors so that the agent can plan or execute actions consistently. Source code lives in `src/memorizz/long_term_memory/procedural/`. 4 | 5 | ## Components 6 | 7 | - **Toolbox (`MemoryType.TOOLBOX`)** – Python callables wrapped with metadata so LLMs can discover and execute them safely. 8 | - **Workflow Memory (`MemoryType.WORKFLOW_MEMORY`)** – Declarative or code-defined processes that map multi-step plans. 9 | - **Personas** – While technically part of semantic memory, personas often work hand-in-hand with procedural steps to enforce tone and guardrails. 10 | 11 | ## Registering Tools 12 | 13 | ```python 14 | from memorizz.long_term_memory.procedural.toolbox import register_tool 15 | 16 | @register_tool(name="system_status", description="Return current system status") 17 | def system_status(): 18 | ... 19 | 20 | agent.memory.toolbox.sync_registered_tools() 21 | ``` 22 | 23 | Each tool is stored inside your configured provider with embedding metadata so agents can retrieve the right action based on the natural language plan they produce. 24 | 25 | ## When to Reach for Procedural Memory 26 | 27 | - Automations that call APIs, databases, or internal services 28 | - Agents that must follow compliance-friendly workflows 29 | - Research or analyst bots that gather, synthesize, then report findings based on a repeatable checklist 30 | -------------------------------------------------------------------------------- /docs/memory-providers/custom.md: -------------------------------------------------------------------------------- 1 | # Bring Your Own Provider 2 | 3 | MemoRizz decouples the high-level memory interfaces from the backing database via the `MemoryProvider` base class (`src/memorizz/memory_provider/base.py`). Implementing your own provider lets you plug in any datastore that can persist JSON blobs plus embeddings. 4 | 5 | ## Steps 6 | 7 | 1. **Subclass `MemoryProvider`** and implement CRUD helpers for each memory bucket you care about (personas, long-term memory, etc.). 8 | 2. **Handle embeddings** – either pre-compute embeddings before storing documents or call the shared embedding registry inside your provider methods. 9 | 3. **Respect schemas** – store the `id`, `agent_id`, `memory_type`, `data`, `embedding`, and timestamps so higher layers can filter and audit records consistently. 10 | 4. 
**Register the provider** – pass an instance to `MemAgentBuilder().with_memory_provider(...)`. 11 | 12 | ```python 13 | from memorizz.memory_provider.base import MemoryProvider 14 | 15 | class PostgresProvider(MemoryProvider): 16 | def save_persona(self, persona): 17 | ... 18 | ``` 19 | 20 | ## Testing Checklist 21 | 22 | - Run the provider's unit tests under `pytest tests/memory_provider/test_.py`. 23 | - Use `mkdocs serve` to confirm your new provider docs appear under **Memory Providers**. 24 | - Update `pyproject.toml` with a matching optional extra if you ship new dependencies. 25 | 26 | Custom providers make it easy to align MemoRizz with corporate infra while keeping the rest of the SDK untouched. 27 | -------------------------------------------------------------------------------- /docs/memory-providers/mongodb.md: -------------------------------------------------------------------------------- 1 | # MongoDB Provider 2 | 3 | The MongoDB provider offers a lightweight starting point for experimentation or hosted Atlas deployments. It is implemented in `src/memorizz/memory_provider/mongodb/`. 4 | 5 | ## Installation 6 | 7 | ```bash 8 | pip install -e ".[mongodb]" 9 | ``` 10 | 11 | ## Configuration 12 | 13 | ```python 14 | from memorizz.memory_provider.mongodb import MongoDBProvider, MongoDBConfig 15 | 16 | provider = MongoDBProvider(MongoDBConfig( 17 | uri=os.environ["MONGODB_URI"], 18 | database="memorizz", 19 | collection_prefix="agents", 20 | )) 21 | ``` 22 | 23 | Collections are created lazily (e.g., `agents_personas`, `agents_long_term_memory`). Each document stores: 24 | 25 | - Serialized payload (`data`) 26 | - Embedding vectors (array fields you can index with MongoDB Atlas Vector Search) 27 | - Agent + namespace metadata 28 | 29 | ## Atlas Vector Search 30 | 31 | 1. Enable the [Vector Search](https://www.mongodb.com/docs/atlas/atlas-vector-search/) preview on your cluster. 32 | 2. Create an index per collection referencing the embedding field. 33 | 3. Configure the provider with your embedding model dimensions. 34 | 35 | ## When to Choose MongoDB 36 | 37 | - Prototype agents without running Oracle locally 38 | - Serverless / hosted deployments where MongoDB Atlas is already approved 39 | - Horizontal scaling scenarios using MongoDB's built-in sharding 40 | 41 | Use MongoDB for agility and switch to Oracle when you need stronger relational guarantees or AI Vector Search optimizations. 
42 | -------------------------------------------------------------------------------- /src/memorizz/__init__.py: -------------------------------------------------------------------------------- 1 | from .coordination import SharedMemory 2 | from .internet_access import ( 3 | FirecrawlProvider, 4 | InternetAccessProvider, 5 | TavilyProvider, 6 | create_internet_access_provider, 7 | ) 8 | from .long_term_memory.procedural.toolbox import Toolbox 9 | from .long_term_memory.semantic import KnowledgeBase 10 | from .long_term_memory.semantic.persona import Persona, RoleType 11 | from .memagent import MemAgent 12 | from .memory_provider import MemoryProvider, MemoryType 13 | from .short_term_memory.working_memory.cwm import CWM 14 | 15 | 16 | # Lazy import MongoDB to avoid requiring pymongo when not needed 17 | def __getattr__(name): 18 | if name == "MongoDBProvider": 19 | from .memory_provider.mongodb import MongoDBProvider 20 | 21 | return MongoDBProvider 22 | if name in ("FileSystemProvider", "FileSystemConfig"): 23 | from .memory_provider.filesystem import FileSystemConfig, FileSystemProvider 24 | 25 | return FileSystemProvider if name == "FileSystemProvider" else FileSystemConfig 26 | raise AttributeError(f"module '{__name__}' has no attribute '{name}'") 27 | 28 | 29 | __all__ = [ 30 | "MemoryProvider", 31 | "MongoDBProvider", 32 | "FileSystemProvider", 33 | "FileSystemConfig", 34 | "MemoryType", 35 | "Persona", 36 | "RoleType", 37 | "Toolbox", 38 | "KnowledgeBase", 39 | "CWM", 40 | "SharedMemory", 41 | "MemAgent", 42 | "InternetAccessProvider", 43 | "FirecrawlProvider", 44 | "TavilyProvider", 45 | "create_internet_access_provider", 46 | ] 47 | -------------------------------------------------------------------------------- /src/memorizz/long_term_memory/semantic/persona/role_type.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class RoleType(Enum): 5 | GENERAL = "General" 6 | ASSISTANT = "Virtual Assistant" 7 | CUSTOMER_SUPPORT = "Customer Support" 8 | TECHNICAL_EXPERT = "Technical Expert" 9 | RESEARCHER = "Researcher" 10 | 11 | 12 | # Predefined default values for each role 13 | PREDEFINED_INFO = { 14 | RoleType.GENERAL: { 15 | "goals": "Provide versatile support across various domains.", 16 | "background": "A general-purpose agent designed to adapt to multiple contexts.", 17 | }, 18 | RoleType.ASSISTANT: { 19 | "goals": "Assist users by offering timely and personalized support.", 20 | "background": "An assistant agent crafted to manage schedules, answer queries, and help with daily tasks.", 21 | }, 22 | RoleType.CUSTOMER_SUPPORT: { 23 | "goals": "Resolve customer issues promptly and provide clear guidance.", 24 | "background": "A customer support agent specialized in understanding user concerns and delivering effective solutions.", 25 | }, 26 | RoleType.TECHNICAL_EXPERT: { 27 | "goals": "Provide expert technical advice and troubleshoot complex problems.", 28 | "background": "A technical expert agent with deep domain knowledge to assist with intricate technical issues.", 29 | }, 30 | RoleType.RESEARCHER: { 31 | "goals": "Conduct thorough research and offer insights on advanced topics.", 32 | "background": "A researcher agent designed to synthesize complex information and present well-informed perspectives.", 33 | }, 34 | } 35 | -------------------------------------------------------------------------------- /src/memorizz/internet_access/models.py: 
-------------------------------------------------------------------------------- 1 | """Data structures for standardized internet access responses.""" 2 | 3 | from __future__ import annotations 4 | 5 | from dataclasses import dataclass, field 6 | from typing import Any, Dict, Optional 7 | 8 | 9 | @dataclass 10 | class InternetSearchResult: 11 | """Normalized representation of a single search result.""" 12 | 13 | url: str 14 | title: Optional[str] = None 15 | snippet: Optional[str] = None 16 | score: Optional[float] = None 17 | raw: Optional[Dict[str, Any]] = None 18 | metadata: Dict[str, Any] = field(default_factory=dict) 19 | 20 | def to_dict(self) -> Dict[str, Any]: 21 | """Return a serializable dict for tool / LLM consumption.""" 22 | return { 23 | "url": self.url, 24 | "title": self.title, 25 | "snippet": self.snippet, 26 | "score": self.score, 27 | "metadata": self.metadata or None, 28 | "raw": self.raw or None, 29 | } 30 | 31 | 32 | @dataclass 33 | class InternetPageContent: 34 | """Normalized representation of page content scraped from the web.""" 35 | 36 | url: str 37 | title: Optional[str] = None 38 | content: Optional[str] = None 39 | metadata: Dict[str, Any] = field(default_factory=dict) 40 | raw: Optional[Dict[str, Any]] = None 41 | 42 | def to_dict(self) -> Dict[str, Any]: 43 | """Return a serializable dict for tool / LLM consumption.""" 44 | return { 45 | "url": self.url, 46 | "title": self.title, 47 | "content": self.content, 48 | "metadata": self.metadata or None, 49 | "raw": self.raw or None, 50 | } 51 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # MemoRizz Documentation 2 | 3 | MemoRizz helps you build memory-augmented agents that combine long-term knowledge, episodic context, procedural behaviors, and multi-agent coordination. This site keeps the narrative docs, quickstarts, and API references directly inside the repository so the docs never drift from the code that powers them. 4 | 5 | ## What's Inside 6 | 7 | - **Getting Started** walks through the core concepts and SDK setup for your first agent. 8 | - **Memory Types** dives into each cognitive-inspired subsystem and how it maps to the source tree in `src/memorizz/`. 9 | - **Memory Providers** describes the persistence backends (Oracle, MongoDB, or custom) that store the memories. 10 | - **Use Cases** shows how the library stitches memory stacks together for common application modes like assistants or research bots. 11 | 12 | !!! info "Docs live with the code" 13 | Every page in this site is rendered straight from the Markdown under `docs/`. Update a file, run `mkdocs serve`, and the change appears instantly. Merge to `main` and the GitHub Pages workflow publishes the refreshed site automatically. 14 | 15 | ## Quick Start 16 | 17 | ```bash 18 | pip install -e ".[docs]" 19 | mkdocs serve 20 | ``` 21 | 22 | Visit for a hot-reloading docs server. When you're ready to publish, run `mkdocs build --strict` or rely on the provided GitHub Action to deploy to the `gh-pages` branch. 23 | 24 | ## Need More? 25 | 26 | - Check the Python API reference entries embedded throughout the docs via [`mkdocstrings`](https://mkdocstrings.github.io/). 27 | - Browse real workflows in `src/memorizz/examples/` and link them into the docs with snippets or code fences. 28 | - Open an issue or discussion on [GitHub](https://github.com/RichmondAlake/memorizz) if you spot a gap. 
29 | -------------------------------------------------------------------------------- /tests/unit/test_firecrawl_provider.py: -------------------------------------------------------------------------------- 1 | """Unit tests for the Firecrawl internet provider.""" 2 | 3 | from __future__ import annotations 4 | 5 | from unittest.mock import MagicMock 6 | 7 | import pytest 8 | 9 | from memorizz.internet_access.providers.firecrawl import FirecrawlProvider 10 | 11 | 12 | @pytest.mark.unit 13 | def test_firecrawl_truncates_large_pages(): 14 | provider = FirecrawlProvider( 15 | api_key="test-key", 16 | base_url="https://example.com", 17 | config={"max_content_chars": 20}, 18 | ) 19 | provider._post = MagicMock( 20 | return_value={ 21 | "markdown": "A" * 50, 22 | "metadata": {"title": "Example"}, 23 | } 24 | ) 25 | 26 | page = provider.fetch_url("https://memorizz.ai") 27 | 28 | assert page.metadata["content_truncated"] is True 29 | assert page.metadata["content_original_characters"] == 50 30 | assert page.metadata["content_returned_characters"] == 20 31 | assert page.content.startswith("A" * 20) 32 | assert "trimmed the page" in page.content 33 | assert page.raw is None 34 | 35 | 36 | @pytest.mark.unit 37 | def test_firecrawl_can_include_sanitized_raw_payload(): 38 | provider = FirecrawlProvider( 39 | api_key="test-key", 40 | base_url="https://example.com", 41 | config={"include_raw_response": True, "max_raw_chars": 5}, 42 | ) 43 | provider._post = MagicMock( 44 | return_value={ 45 | "markdown": "abcdefg", 46 | "metadata": {"title": "Example"}, 47 | "nested": {"rawHtml": "
" + "x" * 20}, 48 | } 49 | ) 50 | 51 | page = provider.fetch_url("https://memorizz.ai") 52 | 53 | assert page.raw is not None 54 | assert page.raw["markdown"].startswith("abcde") 55 | assert "truncated" in page.raw["markdown"] 56 | assert "truncated" in page.raw["nested"]["rawHtml"] 57 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "memorizz" 7 | version = "0.0.38" 8 | description = "A memory management library for Python" 9 | readme = "README.md" 10 | requires-python = ">=3.7" 11 | license = "MIT" 12 | authors = [ 13 | { name = "Richmond Alake", email = "richmond.alake@gmail.com" } 14 | ] 15 | classifiers = [ 16 | "Programming Language :: Python :: 3", 17 | "License :: OSI Approved :: MIT License", 18 | "Operating System :: OS Independent", 19 | ] 20 | dependencies = [ 21 | "openai", 22 | "numpy", 23 | "pydantic>=2.0.0", 24 | "sentence-transformers>=2.5.0", 25 | "transformers>=4.36.0", 26 | "accelerate>=0.26.0", 27 | "requests>=2.31.0", 28 | ] 29 | 30 | [project.optional-dependencies] 31 | mongodb = ["pymongo>=4.0.0"] 32 | oracle = ["oracledb>=2.0.0"] 33 | filesystem = ["faiss-cpu>=1.7.4"] 34 | ollama = ["langchain_ollama", "ollama"] 35 | voyageai = ["voyageai"] 36 | huggingface = [ 37 | "numpy>=1.21,<2", 38 | "sentence-transformers>=2.5.0", 39 | "transformers>=4.36.0", 40 | "accelerate>=0.26.0", 41 | ] 42 | docs = [ 43 | "mkdocs>=1.6.0", 44 | "mkdocs-material>=9.5.0", 45 | "mkdocstrings[python]>=0.24.0", 46 | "mkdocs-git-revision-date-localized-plugin>=1.2.0", 47 | "pymdown-extensions>=10.0", 48 | ] 49 | all = [ 50 | "pymongo>=4.0.0", 51 | "oracledb>=2.0.0", 52 | "langchain_ollama", 53 | "ollama", 54 | "voyageai", 55 | "faiss-cpu>=1.7.4", 56 | "numpy>=1.21,<2", 57 | "sentence-transformers>=2.5.0", 58 | "transformers>=4.36.0", 59 | "accelerate>=0.26.0", 60 | ] 61 | 62 | [project.scripts] 63 | memorizz = "memorizz.cli:main" 64 | 65 | [tool.hatch.metadata] 66 | allow-direct-references = true 67 | 68 | [tool.hatch.build.targets.wheel.sources] 69 | "src" = "" 70 | 71 | [tool.hatch.build.targets.sdist] 72 | exclude = ["temp_unpack*"] 73 | -------------------------------------------------------------------------------- /src/memorizz/tests/test_vegetarian_recipe_agent.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | import pytest 5 | from dotenv import load_dotenv 6 | from scenario import Scenario, TestingAgent 7 | 8 | # Add the project root to the Python path 9 | project_root = os.path.abspath( 10 | os.path.join(os.path.dirname(__file__), "..", "..", "..") 11 | ) 12 | sys.path.insert(0, project_root) 13 | load_dotenv() 14 | 15 | from ..memagent import MemAgent # noqa: E402 16 | from ..memory_provider.mongodb.provider import ( # noqa: E402 17 | MongoDBConfig, 18 | MongoDBProvider, 19 | ) 20 | 21 | # Create a memory provider 22 | mongodb_config = MongoDBConfig(uri=os.environ["MONGODB_URI"]) 23 | memory_provider = MongoDBProvider(mongodb_config) 24 | 25 | Scenario.configure(testing_agent=TestingAgent(model="openai/gpt-4o-mini")) 26 | 27 | mem_agent = MemAgent(memory_provider=memory_provider) 28 | 29 | 30 | @pytest.mark.agent_test 31 | @pytest.mark.asyncio 32 | async def test_vegetarian_recipe_agent(): 33 | agent = mem_agent 34 | 35 | def vegetarian_recipe_agent(message, context): 
36 | # Call your agent here 37 | response = agent.run(message) 38 | return {"message": response} 39 | 40 | # Define the scenario 41 | scenario = Scenario( 42 | "User is looking for a dinner idea", 43 | agent=vegetarian_recipe_agent, 44 | success_criteria=[ 45 | "Recipe agent generates a vegetarian recipe", 46 | "Recipe includes a list of ingredients", 47 | "Recipe includes step-by-step cooking instructions", 48 | ], 49 | failure_criteria=[ 50 | "The recipe is not vegetarian or includes meat", 51 | "The agent asks more than two follow-up questions", 52 | ], 53 | ) 54 | 55 | # Run the scenario and get results 56 | result = await scenario.run() 57 | 58 | # Assert for pytest to know whether the test passed 59 | assert result.success 60 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: help install lint format check test clean docs-serve docs-build 2 | 3 | help: 4 | @echo "Memorizz Development Commands:" 5 | @echo "" 6 | @echo " make install Install package in editable mode with dev dependencies" 7 | @echo " make lint Run linting (flake8, check syntax)" 8 | @echo " make format Format code with black and isort" 9 | @echo " make check Run lint + format check (pre-commit)" 10 | @echo " make test Run tests" 11 | @echo " make docs-serve Launch mkdocs with hot reload" 12 | @echo " make docs-build Build the static documentation site" 13 | @echo " make clean Clean up generated files" 14 | @echo "" 15 | 16 | install: 17 | pip install -e ".[dev]" 18 | pip install pre-commit black flake8 isort 19 | pre-commit install 20 | 21 | lint: 22 | @echo "Running syntax check..." 23 | @find src/memorizz -name "*.py" ! -name "*backup*" ! -name "*original*" -exec python -m py_compile {} \; 24 | @echo "✓ Syntax check passed" 25 | @echo "" 26 | @echo "Running flake8..." 27 | @flake8 src/memorizz --max-line-length=120 --extend-ignore=E203,E501 --exclude='*backup*,*original*' || true 28 | @echo "" 29 | 30 | format: 31 | @echo "Formatting with black..." 32 | @black src/memorizz 33 | @echo "" 34 | @echo "Sorting imports with isort..." 35 | @isort src/memorizz --profile black 36 | @echo "" 37 | @echo "✓ Code formatted" 38 | 39 | check: 40 | @echo "Running pre-commit checks..." 41 | @pre-commit run --all-files || true 42 | 43 | test: 44 | pytest tests/ -v 45 | 46 | clean: 47 | find . -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null || true 48 | find . -type f -name "*.pyc" -delete 49 | find . -type f -name "*.pyo" -delete 50 | find . -type d -name "*.egg-info" -exec rm -rf {} + 2>/dev/null || true 51 | find . 
-type d -name ".pytest_cache" -exec rm -rf {} + 2>/dev/null || true 52 | 53 | docs-serve: 54 | mkdocs serve 55 | 56 | docs-build: 57 | mkdocs build --strict 58 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [tool:pytest] 2 | # Pytest configuration for MemAgent test suite 3 | 4 | # Test discovery 5 | testpaths = tests 6 | python_files = test_*.py *_test.py 7 | python_classes = Test* 8 | python_functions = test_* 9 | 10 | # Minimum version 11 | minversion = 6.0 12 | 13 | # Add options 14 | addopts = 15 | --verbose 16 | --tb=short 17 | --strict-markers 18 | --strict-config 19 | --disable-warnings 20 | --cov=src/memorizz 21 | --cov-report=html:htmlcov 22 | --cov-report=term-missing 23 | --cov-fail-under=80 24 | 25 | # Markers for organizing tests 26 | markers = 27 | unit: Unit tests for individual components 28 | integration: Integration tests between components 29 | performance: Performance and stress tests 30 | memory: Tests for memory functionality 31 | single_agent: Tests for single agent scenarios 32 | multi_agent: Tests for multi-agent scenarios 33 | backward_compatibility: Tests for backward compatibility 34 | save_load: Tests for save/load functionality 35 | conversation_memory: Tests for conversation memory 36 | semantic_memory: Tests for semantic memory 37 | episodic_memory: Tests for episodic memory 38 | procedural_memory: Tests for procedural memory 39 | e2e: End-to-end tests 40 | stress: Stress testing 41 | benchmark: Performance benchmarks 42 | compatibility: Compatibility tests 43 | slow: Tests that take more time to run 44 | requires_llm: Tests that need actual LLM integration 45 | requires_memory_provider: Tests that need memory provider 46 | 47 | # Directories to ignore during collection 48 | norecursedirs = 49 | .git 50 | .pytest_cache 51 | *.egg-info 52 | build 53 | dist 54 | htmlcov 55 | .venv 56 | venv 57 | 58 | # Timeout for tests (in seconds) 59 | timeout = 300 60 | 61 | # Parallel execution 62 | # Run with: pytest -n auto 63 | filterwarnings = 64 | ignore::DeprecationWarning 65 | ignore::PendingDeprecationWarning 66 | -------------------------------------------------------------------------------- /src/memorizz/internet_access/providers/offline.py: -------------------------------------------------------------------------------- 1 | """Offline fallback provider for environments without external access.""" 2 | from __future__ import annotations 3 | 4 | from typing import Any, Dict, List 5 | 6 | from ..base import InternetAccessProvider, register_provider 7 | from ..models import InternetPageContent, InternetSearchResult 8 | 9 | 10 | class OfflineInternetProvider(InternetAccessProvider): 11 | """Provider that returns informative placeholders when internet access is disabled.""" 12 | 13 | provider_name = "offline" 14 | 15 | def __init__(self, reason: str = "Internet access provider is not configured"): 16 | super().__init__({"reason": reason}) 17 | self.reason = reason 18 | 19 | def search( 20 | self, query: str, max_results: int = 5, **kwargs: Any 21 | ) -> List[InternetSearchResult]: 22 | message = ( 23 | f"Internet access unavailable: {self.reason}. Configure FIRECRAWL_API_KEY, " 24 | "TAVILY_API_KEY, or MEMORIZZ_DEFAULT_INTERNET_PROVIDER to enable live search." 
25 | ) 26 | return [ 27 | InternetSearchResult( 28 | url="", 29 | title="Internet access unavailable", 30 | snippet=message, 31 | metadata={"status": "offline"}, 32 | ) 33 | ] 34 | 35 | def fetch_url(self, url: str, **kwargs: Any) -> InternetPageContent: 36 | message = ( 37 | f"Cannot fetch '{url}' because internet access is disabled. " 38 | "Configure FIRECRAWL_API_KEY, TAVILY_API_KEY, or MEMORIZZ_DEFAULT_INTERNET_PROVIDER " 39 | "to enable browsing." 40 | ) 41 | return InternetPageContent( 42 | url=url, 43 | title="Internet access unavailable", 44 | content=message, 45 | metadata={"status": "offline"}, 46 | ) 47 | 48 | 49 | register_provider(OfflineInternetProvider.provider_name, OfflineInternetProvider) 50 | -------------------------------------------------------------------------------- /src/memorizz/memory_provider/__init__.py: -------------------------------------------------------------------------------- 1 | from ..enums.memory_type import MemoryType 2 | from .base import MemoryProvider 3 | 4 | 5 | # Lazy imports for optional dependencies 6 | def _lazy_import_mongodb(): 7 | """Lazy import MongoDB provider (requires pymongo).""" 8 | try: 9 | from .mongodb import MongoDBProvider 10 | 11 | return MongoDBProvider 12 | except ImportError as e: 13 | raise ImportError( 14 | "MongoDB provider requires pymongo. Install with: pip install pymongo" 15 | ) from e 16 | 17 | 18 | def _lazy_import_oracle(): 19 | """Lazy import Oracle provider (requires oracledb).""" 20 | try: 21 | from .oracle import OracleProvider 22 | 23 | return OracleProvider 24 | except ImportError as e: 25 | raise ImportError( 26 | "Oracle provider requires oracledb. Install with: pip install oracledb" 27 | ) from e 28 | 29 | 30 | # Make providers available via module-level getattr 31 | def __getattr__(name): 32 | if name == "MongoDBProvider": 33 | return _lazy_import_mongodb() 34 | elif name == "OracleProvider": 35 | return _lazy_import_oracle() 36 | elif name in ("FileSystemProvider", "FileSystemConfig"): 37 | try: 38 | from .filesystem import FileSystemConfig, FileSystemProvider 39 | 40 | return ( 41 | FileSystemProvider if name == "FileSystemProvider" else FileSystemConfig 42 | ) 43 | except ImportError as e: 44 | raise ImportError( 45 | "Filesystem provider requires optional dependencies. " 46 | "Install FAISS (pip install faiss-cpu) for vector search support." 47 | ) from e 48 | raise AttributeError(f"module '{__name__}' has no attribute '{name}'") 49 | 50 | 51 | __all__ = [ 52 | "MemoryProvider", 53 | "MongoDBProvider", 54 | "OracleProvider", 55 | "FileSystemProvider", 56 | "FileSystemConfig", 57 | "MemoryType", 58 | ] 59 | -------------------------------------------------------------------------------- /src/memorizz/llms/llm_factory.py: -------------------------------------------------------------------------------- 1 | # src/memorizz/llms/llm_factory.py 2 | 3 | from typing import Any, Dict 4 | 5 | from .azure import AzureOpenAI 6 | from .huggingface import HuggingFaceLLM 7 | from .llm_provider import LLMProvider 8 | from .openai import OpenAI 9 | 10 | 11 | def create_llm_provider(config: Dict[str, Any]) -> LLMProvider: 12 | """ 13 | Factory function to create an LLM provider instance from a configuration dictionary. 14 | 15 | Parameters: 16 | ----------- 17 | config : Dict[str, Any] 18 | A dictionary containing the provider name and its specific parameters. 
19 | Example for OpenAI: {"provider": "openai", "model": "gpt-4o"} 20 | Example for Azure: {"provider": "azure", "deployment_name": "my-gpt4"} 21 | 22 | Returns: 23 | -------- 24 | LLMProvider 25 | An instance of the specified LLM provider. 26 | 27 | Raises: 28 | ------- 29 | ValueError 30 | If the provider specified in the config is unknown. 31 | """ 32 | provider_name = config.get("provider", "openai").lower() 33 | if provider_name == "openai": 34 | # Create a copy of the config and remove the 'provider' key 35 | openai_config = config.copy() 36 | openai_config.pop("provider", None) 37 | return OpenAI(**openai_config) 38 | 39 | elif provider_name == "azure": 40 | # Create a copy of the config and remove the 'provider' key 41 | azure_config = config.copy() 42 | azure_config.pop("provider", None) 43 | return AzureOpenAI( 44 | azure_endpoint=azure_config.get("azure_endpoint"), 45 | api_version=azure_config.get("api_version"), 46 | deployment_name=azure_config.get("deployment_name"), 47 | ) 48 | 49 | elif provider_name == "huggingface": 50 | huggingface_config = config.copy() 51 | huggingface_config.pop("provider", None) 52 | return HuggingFaceLLM(**huggingface_config) 53 | 54 | else: 55 | raise ValueError(f"Unknown LLM provider: '{provider_name}'") 56 | -------------------------------------------------------------------------------- /examples/setup_oracle_user.py: -------------------------------------------------------------------------------- 1 | """ 2 | Oracle Database Setup Script (Convenience Wrapper) 3 | 4 | ⚠️ RECOMMENDED: For most users, use the CLI command: 5 | memorizz setup-oracle 6 | 7 | This script is provided as a convenience wrapper for: 8 | - Users who cloned the repository and prefer running Python scripts directly 9 | - Development and testing scenarios 10 | 11 | Setup Methods (in order of recommendation): 12 | 1. CLI Command (Best for pip-installed users): 13 | memorizz setup-oracle 14 | # or 15 | python -m memorizz.cli setup-oracle 16 | 17 | 2. This Script (Good for repo-cloned users): 18 | python examples/setup_oracle_user.py 19 | 20 | 3. 
Direct Import (For programmatic use): 21 | from memorizz.memory_provider.oracle import setup_oracle_user 22 | setup_oracle_user() 23 | 24 | The setup automatically detects your database configuration: 25 | - Admin mode: Full setup with user creation (local/self-hosted databases) 26 | - User-only mode: Uses existing schema (hosted databases like FreeSQL.com) 27 | """ 28 | 29 | import sys 30 | 31 | # Import from package (works for both pip-installed and repo-cloned users) 32 | try: 33 | from memorizz.memory_provider.oracle import setup_oracle_user 34 | except ImportError: 35 | print("✗ Failed to import setup function from memorizz package.") 36 | print("\nPlease ensure memorizz[oracle] is installed:") 37 | print(" pip install memorizz[oracle]") 38 | print("\nThen use the CLI command (recommended):") 39 | print(" memorizz setup-oracle") 40 | print("\nOr use the Python module:") 41 | print(" python -m memorizz.cli setup-oracle") 42 | sys.exit(1) 43 | 44 | 45 | if __name__ == "__main__": 46 | try: 47 | success = setup_oracle_user() 48 | sys.exit(0 if success else 1) 49 | except KeyboardInterrupt: 50 | print("\n\n⚠ Setup interrupted by user") 51 | sys.exit(1) 52 | except Exception as e: 53 | print(f"\n\n✗ Unexpected error: {e}") 54 | import traceback 55 | 56 | traceback.print_exc() 57 | sys.exit(1) 58 | -------------------------------------------------------------------------------- /src/memorizz/long_term_memory/semantic/entity_memory/README.md: -------------------------------------------------------------------------------- 1 | # Entity Memory Module 2 | 3 | Entity memory provides structured long-term storage for facts about specific people, 4 | organizations, products, or other named entities. Each entity is stored as a record of 5 | attribute–value pairs plus optional relations to other entities so agents can recall and 6 | update stable facts over time. 
7 | 8 | ## Features 9 | 10 | - Store entities with typed attributes, confidence scores, provenance, and timestamps 11 | - Link entities together via labeled relations (e.g., *coworker*, *purchased*) 12 | - Vector-searchable using the combined attribute text for natural-language lookup 13 | - Memory-ID aware so facts can be scoped to a specific user, tenant, or agent 14 | - Convenience helpers for recording single attributes, retrieving profiles, and 15 | attaching relations 16 | 17 | ## Usage 18 | 19 | ```python 20 | from memorizz.long_term_memory.semantic.entity_memory import EntityMemory 21 | from memorizz.memory_provider.mongodb import MongoDBProvider, MongoDBConfig 22 | 23 | provider = MongoDBProvider(MongoDBConfig("mongodb://localhost:27017")) 24 | entity_store = EntityMemory(provider) 25 | 26 | # Create or update an entity 27 | entity_id = entity_store.upsert_entity( 28 | name="Avery Stone", 29 | entity_type="customer", 30 | memory_id="tenant-123", 31 | attributes=[{"name": "preferred_language", "value": "Japanese", "confidence": 0.95}], 32 | ) 33 | 34 | # Record a new fact without building the full payload 35 | entity_store.record_attribute( 36 | entity_id=entity_id, 37 | attribute_name="favorite_product", 38 | attribute_value="Nebula Pro Drone", 39 | source="support_chat", 40 | ) 41 | 42 | # Look up relevant entities for a query 43 | matches = entity_store.search_entities("user who likes the drone", memory_id="tenant-123") 44 | ``` 45 | 46 | The module intentionally mirrors the layout of other long-term memory components (such 47 | as the knowledge base and persona modules) so it can be attached to `MemAgent` 48 | instances or used standalone. 49 | -------------------------------------------------------------------------------- /setup_dev.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Memorizz Development Environment Setup Script 4 | 5 | set -e 6 | 7 | echo "======================================================================" 8 | echo " Memorizz Development Environment Setup" 9 | echo "======================================================================" 10 | echo "" 11 | 12 | # Check if we're in a conda environment 13 | if [ -n "$CONDA_DEFAULT_ENV" ]; then 14 | echo "✓ Conda environment detected: $CONDA_DEFAULT_ENV" 15 | else 16 | echo "⚠ Warning: No conda environment detected" 17 | echo " Consider creating one: conda create -n memorizz python=3.11" 18 | fi 19 | echo "" 20 | 21 | # Install package in editable mode 22 | echo "Step 1: Installing Memorizz in editable mode..." 23 | pip install -e . 24 | echo "✓ Package installed" 25 | echo "" 26 | 27 | # Install development dependencies 28 | echo "Step 2: Installing development dependencies..." 29 | pip install pre-commit black flake8 isort pytest ipython jupyter 30 | echo "✓ Development dependencies installed" 31 | echo "" 32 | 33 | # Install pre-commit hooks 34 | echo "Step 3: Setting up pre-commit hooks..." 35 | pre-commit install 36 | echo "✓ Pre-commit hooks installed" 37 | echo "" 38 | 39 | # Run initial format 40 | echo "Step 4: Running initial code formatting..." 41 | black src/memorizz --quiet || true 42 | isort src/memorizz --profile black --quiet || true 43 | echo "✓ Code formatted" 44 | echo "" 45 | 46 | # Check syntax 47 | echo "Step 5: Checking Python syntax..." 
48 | find src/memorizz -name "*.py" -exec python -m py_compile {} \; 49 | echo "✓ Syntax check passed" 50 | echo "" 51 | 52 | echo "======================================================================" 53 | echo " ✅ Development environment setup complete!" 54 | echo "======================================================================" 55 | echo "" 56 | echo "Useful commands:" 57 | echo " make help - Show all available commands" 58 | echo " make lint - Check code quality" 59 | echo " make format - Format code" 60 | echo " make test - Run tests" 61 | echo "" 62 | echo "Git hooks are now active - code will be checked before each commit!" 63 | echo "" 64 | -------------------------------------------------------------------------------- /docs/memory-providers/oracle.md: -------------------------------------------------------------------------------- 1 | # Oracle Provider 2 | 3 | The Oracle AI Database provider offers fully managed JSON + vector storage for every MemoRizz memory type. It targets Oracle 23ai/26ai and lives in `src/memorizz/memory_provider/oracle/`. 4 | 5 | ## Highlights 6 | 7 | - Native VECTOR datatype with automatic HNSW indexes 8 | - Connection pooling + lazy schema creation 9 | - Works with JSON Relational Duality Views for structured + vector queries 10 | 11 | ## Installation 12 | 13 | ```bash 14 | pip install -e ".[oracle]" 15 | ``` 16 | 17 | ## Configuration 18 | 19 | ```python 20 | from memorizz.memory_provider.oracle import OracleProvider, OracleConfig 21 | 22 | provider = OracleProvider(OracleConfig( 23 | user="memorizz_user", 24 | password="SecurePass123!", 25 | dsn="localhost:1521/FREEPDB1", 26 | schema="MEMORIZZ", 27 | embedding_provider="openai", 28 | embedding_config={"model": "text-embedding-3-small"}, 29 | lazy_vector_indexes=False, 30 | )) 31 | ``` 32 | 33 | Set `lazy_vector_indexes=True` if you want faster cold starts and are ok with indexes being created on demand. 34 | 35 | ## Database Prep 36 | 37 | 1. Create a dedicated user with `CREATE SESSION`, `CREATE TABLE`, `CREATE INDEX`, `UNLIMITED TABLESPACE`. 38 | 2. Grant `EXECUTE ON DBMS_VECTOR` for vector search. 39 | 3. Run `memorizz setup-oracle` or the scripts in `src/memorizz/memory_provider/oracle/` to create the tables. 40 | 41 | ## Tables 42 | 43 | Every memory bucket gets its own table plus a VECTOR index: 44 | 45 | - `personas` 46 | - `toolbox` 47 | - `long_term_memory` 48 | - `entity_memory` 49 | - `short_term_memory` 50 | - `conversation_memory` 51 | - `workflow_memory` 52 | - `shared_memory` 53 | - `summaries` 54 | - `semantic_cache` 55 | 56 | ## Troubleshooting 57 | 58 | - **Vector datatype missing** – Ensure you're running 23ai+ and have `DBMS_VECTOR` privileges. 59 | - **Connection refused** – Use Easy Connect Plus (`host:port/service`) or TNS alias strings. 60 | - **Slow cold start** – Enable `lazy_vector_indexes` or pre-create indexes manually using the SQL files in the provider folder. 61 | 62 | For the full reference, open `src/memorizz/memory_provider/oracle/README.md`. 
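For users who prefer Python over the CLI, the schema preparation described under Database Prep can also be invoked programmatically. This short sketch follows `examples/setup_oracle_user.py` shown earlier; the error message is illustrative:

```python
# Programmatic alternative to the `memorizz setup-oracle` CLI step.
from memorizz.memory_provider.oracle import setup_oracle_user

# setup_oracle_user() returns True on success (see examples/setup_oracle_user.py).
if not setup_oracle_user():
    raise SystemExit("Oracle schema setup failed; check credentials and privileges.")
```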
63 | -------------------------------------------------------------------------------- /tests/unit/test_tavily_provider.py: -------------------------------------------------------------------------------- 1 | """Unit tests for the Tavily internet provider.""" 2 | 3 | from __future__ import annotations 4 | 5 | from unittest.mock import MagicMock 6 | 7 | import pytest 8 | 9 | from memorizz.internet_access.providers.tavily import TavilyProvider 10 | 11 | 12 | @pytest.mark.unit 13 | def test_tavily_search_normalizes_results(): 14 | provider = TavilyProvider( 15 | api_key="test-key", 16 | base_url="https://api.tavily.com", 17 | config={"include_raw_results": True}, 18 | ) 19 | provider._post = MagicMock( 20 | return_value={ 21 | "results": [ 22 | { 23 | "url": "https://example.com/doc", 24 | "title": "Example Doc", 25 | "content": "Snippet", 26 | "score": 0.8, 27 | "site": "example.com", 28 | "published_date": "2024-01-01", 29 | } 30 | ] 31 | } 32 | ) 33 | 34 | results = provider.search("Example query", max_results=2) 35 | 36 | assert len(results) == 1 37 | assert results[0].url == "https://example.com/doc" 38 | assert results[0].metadata["site"] == "example.com" 39 | assert results[0].raw["title"] == "Example Doc" 40 | provider._post.assert_called_once() 41 | 42 | 43 | @pytest.mark.unit 44 | def test_tavily_fetch_truncates_content_and_returns_raw(): 45 | provider = TavilyProvider( 46 | api_key="test-key", 47 | base_url="https://api.tavily.com", 48 | config={"max_content_chars": 20, "include_raw_page": True}, 49 | ) 50 | provider._post = MagicMock( 51 | return_value={ 52 | "results": [ 53 | { 54 | "url": "https://example.com/doc", 55 | "title": "Example Doc", 56 | "content": "A" * 40, 57 | "metadata": {"lang": "en"}, 58 | "site": "example.com", 59 | } 60 | ] 61 | } 62 | ) 63 | 64 | page = provider.fetch_url("https://example.com/doc") 65 | 66 | assert page.metadata["content_truncated"] is True 67 | assert page.metadata["content_returned_characters"] == 20 68 | assert len(page.content) == 20 69 | assert page.raw["title"] == "Example Doc" 70 | -------------------------------------------------------------------------------- /eval/README.md: -------------------------------------------------------------------------------- 1 | # Memorizz Evaluation Framework 2 | 3 | This directory contains evaluation scripts and benchmarks for testing Memorizz's memory capabilities across various tasks and scenarios. 4 | 5 | ## Structure 6 | 7 | ``` 8 | eval/ 9 | ├── README.md # This file 10 | ├── longmemeval/ # LongMemEval benchmark evaluation 11 | │ ├── evaluate_memorizz.py # Main evaluation script 12 | │ └── README.md # LongMemEval specific documentation 13 | └── [future benchmarks]/ # Additional evaluation frameworks 14 | ``` 15 | 16 | ## Overview 17 | 18 | The evaluation framework is designed to assess Memorizz's performance on various memory-related tasks, providing objective metrics to track improvements and compare against other agent memory systems. 19 | 20 | ## Available Benchmarks 21 | 22 | ### LongMemEval 23 | LongMemEval is a comprehensive benchmark for evaluating long-term memory capabilities of chat assistants. It tests five core memory abilities: 24 | 25 | 1. **Information Extraction** - Recalling specific information from extensive histories 26 | 2. **Multi-Session Reasoning** - Synthesizing information across multiple conversation sessions 27 | 3. **Knowledge Updates** - Recognizing and updating changed user information over time 28 | 4. 
**Temporal Reasoning** - Understanding time-aware aspects of information 29 | 5. **Abstention** - Knowing when to refuse answering based on insufficient information 30 | 31 | ## Quick Start 32 | 33 | 1. Install dependencies: 34 | ```bash 35 | pip install datasets transformers openai 36 | ``` 37 | 38 | 2. Set up environment variables: 39 | ```bash 40 | export OPENAI_API_KEY="your_openai_api_key" 41 | export MONGODB_URI="your_mongodb_uri" 42 | ``` 43 | 44 | 3. Run LongMemEval evaluation: 45 | ```bash 46 | cd eval/longmemeval 47 | python evaluate_memorizz.py 48 | ``` 49 | 50 | ## Adding New Benchmarks 51 | 52 | To add a new evaluation benchmark: 53 | 54 | 1. Create a new directory under `eval/` 55 | 2. Implement an evaluation script that follows the pattern in `longmemeval/evaluate_memorizz.py` 56 | 3. Update this README with documentation for your benchmark 57 | 4. Add any necessary dependencies to the project requirements 58 | 59 | ## Results 60 | 61 | Evaluation results will be saved in JSON format with timestamps, allowing for easy tracking of performance improvements over time. -------------------------------------------------------------------------------- /install_oracle_client.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Simple Oracle Instant Client Installer for macOS 3 | # Usage: ./install_oracle_client.sh 4 | 5 | set -e 6 | 7 | INSTALL_DIR="$HOME/oracle/instantclient" 8 | DOWNLOADS_DIR="$HOME/Downloads" 9 | 10 | echo "🔍 Looking for Oracle Instant Client ZIP in Downloads..." 11 | 12 | # Find the downloaded ZIP file 13 | ZIP_FILE=$(find "$DOWNLOADS_DIR" -name "instantclient-basic-macos.arm64-*.zip" -o -name "instantclient-basic-macosx.x86-64-*.zip" 2>/dev/null | head -1) 14 | 15 | if [ -z "$ZIP_FILE" ]; then 16 | echo "❌ Oracle Instant Client ZIP not found in Downloads folder" 17 | echo "" 18 | echo "📥 Please download it first:" 19 | echo " Apple Silicon: https://www.oracle.com/database/technologies/instant-client/macos-arm64-downloads.html" 20 | echo " Intel Mac: https://www.oracle.com/database/technologies/instant-client/macosx-x86-64-downloads.html" 21 | echo "" 22 | echo " Download the 'Basic Package' ZIP file, then run this script again." 23 | exit 1 24 | fi 25 | 26 | echo "✅ Found: $(basename "$ZIP_FILE")" 27 | echo "📦 Extracting to $INSTALL_DIR..." 28 | 29 | # Create directory and extract 30 | mkdir -p "$(dirname "$INSTALL_DIR")" 31 | unzip -q "$ZIP_FILE" -d "$(dirname "$INSTALL_DIR")" 32 | 33 | # Find the extracted directory (version number may vary) 34 | EXTRACTED_DIR=$(find "$(dirname "$INSTALL_DIR")" -type d -name "instantclient_*" | head -1) 35 | 36 | if [ -z "$EXTRACTED_DIR" ]; then 37 | echo "❌ Extraction failed or directory not found" 38 | exit 1 39 | fi 40 | 41 | echo "✅ Extracted to: $EXTRACTED_DIR" 42 | 43 | # Add to .zshrc 44 | if ! grep -q "DYLD_LIBRARY_PATH.*instantclient" ~/.zshrc 2>/dev/null; then 45 | echo "" >> ~/.zshrc 46 | echo "# Oracle Instant Client" >> ~/.zshrc 47 | echo "export DYLD_LIBRARY_PATH=$EXTRACTED_DIR:\$DYLD_LIBRARY_PATH" >> ~/.zshrc 48 | echo "✅ Added to ~/.zshrc" 49 | else 50 | echo "⚠️ DYLD_LIBRARY_PATH already configured in ~/.zshrc" 51 | fi 52 | 53 | # Source it for current session 54 | export DYLD_LIBRARY_PATH="$EXTRACTED_DIR:$DYLD_LIBRARY_PATH" 55 | 56 | echo "" 57 | echo "✅ Installation complete!" 
58 | echo "" 59 | echo "📝 To use in Python:" 60 | echo " import oracledb" 61 | echo " oracledb.init_oracle_client(lib_dir=\"$EXTRACTED_DIR\")" 62 | echo "" 63 | echo "💡 Restart your terminal or run: source ~/.zshrc" 64 | -------------------------------------------------------------------------------- /docs/getting-started/overview.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | MemoRizz is a composable memory framework for AI agents. It ships opinionated agent builders, configurable memory providers, and a cognitive-inspired architecture so that every memory you store is intentional. 4 | 5 | ## Architecture at a Glance 6 | 7 | ``` 8 | src/memorizz/ 9 | ├── long_term_memory/ # semantic, procedural, episodic systems 10 | ├── short_term_memory/ # semantic cache + working memory 11 | ├── coordination/ # shared memory for multi-agent orchestration 12 | ├── memory_provider/ # Oracle, MongoDB, custom backends 13 | └── memagent/ # builders + runtime orchestration 14 | ``` 15 | 16 | Each folder owns the implementation for a specific memory subsystem. Agent presets ("application modes") simply select the right combination of these subsystems. 17 | 18 | ## Key Capabilities 19 | 20 | | Capability | Description | Code Entry Point | 21 | |------------|-------------|------------------| 22 | | Long-term semantic memory | Fact + entity graph storage with embeddings | `long_term_memory/semantic/` 23 | | Procedural memory | Toolboxes and workflows for behavior execution | `long_term_memory/procedural/` 24 | | Episodic memory | Conversation history, summaries, and experiences | `long_term_memory/episodic/` 25 | | Short-term memory | Working context buffer + semantic cache | `short_term_memory/` 26 | | Memory providers | Database-specific persistence logic | `memory_provider/` 27 | | Application modes | Pre-bundled stacks per use case | `enums/application_mode.py` 28 | 29 | !!! tip "Map docs to code" 30 | Every section in this site mirrors these modules. When you update a doc, link back to the concrete module (for example ``::: memorizz.memagent.builders.MemAgentBuilder``) so the rendered API reference always matches the running code. 31 | 32 | ## Requirements 33 | 34 | - Python 3.7+ 35 | - An embedding/LLM provider such as OpenAI or Hugging Face 36 | - A memory provider backend (Oracle 23ai/26ai, MongoDB, or your own `MemoryProvider` implementation) 37 | 38 | ## Next Steps 39 | 40 | 1. Read through the [Concepts](concepts.md) page to understand each memory type. 41 | 2. Pick a provider under [Memory Providers](../memory-providers/oracle.md) and configure credentials. 42 | 3. Follow the [Python SDK Quickstart](python-sdk-quickstart.md) to spin up your first `MemAgent`. 43 | -------------------------------------------------------------------------------- /.venv/bin/activate: -------------------------------------------------------------------------------- 1 | # This file must be used with "source bin/activate" *from bash* 2 | # you cannot run it directly 3 | 4 | deactivate () { 5 | # reset old environment variables 6 | if [ -n "${_OLD_VIRTUAL_PATH:-}" ] ; then 7 | PATH="${_OLD_VIRTUAL_PATH:-}" 8 | export PATH 9 | unset _OLD_VIRTUAL_PATH 10 | fi 11 | if [ -n "${_OLD_VIRTUAL_PYTHONHOME:-}" ] ; then 12 | PYTHONHOME="${_OLD_VIRTUAL_PYTHONHOME:-}" 13 | export PYTHONHOME 14 | unset _OLD_VIRTUAL_PYTHONHOME 15 | fi 16 | 17 | # This should detect bash and zsh, which have a hash command that must 18 | # be called to get it to forget past commands. 
Without forgetting 19 | # past commands the $PATH changes we made may not be respected 20 | if [ -n "${BASH:-}" -o -n "${ZSH_VERSION:-}" ] ; then 21 | hash -r 22 | fi 23 | 24 | if [ -n "${_OLD_VIRTUAL_PS1:-}" ] ; then 25 | PS1="${_OLD_VIRTUAL_PS1:-}" 26 | export PS1 27 | unset _OLD_VIRTUAL_PS1 28 | fi 29 | 30 | unset VIRTUAL_ENV 31 | if [ ! "${1:-}" = "nondestructive" ] ; then 32 | # Self destruct! 33 | unset -f deactivate 34 | fi 35 | } 36 | 37 | # unset irrelevant variables 38 | deactivate nondestructive 39 | 40 | VIRTUAL_ENV="/Users/richmondalake/Desktop/memorizz/.venv" 41 | export VIRTUAL_ENV 42 | 43 | _OLD_VIRTUAL_PATH="$PATH" 44 | PATH="$VIRTUAL_ENV/bin:$PATH" 45 | export PATH 46 | 47 | # unset PYTHONHOME if set 48 | # this will fail if PYTHONHOME is set to the empty string (which is bad anyway) 49 | # could use `if (set -u; : $PYTHONHOME) ;` in bash 50 | if [ -n "${PYTHONHOME:-}" ] ; then 51 | _OLD_VIRTUAL_PYTHONHOME="${PYTHONHOME:-}" 52 | unset PYTHONHOME 53 | fi 54 | 55 | if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT:-}" ] ; then 56 | _OLD_VIRTUAL_PS1="${PS1:-}" 57 | if [ "x(.venv) " != x ] ; then 58 | PS1="(.venv) ${PS1:-}" 59 | else 60 | if [ "`basename \"$VIRTUAL_ENV\"`" = "__" ] ; then 61 | # special case for Aspen magic directories 62 | # see http://www.zetadev.com/software/aspen/ 63 | PS1="[`basename \`dirname \"$VIRTUAL_ENV\"\``] $PS1" 64 | else 65 | PS1="(`basename \"$VIRTUAL_ENV\"`)$PS1" 66 | fi 67 | fi 68 | export PS1 69 | fi 70 | 71 | # This should detect bash and zsh, which have a hash command that must 72 | # be called to get it to forget past commands. Without forgetting 73 | # past commands the $PATH changes we made may not be respected 74 | if [ -n "${BASH:-}" -o -n "${ZSH_VERSION:-}" ] ; then 75 | hash -r 76 | fi 77 | -------------------------------------------------------------------------------- /src/memorizz/memagent/models.py: -------------------------------------------------------------------------------- 1 | """Data models for MemAgent configuration and state.""" 2 | 3 | from typing import Any, Dict, List, Optional, Union 4 | 5 | from pydantic import BaseModel, Field 6 | 7 | from .constants import DEFAULT_INSTRUCTION, DEFAULT_MAX_STEPS, DEFAULT_TOOL_ACCESS 8 | 9 | 10 | class MemAgentModel(BaseModel): 11 | """Data model for persisting and loading MemAgent configuration.""" 12 | 13 | model: Optional[Any] = None 14 | llm_config: Optional[Dict[str, Any]] = None # Configuration for the LLM 15 | agent_id: Optional[str] = None 16 | tools: Optional[Union[List, Any]] = None 17 | persona: Optional[Any] = None 18 | instruction: Optional[str] = Field(default=DEFAULT_INSTRUCTION) 19 | application_mode: Optional[str] = "assistant" 20 | memory_types: Optional[ 21 | List[str] 22 | ] = None # Custom memory types that override application_mode defaults 23 | max_steps: int = Field(default=DEFAULT_MAX_STEPS) 24 | memory_ids: Optional[List[str]] = None 25 | tool_access: Optional[str] = Field(default=DEFAULT_TOOL_ACCESS) 26 | long_term_memory_ids: Optional[List[str]] = None 27 | delegates: Optional[List[str]] = None # Store delegate agent IDs 28 | embedding_config: Optional[Dict[str, Any]] = None 29 | semantic_cache: Optional[bool] = False # Enable semantic cache 30 | semantic_cache_config: Optional[ 31 | Union[Any, Dict[str, Any]] 32 | ] = None # Semantic cache configuration 33 | context_window_tokens: Optional[int] = None 34 | internet_access_provider: Optional[str] = None 35 | internet_access_config: Optional[Dict[str, Any]] = None 36 | 37 | model_config = { 38 | 
"arbitrary_types_allowed": True # Allow arbitrary types like Toolbox 39 | } 40 | 41 | 42 | class MemAgentConfig: 43 | """Configuration helper for MemAgent initialization.""" 44 | 45 | def __init__( 46 | self, 47 | instruction: str = DEFAULT_INSTRUCTION, 48 | max_steps: int = DEFAULT_MAX_STEPS, 49 | tool_access: str = DEFAULT_TOOL_ACCESS, 50 | semantic_cache: bool = False, 51 | **kwargs, 52 | ): 53 | self.instruction = instruction 54 | self.max_steps = max_steps 55 | self.tool_access = tool_access 56 | self.semantic_cache = semantic_cache 57 | 58 | # Store additional configuration 59 | for key, value in kwargs.items(): 60 | setattr(self, key, value) 61 | 62 | def to_dict(self) -> Dict[str, Any]: 63 | """Convert configuration to dictionary.""" 64 | return {k: v for k, v in self.__dict__.items() if not k.startswith("_")} 65 | -------------------------------------------------------------------------------- /docs/getting-started/python-sdk-quickstart.md: -------------------------------------------------------------------------------- 1 | # Python SDK Quickstart 2 | 3 | This walkthrough spins up a fully stateful agent with Oracle as the backing provider. Swap in another provider if you prefer MongoDB or a custom backend. 4 | 5 | ## 1. Install Dependencies 6 | 7 | ```bash 8 | pip install -e ".[docs]" # documentation + tooling 9 | pip install -e ".[oracle]" # choose oracle/mongodb/ollama/etc. as needed 10 | ``` 11 | 12 | Add or export your provider + LLM credentials (see `.env.example`). 13 | 14 | ## 2. Bootstrap Oracle (optional) 15 | 16 | ```bash 17 | ./install_oracle.sh # starts Oracle 23ai locally 18 | memorizz setup-oracle # prepares schemas and tables 19 | ``` 20 | 21 | The setup script automatically creates the JSON + vector tables for every memory bucket (personas, long-term memory, semantic cache, etc.). 22 | 23 | ## 3. Configure Embeddings 24 | 25 | ```python 26 | from memorizz.embeddings import configure_embeddings 27 | 28 | configure_embeddings("openai", { 29 | "model": "text-embedding-3-small", 30 | "api_key": os.environ["OPENAI_API_KEY"], 31 | }) 32 | ``` 33 | 34 | ## 4. Build an Agent 35 | 36 | ```python 37 | from memorizz.memory_provider.oracle import OracleProvider, OracleConfig 38 | from memorizz.memagent.builders import MemAgentBuilder 39 | 40 | oracle_provider = OracleProvider( 41 | OracleConfig( 42 | user="memorizz_user", 43 | password="SecurePass123!", 44 | dsn="localhost:1521/FREEPDB1", 45 | embedding_provider="openai", 46 | ) 47 | ) 48 | 49 | agent = (MemAgentBuilder() 50 | .with_instruction("You are a helpful assistant with persistent memory.") 51 | .with_memory_provider(oracle_provider) 52 | .with_llm_config({ 53 | "provider": "openai", 54 | "model": "gpt-4o-mini", 55 | "api_key": os.environ["OPENAI_API_KEY"], 56 | }) 57 | .build()) 58 | ``` 59 | 60 | ## 5. Run and Inspect Memory 61 | 62 | ```python 63 | response = agent.run("Hello, my name is Leah and I like dark mode UIs.") 64 | print(response) 65 | 66 | # Save a structured entity profile 67 | agent.memory.entity_memory.upsert( 68 | entity_id="leah", 69 | attributes={"preferences": ["dark mode UIs", "Python"]} 70 | ) 71 | ``` 72 | 73 | Check your provider (Oracle, MongoDB) to see the stored JSON, embeddings, and metadata for each memory bucket. 74 | 75 | ## Where to Go Next 76 | 77 | - Review every memory subsystem under [Memory Types](../memory-types/semantic.md). 78 | - Point a different provider at the agent with `MemAgentBuilder().with_memory_provider(...)`. 
79 | - Embed API docs inline with ``::: memorizz.memagent.memagent.MemAgent`` to expose parameters inside this site. 80 | -------------------------------------------------------------------------------- /.venv/bin/activate.fish: -------------------------------------------------------------------------------- 1 | # This file must be used with ". bin/activate.fish" *from fish* (http://fishshell.org) 2 | # you cannot run it directly 3 | 4 | function deactivate -d "Exit virtualenv and return to normal shell environment" 5 | # reset old environment variables 6 | if test -n "$_OLD_VIRTUAL_PATH" 7 | set -gx PATH $_OLD_VIRTUAL_PATH 8 | set -e _OLD_VIRTUAL_PATH 9 | end 10 | if test -n "$_OLD_VIRTUAL_PYTHONHOME" 11 | set -gx PYTHONHOME $_OLD_VIRTUAL_PYTHONHOME 12 | set -e _OLD_VIRTUAL_PYTHONHOME 13 | end 14 | 15 | if test -n "$_OLD_FISH_PROMPT_OVERRIDE" 16 | functions -e fish_prompt 17 | set -e _OLD_FISH_PROMPT_OVERRIDE 18 | functions -c _old_fish_prompt fish_prompt 19 | functions -e _old_fish_prompt 20 | end 21 | 22 | set -e VIRTUAL_ENV 23 | if test "$argv[1]" != "nondestructive" 24 | # Self destruct! 25 | functions -e deactivate 26 | end 27 | end 28 | 29 | # unset irrelevant variables 30 | deactivate nondestructive 31 | 32 | set -gx VIRTUAL_ENV "/Users/richmondalake/Desktop/memorizz/.venv" 33 | 34 | set -gx _OLD_VIRTUAL_PATH $PATH 35 | set -gx PATH "$VIRTUAL_ENV/bin" $PATH 36 | 37 | # unset PYTHONHOME if set 38 | if set -q PYTHONHOME 39 | set -gx _OLD_VIRTUAL_PYTHONHOME $PYTHONHOME 40 | set -e PYTHONHOME 41 | end 42 | 43 | if test -z "$VIRTUAL_ENV_DISABLE_PROMPT" 44 | # fish uses a function instead of an env var to generate the prompt. 45 | 46 | # save the current fish_prompt function as the function _old_fish_prompt 47 | functions -c fish_prompt _old_fish_prompt 48 | 49 | # with the original prompt function renamed, we can override with our own. 50 | function fish_prompt 51 | # Save the return status of the last command 52 | set -l old_status $status 53 | 54 | # Prompt override? 55 | if test -n "(.venv) " 56 | printf "%s%s" "(.venv) " (set_color normal) 57 | else 58 | # ...Otherwise, prepend env 59 | set -l _checkbase (basename "$VIRTUAL_ENV") 60 | if test $_checkbase = "__" 61 | # special case for Aspen magic directories 62 | # see http://www.zetadev.com/software/aspen/ 63 | printf "%s[%s]%s " (set_color -b blue white) (basename (dirname "$VIRTUAL_ENV")) (set_color normal) 64 | else 65 | printf "%s(%s)%s" (set_color -b blue white) (basename "$VIRTUAL_ENV") (set_color normal) 66 | end 67 | end 68 | 69 | # Restore the return status of the previous command. 70 | echo "exit $old_status" | . 71 | _old_fish_prompt 72 | end 73 | 74 | set -gx _OLD_FISH_PROMPT_OVERRIDE "$VIRTUAL_ENV" 75 | end 76 | -------------------------------------------------------------------------------- /src/memorizz/llms/llm_provider.py: -------------------------------------------------------------------------------- 1 | # src/memorizz/llms/llm_provider.py 2 | 3 | from typing import ( 4 | TYPE_CHECKING, 5 | Any, 6 | Callable, 7 | Dict, 8 | List, 9 | Optional, 10 | Protocol, 11 | runtime_checkable, 12 | ) 13 | 14 | # Use TYPE_CHECKING to handle forward references for type hints 15 | if TYPE_CHECKING: 16 | pass 17 | 18 | """ 19 | A protocol in Python (introduced in PEP 544 and part of the typing module) defines a structural typing rule. 20 | It specifies a set of methods and properties that a class must implement, 21 | but it does not require inheritance. 
22 | 23 | "If it walks like a duck and quacks like a duck, it's probably a duck." 🦆 24 | 25 | """ 26 | 27 | 28 | @runtime_checkable 29 | class LLMProvider(Protocol): 30 | """ 31 | A generic protocol that defines the contract for any LLM provider 32 | to be compatible with both the OpenAI and AzureOpenAI classes. 33 | """ 34 | 35 | # --- Attributes --- 36 | client: Any 37 | """Provides direct access to the underlying API client instance (e.g., openai.OpenAI or openai.AzureOpenAI).""" 38 | 39 | model: str 40 | """Stores the specific model or deployment name as a string (e.g., "gpt-4o").""" 41 | 42 | # --- Methods --- 43 | def get_tool_metadata(self, func: Callable) -> Dict[str, Any]: 44 | """Creates structured metadata (a JSON schema) from a Python function.""" 45 | ... 46 | 47 | def augment_docstring(self, docstring: str) -> str: 48 | """Uses the LLM to enhance a function's docstring with more detail.""" 49 | ... 50 | 51 | def generate_queries(self, docstring: str) -> List[str]: 52 | """Generates a list of example user queries for a given tool.""" 53 | ... 54 | 55 | def generate_text(self, prompt: str, instructions: Optional[str] = None) -> str: 56 | """A high-level method for simple text generation.""" 57 | ... 58 | 59 | def generate( 60 | self, 61 | messages: List[Dict[str, str]], 62 | tools: Optional[List[Dict[str, Any]]] = None, 63 | tool_choice: str = "auto", 64 | ) -> Any: 65 | """Generate a response from a list of messages (chat format), optionally with tool calling.""" 66 | ... 67 | 68 | def get_config(self) -> Dict[str, Any]: 69 | """ 70 | Returns a serializable dictionary of the provider's configuration. 71 | This is used for saving and reconstructing the agent. 72 | """ 73 | ... 74 | 75 | def get_last_usage(self) -> Optional[Dict[str, int]]: 76 | """Return token usage details (prompt/completion/total) from the most recent call.""" 77 | ... 78 | 79 | def get_context_window_tokens(self) -> Optional[int]: 80 | """Return the provider's context window size in tokens, when known.""" 81 | ... 
82 | -------------------------------------------------------------------------------- /src/memorizz/internet_access/base.py: -------------------------------------------------------------------------------- 1 | """Base classes for internet access providers.""" 2 | 3 | from __future__ import annotations 4 | 5 | import logging 6 | from abc import ABC, abstractmethod 7 | from typing import Any, Dict, List, Optional 8 | 9 | from .models import InternetPageContent, InternetSearchResult 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | 14 | class InternetAccessProvider(ABC): 15 | """Interface for providers that offer internet search / browsing.""" 16 | 17 | provider_name: str = "base" 18 | 19 | def __init__(self, config: Optional[Dict[str, Any]] = None): 20 | self._config = config or {} 21 | 22 | def get_provider_name(self) -> str: 23 | """Return the provider name.""" 24 | return getattr(self, "provider_name", self.__class__.__name__).lower() 25 | 26 | def get_config(self) -> Dict[str, Any]: 27 | """Return serializable config information.""" 28 | return dict(self._config) 29 | 30 | @abstractmethod 31 | def search( 32 | self, query: str, max_results: int = 5, **kwargs 33 | ) -> List[InternetSearchResult]: 34 | """Search the internet and return normalized results.""" 35 | 36 | @abstractmethod 37 | def fetch_url(self, url: str, **kwargs) -> InternetPageContent: 38 | """Fetch and parse the contents of a specific URL.""" 39 | 40 | def close(self) -> None: 41 | """Cleanup resources (override when necessary).""" 42 | return None 43 | 44 | 45 | _PROVIDER_REGISTRY: Dict[str, type[InternetAccessProvider]] = {} 46 | 47 | 48 | def register_provider(name: str, provider_cls: type[InternetAccessProvider]) -> None: 49 | """Register an internet access provider by name.""" 50 | _PROVIDER_REGISTRY[name.lower()] = provider_cls 51 | 52 | 53 | def get_provider_class(name: str) -> Optional[type[InternetAccessProvider]]: 54 | """Return the provider class for a given name.""" 55 | if not name: 56 | return None 57 | return _PROVIDER_REGISTRY.get(name.lower()) 58 | 59 | 60 | def create_internet_access_provider( 61 | name: str, config: Optional[Dict[str, Any]] = None 62 | ) -> Optional[InternetAccessProvider]: 63 | """Instantiate a provider from the registry.""" 64 | provider_cls = get_provider_class(name) 65 | if not provider_cls: 66 | logger.warning("Unknown internet access provider: %s", name) 67 | return None 68 | 69 | config = config or {} 70 | try: 71 | return provider_cls(**config) 72 | except TypeError: 73 | try: 74 | return provider_cls(config=config) # type: ignore[arg-type] 75 | except TypeError as exc: 76 | logger.error( 77 | "Failed to initialize provider '%s' with config keys: %s", 78 | name, 79 | list(config.keys()), 80 | ) 81 | raise exc 82 | -------------------------------------------------------------------------------- /docs/getting-started/concepts.md: -------------------------------------------------------------------------------- 1 | # Concepts 2 | 3 | MemoRizz models agent cognition around a handful of composable building blocks. Understanding these types makes it easier to reason about what your application mode actually enables. 
4 | 5 | ## Memory Types 6 | 7 | | Enum | Purpose | Realization | 8 | |------|---------|-------------| 9 | | `MemoryType.LONG_TERM_MEMORY` | Semantic knowledge base | Namespaces, personas, entity memory | 10 | | `MemoryType.ENTITY_MEMORY` | Structured profile data tied to entities | Attribute/value store with provenance | 11 | | `MemoryType.TOOLBOX` + `MemoryType.WORKFLOW_MEMORY` | Toolbox and workflow behaviors | `long_term_memory/procedural/` | 12 | | `MemoryType.CONVERSATION_MEMORY` | Episodic timeline of interactions | `long_term_memory/episodic/` 13 | | `MemoryType.SUMMARIES` | Cached digests of long conversations | `long_term_memory/episodic/summaries.py` 14 | | `MemoryType.SHORT_TERM_MEMORY` | Working context window | `short_term_memory/working_memory/` 15 | | `MemoryType.SEMANTIC_CACHE` | Fast, short-lived fact lookups | `short_term_memory/semantic_cache/` 16 | | `MemoryType.SHARED_MEMORY` | Coordination between multiple agents | `coordination/shared_memory/` 17 | 18 | !!! note 19 | The `MemoryType` enum lives in `src/memorizz/enums/memory_type.py`. Extending it is the first step when you want to introduce a new storage primitive. 20 | 21 | ## Memories vs. Providers 22 | 23 | - **Memory types** describe *what* your agent can recall. 24 | - **Memory providers** describe *where* the data lives (Oracle, MongoDB, local experiment, etc.). 25 | - **Application modes** (see `src/memorizz/enums/application_mode.py`) simply select the right combination of memories for a task. For example `ASSISTANT` activates conversation history, long-term facts, personas, and summaries; `DEEP_RESEARCH` focuses on toolbox access and shared memory. 26 | 27 | ## Lifecycle 28 | 29 | 1. **Capture** – Agents persist facts by calling methods on the active memory types (e.g., saving a persona or upserting entity attributes). 30 | 2. **Index** – Providers embed relevant fields using your configured embedding provider. 31 | 3. **Retrieve** – During a run, the `MemAgent` orchestrator fetches relevant rows from each memory and mixes them into the prompt stack. 32 | 4. **Summarize** – Episodic memory periodically compacts older interactions into summary memories that keep the context window manageable while preserving detail. 33 | 34 | ## How to Explore Further 35 | 36 | - Inspect `src/memorizz/MEMORY_ARCHITECTURE.md` for the full architecture notes that ship with the codebase. 37 | - Use `mkdocstrings` directives inside any doc page to render live API reference blocks, e.g. 38 | 39 | ```markdown 40 | ::: memorizz.memagent.builders.MemAgentBuilder 41 | handler: python 42 | ``` 43 | 44 | That directive renders directly from the Python source, so your docs always match the SDK version in the repository. 
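To make the mapping above concrete, the following sketch lists the `MemoryType` members and builds a `MemAgentModel` with an explicit memory stack instead of the `assistant` defaults. It assumes the enum values are plain strings (the storage bucket names), which is what `MemAgentModel.memory_types` expects:

```python
# Hedged sketch: choosing memory types explicitly rather than relying on the
# application_mode defaults. Assumes MemoryType values are plain strings.
from memorizz import MemoryType
from memorizz.memagent.models import MemAgentModel

print([m.name for m in MemoryType])  # e.g. CONVERSATION_MEMORY, LONG_TERM_MEMORY, ...

agent_spec = MemAgentModel(
    application_mode="assistant",
    memory_types=[
        MemoryType.CONVERSATION_MEMORY.value,
        MemoryType.LONG_TERM_MEMORY.value,
        MemoryType.SUMMARIES.value,
    ],
)
print(agent_spec.memory_types)
```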
45 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: MemoRizz 2 | site_description: Documentation for the MemoRizz memory framework for AI agents 3 | site_url: https://richmondalake.github.io/memorizz 4 | repo_name: RichmondAlake/memorizz 5 | repo_url: https://github.com/RichmondAlake/memorizz 6 | edit_uri: edit/main/docs/ 7 | 8 | nav: 9 | - Getting Started: 10 | - Overview: getting-started/overview.md 11 | - Concepts: getting-started/concepts.md 12 | - Python SDK Quickstart: getting-started/python-sdk-quickstart.md 13 | - Memory Types: 14 | - Semantic Memory: memory-types/semantic.md 15 | - Episodic Memory: memory-types/episodic.md 16 | - Procedural Memory: memory-types/procedural.md 17 | - Short-Term Memory: memory-types/short-term.md 18 | - Shared Memory: memory-types/shared.md 19 | - Memory Providers: 20 | - Oracle Provider: memory-providers/oracle.md 21 | - MongoDB Provider: memory-providers/mongodb.md 22 | - Filesystem Provider: memory-providers/filesystem.md 23 | - Bring Your Own Provider: memory-providers/custom.md 24 | - Use Cases: 25 | - Assistant Mode: use-cases/assistant-mode.md 26 | - Workflow Mode: use-cases/workflow-mode.md 27 | - Deep Research Mode: use-cases/deep-research-mode.md 28 | - Internet Access Providers: internet-access/providers.md 29 | - Utilities: 30 | - Context Window Stats: utilities/context_window_stats.md 31 | 32 | theme: 33 | name: material 34 | language: en 35 | features: 36 | - navigation.instant 37 | - navigation.sections 38 | - navigation.tabs 39 | - navigation.top 40 | - navigation.footer 41 | - toc.integrate 42 | - search.suggest 43 | - search.highlight 44 | - content.code.copy 45 | - content.code.annotate 46 | palette: 47 | - scheme: default 48 | primary: deep purple 49 | accent: indigo 50 | - scheme: slate 51 | primary: deep purple 52 | accent: lime 53 | font: 54 | text: "Inter" 55 | code: "JetBrains Mono" 56 | 57 | markdown_extensions: 58 | - admonition 59 | - footnotes 60 | - toc: 61 | permalink: true 62 | - pymdownx.details 63 | - pymdownx.superfences 64 | - pymdownx.tabbed: 65 | alternate_style: true 66 | - pymdownx.snippets 67 | - pymdownx.highlight: 68 | anchor_linenums: true 69 | - pymdownx.inlinehilite 70 | - pymdownx.keys 71 | 72 | plugins: 73 | - search 74 | - git-revision-date-localized: 75 | fallback_to_build_date: true 76 | enable_creation_date: true 77 | - mkdocstrings: 78 | handlers: 79 | python: 80 | paths: [src] 81 | options: 82 | docstring_style: google 83 | show_if_no_docstring: false 84 | filters: [] 85 | heading_level: 2 86 | show_category_heading: true 87 | show_source: true 88 | 89 | extra: 90 | social: 91 | - icon: fontawesome/brands/github 92 | link: https://github.com/RichmondAlake/memorizz 93 | -------------------------------------------------------------------------------- /src/memorizz/internet_access/__init__.py: -------------------------------------------------------------------------------- 1 | """Internet access provider interfaces and implementations.""" 2 | 3 | import logging 4 | import os 5 | 6 | from .base import ( 7 | InternetAccessProvider, 8 | create_internet_access_provider, 9 | get_provider_class, 10 | register_provider, 11 | ) 12 | from .models import InternetPageContent, InternetSearchResult 13 | from .providers.firecrawl import FirecrawlProvider 14 | from .providers.offline import OfflineInternetProvider 15 | from .providers.tavily import TavilyProvider 16 | 17 | 
logger = logging.getLogger(__name__) 18 | 19 | DEFAULT_PROVIDER_ENV = "MEMORIZZ_DEFAULT_INTERNET_PROVIDER" 20 | DEFAULT_PROVIDER_API_KEY_ENV = "MEMORIZZ_DEFAULT_INTERNET_PROVIDER_API_KEY" 21 | 22 | __all__ = [ 23 | "InternetAccessProvider", 24 | "InternetPageContent", 25 | "InternetSearchResult", 26 | "FirecrawlProvider", 27 | "TavilyProvider", 28 | "OfflineInternetProvider", 29 | "create_internet_access_provider", 30 | "register_provider", 31 | "get_provider_class", 32 | "get_default_internet_access_provider", 33 | ] 34 | 35 | 36 | def get_default_internet_access_provider() -> InternetAccessProvider: 37 | """ 38 | Return a usable internet provider for Deep Research agents. 39 | 40 | Preference order: 41 | 1. Explicit provider via MEMORIZZ_DEFAULT_INTERNET_PROVIDER. 42 | 2. Tavily (TAVILY_API_KEY). 43 | 3. Firecrawl (FIRECRAWL_API_KEY). 44 | 4. Offline provider placeholder so the tool still responds. 45 | """ 46 | 47 | provider_name = os.getenv(DEFAULT_PROVIDER_ENV) 48 | provider_config = {} 49 | if provider_name: 50 | api_key = os.getenv(DEFAULT_PROVIDER_API_KEY_ENV) 51 | if api_key: 52 | provider_config["api_key"] = api_key 53 | try: 54 | provider = create_internet_access_provider(provider_name, provider_config) 55 | if provider: 56 | return provider 57 | except Exception as exc: # pragma: no cover - best effort fallback 58 | logger.warning( 59 | "Failed to initialize provider '%s' from env: %s", provider_name, exc 60 | ) 61 | 62 | tavily_key = os.getenv("TAVILY_API_KEY") 63 | if tavily_key: 64 | try: 65 | return TavilyProvider(api_key=tavily_key) 66 | except Exception as exc: # pragma: no cover - best effort fallback 67 | logger.warning("Failed to initialize Tavily provider: %s", exc) 68 | 69 | firecrawl_key = os.getenv("FIRECRAWL_API_KEY") 70 | if firecrawl_key: 71 | try: 72 | return FirecrawlProvider(api_key=firecrawl_key) 73 | except Exception as exc: # pragma: no cover - best effort fallback 74 | logger.warning("Failed to initialize Firecrawl provider: %s", exc) 75 | 76 | reason = ( 77 | "Set TAVILY_API_KEY, FIRECRAWL_API_KEY, or MEMORIZZ_DEFAULT_INTERNET_PROVIDER " 78 | "to enable live internet access." 79 | ) 80 | return OfflineInternetProvider(reason=reason) 81 | -------------------------------------------------------------------------------- /docs/memory-providers/filesystem.md: -------------------------------------------------------------------------------- 1 | # Filesystem Provider 2 | 3 | The filesystem provider persists every MemoRizz memory type as JSON files on disk and uses FAISS for vector similarity search. It is ideal for local development, CI runs, or lightweight deployments where running MongoDB/Oracle would be overkill. 4 | 5 | ## Highlights 6 | 7 | - No external database required—everything lives under the configured root directory. 8 | - Works with the exact same `MemoryProvider` API as Oracle/MongoDB, so agents can swap providers without code changes. 9 | - Optional FAISS acceleration for semantic queries with automatic fallbacks to cosine or keyword search when embeddings are missing. 10 | 11 | ## Installation 12 | 13 | ```bash 14 | pip install memorizz[filesystem] 15 | ``` 16 | 17 | This installs `faiss-cpu`. If you skip the extra, the provider still works but falls back to keyword search until FAISS (and an embedding provider) are available. 
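If you are unsure whether the optional dependency made it into your environment, a quick import check shows which search path the provider will use. This assumes only that the extra installs the `faiss` module, which `faiss-cpu` provides.

```python
try:
    import faiss  # noqa: F401 - installed by the memorizz[filesystem] extra
    print("FAISS available: vector similarity search can be used")
except ImportError:
    print("FAISS missing: the provider falls back to cosine/keyword search")
```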
18 | 19 | ## Configuration 20 | 21 | ```python 22 | from pathlib import Path 23 | from memorizz.memory_provider import FileSystemConfig, FileSystemProvider 24 | 25 | config = FileSystemConfig( 26 | root_path=Path("~/.memorizz").expanduser(), # Each MemoryType gets its own folder 27 | lazy_vector_indexes=True, # Build FAISS indexes on demand 28 | embedding_provider="openai", # Optional, enables semantic search 29 | embedding_config={"model": "text-embedding-3-small"}, 30 | ) 31 | 32 | provider = FileSystemProvider(config) 33 | ``` 34 | 35 | - `root_path` is the only required field. The provider creates subdirectories named after each `MemoryType`. 36 | - Set `lazy_vector_indexes=True` to skip vector index builds until a semantic query hits a store. 37 | - You can also pass a fully constructed `EmbeddingManager` instance via `embedding_provider` for complete control. 38 | 39 | ## Storage Layout 40 | 41 | ``` 42 | ~/.memorizz/ 43 | ├── conversation_memory/ 44 | │ ├── index.json # Lightweight metadata for quick lookups 45 | │ ├── 4c1d9a2f.json # Individual memory documents 46 | │ └── vector.index (optional) # Saved FAISS index when embeddings are enabled 47 | ├── long_term_memory/ 48 | │ └── … 49 | └── agents/ # Stored MemAgent configurations 50 | ``` 51 | 52 | Each JSON file contains the raw document plus MemoRizz metadata (`_id`, `memory_id`, timestamps, embeddings, etc.). When FAISS is installed, the provider builds an in-memory index and snapshots it to `vector.index` for fast restarts. 53 | 54 | ## Usage Tips 55 | 56 | - **Embeddings optional**: If you only need deterministic lookups (ID/name filters), skip embedding configuration and the provider will stick to metadata filtering/keyword search. 57 | - **Backups**: Because everything is plain JSON, standard tools (`tar`, `rsync`, cloud sync) can back up or relocate memory stores easily. 58 | - **Cleanup**: Call `delete_memagent(..., cascade=True)` to remove all memories tied to an agent (the provider deletes the related JSON files). 59 | 60 | For in-depth details, see `src/memorizz/memory_provider/filesystem/provider.py`. 
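The cascade cleanup mentioned in the usage tips looks like this in practice. The sketch reuses the `provider` built in the configuration example above and mirrors `tests/unit/test_filesystem_provider.py`; the field values are illustrative.

```python
from memorizz.enums import MemoryType
from memorizz.memagent import MemAgentModel

# Store a memory and an agent configuration that references it
provider.store(
    {"content": "greeting", "memory_id": "shared-memory"},
    memory_store_type=MemoryType.CONVERSATION_MEMORY,
)
agent = MemAgentModel(
    instruction="cascade demo",
    memory_ids=["shared-memory"],
    application_mode="assistant",
)
agent_id = provider.store_memagent(agent)

# Cascade delete removes the agent and the JSON files for its memories
provider.delete_memagent(agent_id, cascade=True)
assert provider.list_all(MemoryType.CONVERSATION_MEMORY) == []
```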
61 | -------------------------------------------------------------------------------- /src/memorizz/memagent/managers/internet_access_manager.py: -------------------------------------------------------------------------------- 1 | """Manager responsible for routing internet access actions to providers.""" 2 | 3 | from __future__ import annotations 4 | 5 | import logging 6 | from typing import Any, Dict, List, Optional 7 | 8 | from ...internet_access import InternetAccessProvider 9 | from ...internet_access.models import InternetPageContent, InternetSearchResult 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | 14 | class InternetAccessManager: 15 | """Wrapper over InternetAccessProvider implementations.""" 16 | 17 | def __init__(self, provider: Optional[InternetAccessProvider] = None): 18 | self.provider = provider 19 | 20 | def set_provider( 21 | self, provider: Optional[InternetAccessProvider] 22 | ) -> Optional[InternetAccessProvider]: 23 | """Attach or detach an internet provider.""" 24 | previous = self.provider 25 | if previous and previous is not provider: 26 | try: 27 | previous.close() 28 | except Exception as exc: 29 | logger.debug("Failed to close previous internet provider: %s", exc) 30 | self.provider = provider 31 | return previous 32 | 33 | def is_enabled(self) -> bool: 34 | """Return True if provider is available.""" 35 | return self.provider is not None 36 | 37 | def get_provider_name(self) -> Optional[str]: 38 | if not self.provider: 39 | return None 40 | return self.provider.get_provider_name() 41 | 42 | def get_provider_config(self) -> Optional[Dict[str, Any]]: 43 | if not self.provider: 44 | return None 45 | return self.provider.get_config() 46 | 47 | def search( 48 | self, query: str, max_results: int = 5, **kwargs 49 | ) -> List[Dict[str, Any]]: 50 | """Execute a search query using the provider.""" 51 | if not self.provider: 52 | raise ValueError("Internet access provider is not configured") 53 | results = self.provider.search(query=query, max_results=max_results, **kwargs) 54 | return [self._result_to_dict(item) for item in results] 55 | 56 | def fetch_url(self, url: str, **kwargs) -> Dict[str, Any]: 57 | """Fetch a URL using the provider.""" 58 | if not self.provider: 59 | raise ValueError("Internet access provider is not configured") 60 | page = self.provider.fetch_url(url=url, **kwargs) 61 | return self._page_to_dict(page) 62 | 63 | # Serialization helpers ------------------------------------------------- 64 | def _result_to_dict(self, result: Any) -> Dict[str, Any]: 65 | if isinstance(result, InternetSearchResult): 66 | return result.to_dict() 67 | if isinstance(result, dict): 68 | return result 69 | return {"value": result} 70 | 71 | def _page_to_dict(self, page: Any) -> Dict[str, Any]: 72 | if isinstance(page, InternetPageContent): 73 | return page.to_dict() 74 | if isinstance(page, dict): 75 | return page 76 | return {"content": page} 77 | -------------------------------------------------------------------------------- /src/memorizz/short_term_memory/working_memory/cwm.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | # from ..memagent import MemAgent 4 | from ...enums.memory_type import MemoryType 5 | 6 | 7 | # Can take in an agent and then return a prompt that informs the agent on how to manage the context window 8 | class CWM: 9 | # def __init__(self, agent: MemAgent): 10 | # self.agent = agent 11 | 12 | @staticmethod 13 | def get_prompt_from_memory_types(memory_types: 
List[MemoryType]):
14 |         prompt = "You are an AI Agent endowed with a powerful, multi-tiered memory augmentation system. Your mission is to use all available memory modalities to deliver consistent, accurate, and context-rich responses. The aim is to ensure that through augmented memory, you become believable, capable, and reliable."
15 | 
16 |         for memory_type in memory_types:
17 |             prompt += CWM._generate_prompt_for_memory_type(memory_type)
18 | 
19 |         return prompt
20 | 
21 |     @staticmethod
22 |     def _generate_prompt_for_memory_type(memory_type: MemoryType):
23 |         # Define memory type prompts in a dictionary for better maintainability
24 |         memory_prompts = {
25 |             MemoryType.CONVERSATION_MEMORY: {
26 |                 "description": "This is a memory type that stores the conversation history between the agent and the user.",
27 |                 "usage": "Use this to provide continuity, avoid repeating yourself, and reference prior turns.",
28 |             },
29 |             MemoryType.WORKFLOW_MEMORY: {
30 |                 "description": "This is a memory type that stores the workflow history between the agent and the user.",
31 |                 "usage": "Use this to provide continuity, avoid repeating yourself, and reference prior turns.",
32 |             },
33 |             MemoryType.SHARED_MEMORY: {
34 |                 "description": "This is a memory type that stores shared blackboard information for multi-agent coordination.",
35 |                 "usage": "Use this to coordinate with other agents, understand your role in the agent hierarchy, and access shared coordination activities and context.",
36 |             },
37 |             MemoryType.SUMMARIES: {
38 |                 "description": "This is a memory type that stores compressed summaries of past conversations and interactions to preserve important context while managing memory efficiently.",
39 |                 "usage": "Use these summaries to understand the broader context of your interactions with the user, recall important topics, preferences, and past decisions. This helps you provide more personalized and context-aware responses even when specific conversations are no longer in active memory.",
40 |             },
41 |         }
42 | 
43 |         # Get the prompt configuration for this memory type
44 |         prompt_config = memory_prompts.get(memory_type)
45 | 
46 |         if prompt_config:
47 |             prompt = f"\n\nMemory Type: {memory_type.value}\n"
48 |             prompt += f"Memory Type Description: {prompt_config['description']}\n"
49 |             prompt += f"Memory Type Usage: {prompt_config['usage']}\n"
50 |             return prompt
51 |         else:
52 |             # Handle unknown memory types gracefully
53 |             return f"\n\nMemory Type: {memory_type.value}\n"
54 | 
55 | 
56 | # Can take in an array of memory stores and then return a prompt that informs the agent on how to manage the context window
57 | 
--------------------------------------------------------------------------------
/docs/utilities/context_window_stats.md:
--------------------------------------------------------------------------------
1 | # Context Window Stats Utility
2 | 
3 | Track how much of the model's context window a MemAgent has consumed at any point during a conversation. The agent records usage every time it calls the underlying LLM and makes the latest snapshot accessible through logs and code.
4 | 5 | ## What the Agent Records 6 | 7 | Each snapshot includes: 8 | 9 | - `timestamp`: ISO-8601 timestamp for the measurement 10 | - `stage`: the agent stage that triggered the measurement (e.g., `iteration_1`, `memory_compression`) 11 | - `prompt_tokens`: number of tokens sent to the model 12 | - `completion_tokens`: tokens generated by the model 13 | - `total_tokens`: sum of prompt + completion tokens 14 | - `context_window_tokens`: configured or inferred window size 15 | - `percentage_used`: total usage / context window × 100 16 | 17 | If the provider does not return usage information, MemoRizz falls back to `None` so downstream code can handle missing values gracefully. 18 | 19 | ## Logging 20 | 21 | By default the agent logs each measurement at `INFO` level: 22 | 23 | ``` 24 | Context window usage (iteration_2): 2,350/128,000 tokens (1.84%) | prompt=2,100 completion=250 25 | ``` 26 | 27 | Monitor your existing log stream (e.g., `tail -f app.log`) to watch the token budget drain in real time. 28 | 29 | ## Programmatic Access 30 | 31 | Use `memagent.get_context_window_stats()` to retrieve the most recent snapshot after an interaction: 32 | 33 | ```python 34 | response = agent.run("Summarize the workshop agenda we discussed yesterday.") 35 | 36 | stats = agent.get_context_window_stats() 37 | if stats: 38 | print( 39 | f"Total tokens: {stats['total_tokens']}" 40 | f" ({stats['percentage_used']:.2f}% of {stats['context_window_tokens']})" 41 | ) 42 | else: 43 | print("Provider did not return usage information.") 44 | ``` 45 | 46 | Snapshots are ordinary dictionaries, so you can emit them to observability pipelines, dashboards, or audits. 47 | 48 | ## Configuring the Context Window 49 | 50 | MemoRizz tries to detect the context window automatically: 51 | 52 | 1. Use the explicit `context_window_tokens` argument passed to `MemAgent` or `MemAgentBuilder`. 53 | 2. If not provided, look for `context_window_tokens` / `max_context_tokens` / `context_window` inside `llm_config`. 54 | 3. Fall back to the provider's built-in knowledge (OpenAI & Azure expose known limits; Hugging Face derives the tokenizer limit). 55 | 56 | You can override the inferred value at any time: 57 | 58 | ```python 59 | agent = (MemAgentBuilder() 60 | .with_llm_config({"provider": "openai", "model": "gpt-4o-mini"}) 61 | .with_memory_provider(provider) 62 | .build() 63 | ) 64 | 65 | # Later, adjust for a custom fine-tuned model 66 | agent._context_window_tokens = 32_000 67 | ``` 68 | 69 | > **Tip:** When you know the exact budget (e.g., for a fine-tuned or local model) always pass it explicitly so percentage calculations remain accurate. 70 | 71 | ## Provider Support 72 | 73 | - **OpenAI / Azure OpenAI:** Token usage comes directly from the API response (`response.usage`). 74 | - **Hugging Face:** The provider counts tokens via the active tokenizer (falling back to whitespace splitting when needed). 75 | - **Custom Providers:** Implement the `LLMProvider` protocol’s `get_last_usage()` and `get_context_window_tokens()` methods to plug into the same reporting pipeline. 76 | 77 | With these hooks in place, every MemAgent—single or multi-step—can report how close it is to the model’s context limit, helping you catch runaway prompts before they overflow the window. 
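For custom providers, a minimal sketch of the two reporting hooks is shown below. Only `get_last_usage()` and `get_context_window_tokens()` are named in this guide; the shape of the usage dictionary, the recording helper, and the rest of the `LLMProvider` protocol are assumptions here, so treat `src/memorizz/llms/llm_provider.py` as the authoritative interface.

```python
from typing import Dict, Optional


class MyLocalProvider:
    """Partial sketch: only the context-window reporting hooks are shown."""

    def __init__(self, context_window_tokens: int = 32_000):
        self._context_window_tokens = context_window_tokens
        self._last_usage: Optional[Dict[str, int]] = None

    def get_context_window_tokens(self) -> int:
        # Budget used for the percentage_used calculation
        return self._context_window_tokens

    def get_last_usage(self) -> Optional[Dict[str, int]]:
        # Return None when the backend reports no usage for a call
        return self._last_usage

    def _record_usage(self, prompt_tokens: int, completion_tokens: int) -> None:
        # Hypothetical helper: call this after each completion so the agent can snapshot usage
        self._last_usage = {
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": prompt_tokens + completion_tokens,
        }
```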
78 | -------------------------------------------------------------------------------- /src/memorizz/cli.py: -------------------------------------------------------------------------------- 1 | """CLI commands for Memorizz.""" 2 | 3 | import os 4 | import subprocess 5 | import sys 6 | from pathlib import Path 7 | 8 | 9 | def install_oracle(): 10 | """Install Oracle database using install_oracle.sh script.""" 11 | # Try to find install_oracle.sh script 12 | # Check multiple possible locations 13 | possible_paths = [ 14 | # Current directory (for local development) 15 | Path("install_oracle.sh"), 16 | # Package scripts directory (when installed from PyPI) 17 | Path(__file__).parent / "scripts" / "install_oracle.sh", 18 | # Repository root (if installed in editable mode or running from repo) 19 | Path(__file__).parent.parent.parent / "install_oracle.sh", 20 | # Alternative repository root path 21 | Path(__file__).parent.parent.parent.parent / "install_oracle.sh", 22 | ] 23 | 24 | script_path = None 25 | for path in possible_paths: 26 | if path.exists() and path.is_file(): 27 | script_path = path 28 | break 29 | 30 | if not script_path: 31 | print("✗ install_oracle.sh script not found") 32 | print("\nThe install_oracle.sh script is only available when:") 33 | print(" 1. You've cloned the repository, or") 34 | print(" 2. You're running from the repository directory") 35 | print("\nAlternative: Install Oracle manually with Docker:") 36 | print(" docker run -d --name oracle-memorizz -p 1521:1521 \\") 37 | print(" -e ORACLE_PWD=MyPassword123! \\") 38 | print(" container-registry.oracle.com/database/free:latest-lite") 39 | print("\nOr use the script directly if you have it:") 40 | print(" ./install_oracle.sh") 41 | return False 42 | 43 | # Make script executable 44 | os.chmod(script_path, 0o755) 45 | 46 | # Execute the script 47 | try: 48 | result = subprocess.run( 49 | [str(script_path)], 50 | check=False, # Don't raise exception on non-zero exit 51 | capture_output=False, # Show output in real-time 52 | ) 53 | return result.returncode == 0 54 | except Exception as e: 55 | print(f"✗ Failed to execute install_oracle.sh: {e}") 56 | return False 57 | 58 | 59 | def setup_oracle(): 60 | """Run Oracle database setup.""" 61 | try: 62 | from memorizz.memory_provider.oracle import setup_oracle_user 63 | 64 | return setup_oracle_user() 65 | except ImportError as e: 66 | print(f"✗ Failed to import setup module: {e}") 67 | print("\nPlease ensure memorizz[oracle] is installed:") 68 | print(" pip install memorizz[oracle]") 69 | return False 70 | 71 | 72 | def main(): 73 | """Main CLI entry point.""" 74 | if len(sys.argv) < 2: 75 | print("Memorizz CLI") 76 | print("\nAvailable commands:") 77 | print(" install-oracle Install Oracle database container") 78 | print(" setup-oracle Set up Oracle database schema") 79 | print("\nUsage:") 80 | print(" memorizz install-oracle") 81 | print(" memorizz setup-oracle") 82 | print(" python -m memorizz.cli ") 83 | sys.exit(1) 84 | 85 | command = sys.argv[1] 86 | 87 | if command == "install-oracle": 88 | success = install_oracle() 89 | sys.exit(0 if success else 1) 90 | elif command == "setup-oracle": 91 | success = setup_oracle() 92 | sys.exit(0 if success else 1) 93 | else: 94 | print(f"✗ Unknown command: {command}") 95 | print("Run 'memorizz' or 'python -m memorizz.cli' for help") 96 | sys.exit(1) 97 | 98 | 99 | if __name__ == "__main__": 100 | main() 101 | -------------------------------------------------------------------------------- 
/src/memorizz/coordination/shared_memory/messages.py: -------------------------------------------------------------------------------- 1 | """Typed message helpers for shared memory coordination.""" 2 | 3 | from __future__ import annotations 4 | 5 | import uuid 6 | from dataclasses import dataclass, field 7 | from datetime import datetime 8 | from enum import Enum 9 | from typing import Any, Dict, List, Optional 10 | 11 | 12 | class SharedMemoryMessageType(str, Enum): 13 | """Supported shared memory message types.""" 14 | 15 | COMMAND = "COMMAND" 16 | STATUS = "STATUS" 17 | REPORT = "REPORT" 18 | QUESTION = "QUESTION" 19 | 20 | 21 | @dataclass 22 | class SharedMemoryMessage: 23 | """Base shared memory message.""" 24 | 25 | message_type: SharedMemoryMessageType 26 | payload: Dict[str, Any] 27 | message_id: str = field(default_factory=lambda: str(uuid.uuid4())) 28 | created_at: str = field(default_factory=lambda: datetime.utcnow().isoformat()) 29 | 30 | def to_dict(self) -> Dict[str, Any]: 31 | """Return serializable payload.""" 32 | return { 33 | "message_id": self.message_id, 34 | "message_type": self.message_type.value, 35 | "created_at": self.created_at, 36 | "payload": self.payload, 37 | } 38 | 39 | 40 | def _validate_fields(data: Dict[str, Any], fields: List[str], message_type: str): 41 | missing = [field for field in fields if not data.get(field)] 42 | if missing: 43 | raise ValueError( 44 | f"{message_type} message missing required fields: {', '.join(missing)}" 45 | ) 46 | 47 | 48 | def create_command_message( 49 | command_id: str, 50 | target_agent_id: str, 51 | instructions: str, 52 | priority: int = 3, 53 | dependencies: Optional[List[str]] = None, 54 | metadata: Optional[Dict[str, Any]] = None, 55 | ) -> SharedMemoryMessage: 56 | """Build a validated COMMAND message payload.""" 57 | payload = { 58 | "command_id": command_id, 59 | "target_agent_id": target_agent_id, 60 | "instructions": instructions, 61 | "priority": priority, 62 | "dependencies": dependencies or [], 63 | "metadata": metadata or {}, 64 | } 65 | _validate_fields( 66 | payload, ["command_id", "target_agent_id", "instructions"], "COMMAND" 67 | ) 68 | return SharedMemoryMessage(SharedMemoryMessageType.COMMAND, payload) 69 | 70 | 71 | def create_status_message( 72 | command_id: str, 73 | agent_id: str, 74 | status: str, 75 | progress: int, 76 | blockers: Optional[str] = None, 77 | summary_ids: Optional[List[str]] = None, 78 | ) -> SharedMemoryMessage: 79 | """Build a validated STATUS message payload.""" 80 | payload = { 81 | "command_id": command_id, 82 | "agent_id": agent_id, 83 | "status": status, 84 | "progress": max(0, min(progress, 100)), 85 | "blockers": blockers, 86 | "summary_ids": summary_ids or [], 87 | } 88 | _validate_fields(payload, ["command_id", "agent_id", "status"], "STATUS") 89 | return SharedMemoryMessage(SharedMemoryMessageType.STATUS, payload) 90 | 91 | 92 | def create_report_message( 93 | command_id: str, 94 | agent_id: str, 95 | findings: str, 96 | citations: Optional[List[str]] = None, 97 | gaps: Optional[List[str]] = None, 98 | summary_ids: Optional[List[str]] = None, 99 | ) -> SharedMemoryMessage: 100 | """Build a validated REPORT message payload.""" 101 | payload = { 102 | "command_id": command_id, 103 | "agent_id": agent_id, 104 | "findings": findings, 105 | "citations": citations or [], 106 | "gaps": gaps or [], 107 | "summary_ids": summary_ids or [], 108 | } 109 | _validate_fields(payload, ["command_id", "agent_id", "findings"], "REPORT") 110 | return 
SharedMemoryMessage(SharedMemoryMessageType.REPORT, payload) 111 | -------------------------------------------------------------------------------- /tests/unit/test_internet_access.py: -------------------------------------------------------------------------------- 1 | """Tests for internet access integration.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import Any, Dict, List 6 | from unittest.mock import MagicMock 7 | 8 | import pytest 9 | 10 | from memorizz.internet_access import ( 11 | InternetAccessProvider, 12 | InternetPageContent, 13 | InternetSearchResult, 14 | register_provider, 15 | ) 16 | from memorizz.memagent.core import MemAgent 17 | from memorizz.memagent.managers.internet_access_manager import InternetAccessManager 18 | from memorizz.memagent.models import MemAgentModel 19 | 20 | 21 | class _DummyProvider(InternetAccessProvider): 22 | provider_name = "dummy-provider" 23 | 24 | def __init__(self, **kwargs): 25 | super().__init__(kwargs) 26 | 27 | def search( 28 | self, query: str, max_results: int = 5, **kwargs 29 | ) -> List[InternetSearchResult]: 30 | return [ 31 | InternetSearchResult( 32 | url=f"https://example.com/{idx}", 33 | title=f"Result {idx}", 34 | snippet=query, 35 | score=1.0, 36 | ) 37 | for idx in range(max_results) 38 | ] 39 | 40 | def fetch_url(self, url: str, **kwargs) -> InternetPageContent: 41 | return InternetPageContent(url=url, title="Example", content="Example body") 42 | 43 | 44 | register_provider(_DummyProvider.provider_name, _DummyProvider) 45 | 46 | 47 | @pytest.mark.unit 48 | def test_internet_access_manager_serializes_results(): 49 | provider = _DummyProvider() 50 | manager = InternetAccessManager(provider) 51 | 52 | results = manager.search("memorizz", max_results=2) 53 | assert len(results) == 2 54 | assert results[0]["url"].startswith("https://example.com/") 55 | 56 | page = manager.fetch_url("https://memorizz.ai") 57 | assert page["content"] == "Example body" 58 | 59 | 60 | @pytest.mark.unit 61 | def test_memagent_registers_internet_tools(): 62 | provider = MagicMock() 63 | provider.get_provider_name.return_value = "dummy" 64 | provider.get_config.return_value = {"api_key": "test"} 65 | provider.search.return_value = [{"url": "https://example.com"}] 66 | provider.fetch_url.return_value = { 67 | "url": "https://example.com", 68 | "content": "Body", 69 | } 70 | 71 | agent = MemAgent(instruction="Internet agent", internet_access_provider=provider) 72 | 73 | assert agent.has_internet_access() is True 74 | assert "internet_search" in agent.tool_manager.tools 75 | assert agent.search_internet("python") 76 | provider.search.assert_called_once() 77 | 78 | 79 | @pytest.mark.unit 80 | def test_memagent_disables_internet_access(): 81 | provider = MagicMock() 82 | provider.get_provider_name.return_value = "dummy" 83 | provider.get_config.return_value = {} 84 | provider.search.return_value = [] 85 | provider.fetch_url.return_value = {} 86 | 87 | agent = MemAgent(instruction="toggle agent", internet_access_provider=provider) 88 | assert agent.has_internet_access() is True 89 | 90 | agent.with_internet_access_provider(None) 91 | assert agent.has_internet_access() is False 92 | assert "internet_search" not in agent.tool_manager.tools 93 | 94 | 95 | @pytest.mark.unit 96 | def test_memagent_load_rehydrated_provider(monkeypatch): 97 | memory_provider = MagicMock() 98 | saved = MemAgentModel( 99 | instruction="Load", 100 | internet_access_provider=_DummyProvider.provider_name, 101 | internet_access_config={"custom": "value"}, 102 | ) 103 
| memory_provider.retrieve_memagent.return_value = saved 104 | 105 | agent = MemAgent.load( 106 | agent_id="agent-123", 107 | memory_provider=memory_provider, 108 | ) 109 | 110 | assert agent.has_internet_access() is True 111 | assert agent.get_internet_access_provider_name() == _DummyProvider.provider_name 112 | -------------------------------------------------------------------------------- /src/memorizz/long_term_memory/procedural/persona/README.md: -------------------------------------------------------------------------------- 1 | # Persona Module 2 | 3 | The Persona module provides a framework for creating and managing AI agent personas with specific roles, goals, and backgrounds. This module is part of the Memorizz library, which handles memory management for AI agents. 4 | 5 | ## Features 6 | 7 | - Create personas with predefined or custom roles 8 | - Automatically generate embeddings for semantic search 9 | - Store and retrieve personas from memory providers 10 | - Find similar personas based on semantic similarity 11 | - Generate system prompts based on persona attributes 12 | 13 | ## Usage 14 | 15 | ### Creating a Persona 16 | 17 | ```python 18 | from src.memorizz.long_term_memory.semantic.persona import Persona 19 | from src.memorizz.memory_provider import MemoryProvider 20 | 21 | # Initialize a memory provider 22 | memory_provider = MemoryProvider() 23 | 24 | # Create a new persona 25 | tech_expert = Persona( 26 | name="TechExpert", 27 | role="Technical Support Specialist", 28 | goals="Help users troubleshoot technical issues. Provide clear explanations for complex problems.", 29 | background="An experienced technical support engineer with expertise in software development, networking, and system administration." 30 | ) 31 | 32 | # Create a persona with more personality traits 33 | sarcastic_assistant = Persona( 34 | name="Monday", 35 | role="General", 36 | goals="Provide versatile support with a sarcastic tone. Add humor to interactions.", 37 | background="A cynical but helpful assistant who uses dry wit and gentle teasing while delivering high-quality information." 
38 | ) 39 | ``` 40 | 41 | ### Storing Personas 42 | 43 | Once created, personas can be stored in the memory provider for future use: 44 | 45 | ```python 46 | # Store the persona in the memory provider 47 | persona_id = tech_expert.store_persona(memory_provider) 48 | print(f"Stored persona with ID: {persona_id}") 49 | ``` 50 | 51 | ### Generating Persona Prompts 52 | 53 | Personas can generate system prompts for language models: 54 | 55 | ```python 56 | # Generate a prompt that can be used with LLMs 57 | system_prompt = tech_expert.generate_system_prompt_input() 58 | print(system_prompt) 59 | ``` 60 | 61 | ### Retrieving Personas 62 | 63 | Personas can be retrieved by ID: 64 | 65 | ```python 66 | # Retrieve a persona using its ID 67 | retrieved_persona = Persona.retrieve_persona(persona_id, memory_provider) 68 | print(retrieved_persona) 69 | ``` 70 | 71 | Or by semantic similarity to a query: 72 | 73 | ```python 74 | # Find personas matching a specific need 75 | similar_personas = Persona.get_most_similar_persona( 76 | "I need a technical expert who can explain complex concepts simply", 77 | memory_provider, 78 | limit=1 79 | ) 80 | ``` 81 | 82 | ### Using Personas with MemAgents 83 | 84 | Personas can be assigned to MemAgents to control their behavior: 85 | 86 | ```python 87 | from src.memorizz.memagent import MemAgent 88 | 89 | # Create an agent with a specific persona 90 | agent = MemAgent( 91 | model=None, # Will use default model 92 | persona=tech_expert, 93 | instruction="Help users with their technical questions", 94 | memory_provider=memory_provider 95 | ) 96 | 97 | # Or set/change a persona later 98 | agent.set_persona(sarcastic_assistant) 99 | 100 | # Run the agent with its persona influencing responses 101 | response = agent.run("Can you help me fix my computer?") 102 | ``` 103 | 104 | ### Persona Persistence 105 | 106 | Personas are stored with vector embeddings for efficient retrieval: 107 | 108 | ```python 109 | # List all available personas 110 | all_personas = memory_provider.list_all(memory_type=MemoryType.PERSONA) 111 | 112 | # Delete a persona 113 | memory_provider.delete_by_id(persona_id, memory_type=MemoryType.PERSONA) 114 | ``` 115 | 116 | ## Implementation Notes 117 | 118 | - Persona embeddings are generated from their attributes for semantic search 119 | - The system automatically converts personas to appropriate prompts for language models 120 | - Personas can be used across multiple agents for consistent behavior 121 | - Custom persona attributes can be added beyond the basic required fields 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /src/memorizz/long_term_memory/semantic/persona/README.md: -------------------------------------------------------------------------------- 1 | # Persona Module 2 | 3 | The Persona module provides a framework for creating and managing AI agent personas with specific roles, goals, and backgrounds. This module is part of the Memorizz library, which handles memory management for AI agents. 
4 | 
5 | ## Features
6 | 
7 | - Create personas with predefined or custom roles
8 | - Automatically generate embeddings for semantic search
9 | - Store and retrieve personas from memory providers
10 | - Find similar personas based on semantic similarity
11 | - Generate system prompts based on persona attributes
12 | 
13 | ## Usage
14 | 
15 | ### Creating a Persona
16 | 
17 | ```python
18 | from src.memorizz.long_term_memory.semantic.persona import Persona
19 | from src.memorizz.memory_provider import MemoryProvider
20 | 
21 | # Initialize a memory provider
22 | memory_provider = MemoryProvider()
23 | 
24 | # Create a new persona
25 | tech_expert = Persona(
26 |     name="TechExpert",
27 |     role="Technical Support Specialist",
28 |     goals="Help users troubleshoot technical issues. Provide clear explanations for complex problems.",
29 |     background="An experienced technical support engineer with expertise in software development, networking, and system administration."
30 | )
31 | 
32 | # Create a persona with more personality traits
33 | sarcastic_assistant = Persona(
34 |     name="Monday",
35 |     role="General",
36 |     goals="Provide versatile support with a sarcastic tone. Add humor to interactions.",
37 |     background="A cynical but helpful assistant who uses dry wit and gentle teasing while delivering high-quality information."
38 | )
39 | ```
40 | 
41 | ### Storing Personas
42 | 
43 | Once created, personas can be stored in the memory provider for future use:
44 | 
45 | ```python
46 | # Store the persona in the memory provider
47 | persona_id = tech_expert.store_persona(memory_provider)
48 | print(f"Stored persona with ID: {persona_id}")
49 | ```
50 | 
51 | ### Generating Persona Prompts
52 | 
53 | Personas can generate system prompts for language models:
54 | 
55 | ```python
56 | # Generate a prompt that can be used with LLMs
57 | system_prompt = tech_expert.generate_system_prompt_input()
58 | print(system_prompt)
59 | ```
60 | 
61 | ### Retrieving Personas
62 | 
63 | Personas can be retrieved by ID:
64 | 
65 | ```python
66 | # Retrieve a persona using its ID
67 | retrieved_persona = Persona.retrieve_persona(persona_id, memory_provider)
68 | print(retrieved_persona)
69 | ```
70 | 
71 | Or by semantic similarity to a query:
72 | 
73 | ```python
74 | # Find personas matching a specific need
75 | similar_personas = Persona.get_most_similar_persona(
76 |     "I need a technical expert who can explain complex concepts simply",
77 |     memory_provider,
78 |     limit=1
79 | )
80 | ```
81 | 
82 | ### Using Personas with MemAgents
83 | 
84 | Personas can be assigned to MemAgents to control their behavior:
85 | 
86 | ```python
87 | from src.memorizz.memagent import MemAgent
88 | 
89 | # Create an agent with a specific persona
90 | agent = MemAgent(
91 |     model=None, # Will use default model
92 |     persona=tech_expert,
93 |     instruction="Help users with their technical questions",
94 |     memory_provider=memory_provider
95 | )
96 | 
97 | # Or set/change a persona later
98 | agent.set_persona(sarcastic_assistant)
99 | 
100 | # Run the agent with its persona influencing responses
101 | response = agent.run("Can you help me fix my computer?")
102 | ```
103 | 
104 | ### Persona Persistence
105 | 
106 | Personas are stored with vector embeddings for efficient retrieval:
107 | 
108 | ```python
109 | # List all available personas
110 | all_personas = memory_provider.list_all(memory_type=MemoryType.PERSONA)
111 | 
112 | # Delete a persona
113 | memory_provider.delete_by_id(persona_id, memory_type=MemoryType.PERSONA)
114 | ```
115 | 
116 | ## Implementation Notes
117 | 118 | - Persona embeddings are generated from their attributes for semantic search 119 | - The system automatically converts personas to appropriate prompts for language models 120 | - Personas can be used across multiple agents for consistent behavior 121 | - Custom persona attributes can be added beyond the basic required fields 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /.claude.md: -------------------------------------------------------------------------------- 1 | # Claude Instructions for Memorizz Project 2 | 3 | This file contains specific instructions for Claude Code to help with common development tasks for the Memorizz project. 4 | 5 | ## Project Overview 6 | 7 | Memorizz is a Python library for AI agent memory management with MongoDB integration and semantic caching capabilities. The project uses semantic versioning and is published to PyPI. 8 | 9 | ## Development Commands 10 | 11 | ### Linting and Type Checking 12 | When making code changes, always run these commands before committing: 13 | ```bash 14 | # Add the appropriate linting commands here once identified 15 | # Example: flake8, black, mypy, etc. 16 | ``` 17 | 18 | ### Testing 19 | ```bash 20 | # Add test commands here once test framework is identified 21 | # Example: pytest, python -m unittest, etc. 22 | ``` 23 | 24 | ## PyPI Deployment Process 25 | 26 | Use this process when deploying a new version to PyPI: 27 | 28 | ### 1. Version Update 29 | Update the version number in `pyproject.toml`: 30 | ```toml 31 | [project] 32 | name = "memorizz" 33 | version = "X.X.X" # Update this version number 34 | ``` 35 | 36 | ### 2. Clean and Build 37 | ```bash 38 | # Clean previous builds 39 | rm -rf dist/ 40 | 41 | # Install build dependencies 42 | pip install build twine 43 | 44 | # Build source distribution and wheel 45 | python -m build 46 | ``` 47 | 48 | ### 3. Git Operations 49 | ```bash 50 | # Commit version changes 51 | git add pyproject.toml 52 | git commit -m "Bump version to X.X.X for PyPI release" 53 | 54 | # Create and push annotated tag 55 | git tag -a vX.X.X -m "Release version X.X.X" 56 | git push origin vX.X.X 57 | git push origin main 58 | ``` 59 | 60 | ### 4. 
PyPI Upload 61 | ```bash 62 | # Upload to production PyPI (requires API token) 63 | twine upload dist/* 64 | ``` 65 | 66 | **Note**: You'll need to provide your PyPI API token when prompted, or set up `~/.pypirc` with your credentials: 67 | ```ini 68 | [pypi] 69 | username = __token__ 70 | password = your-api-token-here 71 | ``` 72 | 73 | ## Project Structure Notes 74 | 75 | - **Main package**: Located in `src/memorizz/` 76 | - **Examples**: Located in `examples/` directory with Jupyter notebooks 77 | - **Memory types**: Defined in `src/memorizz/enums/memory_type.py` 78 | - **MongoDB provider**: Located in `src/memorizz/memory_provider/mongodb/` 79 | - **Semantic cache**: Located in `src/memorizz/short_term_memory/semantic_cache.py` 80 | 81 | ## Key Features to Remember 82 | 83 | - **Semantic Cache**: Vector-based query-response caching with configurable similarity thresholds 84 | - **Memory Types**: CONVERSATION_MEMORY, WORKFLOW_MEMORY, LONG_TERM_MEMORY, SHORT_TERM_MEMORY, PERSONAS, TOOLBOX, SHARED_MEMORY, MEMAGENT, SUMMARIES, SEMANTIC_CACHE 85 | - **Scoping**: Supports LOCAL (agent-specific) and GLOBAL (cross-agent) cache scopes 86 | - **MongoDB Integration**: Uses MongoDB Atlas with vector search capabilities 87 | 88 | ## Semantic Cache Configuration Example 89 | 90 | ```python 91 | from memorizz.short_term_memory.semantic_cache import SemanticCacheConfig 92 | from memorizz.enums import SemanticCacheScope 93 | 94 | config = SemanticCacheConfig( 95 | similarity_threshold=0.85, # 0.0-1.0 scale 96 | max_cache_size=1000, # Maximum entries 97 | ttl_hours=24.0, # Time-to-live 98 | scope=SemanticCacheScope.LOCAL, # LOCAL or GLOBAL 99 | enable_memory_provider_sync=True 100 | ) 101 | ``` 102 | 103 | ## Important Reminders 104 | 105 | - Always test changes before deploying 106 | - Semantic versioning: MAJOR.MINOR.PATCH 107 | - Check that all dependencies are properly listed in `pyproject.toml` 108 | - Ensure MongoDB configurations are properly handled 109 | - Semantic cache requires embedding providers (OpenAI, VoyageAI, etc.) 110 | 111 | ## Common Issues 112 | 113 | - **Embedding Provider**: Make sure embedding providers are configured correctly 114 | - **MongoDB Atlas**: Vector search indexes must be created for semantic functionality 115 | - **Dependencies**: Ensure all required packages are installed and compatible 116 | 117 | ## Repository Information 118 | 119 | - **GitHub**: https://github.com/RichmondAlake/memorizz 120 | - **PyPI**: https://pypi.org/project/memorizz/ 121 | - **Main Branch**: `main` 122 | - **License**: MIT -------------------------------------------------------------------------------- /eval/longmemeval/README.md: -------------------------------------------------------------------------------- 1 | # LongMemEval Evaluation for Memorizz 2 | 3 | This directory contains the evaluation script for testing Memorizz's long-term memory capabilities using the LongMemEval benchmark. 4 | 5 | ## Setup 6 | 7 | ### 1. Download the Dataset 8 | 9 | The LongMemEval dataset needs to be downloaded manually from the official repository: 10 | 11 | ```bash 12 | # Run the download helper script 13 | python download_dataset.py 14 | ``` 15 | 16 | This will provide instructions for downloading the dataset files. You need to: 17 | 18 | 1. Visit https://github.com/xiaowu0162/LongMemEval 19 | 2. Follow their setup instructions 20 | 3. Download the dataset files: 21 | - `longmemeval_oracle.json` 22 | - `longmemeval_s.json` 23 | - `longmemeval_m.json` 24 | 4. 
Place these files in the `data/` directory 25 | 26 | ### 2. Install Dependencies 27 | 28 | Make sure you have the required packages installed: 29 | 30 | ```bash 31 | pip install datasets transformers 32 | ``` 33 | 34 | ### 3. Configure Environment Variables 35 | 36 | The script requires OpenAI API access for evaluation. Set your API key: 37 | 38 | ```bash 39 | export OPENAI_API_KEY="your-openai-api-key" 40 | ``` 41 | 42 | Optionally, configure MongoDB for memory storage: 43 | 44 | ```bash 45 | export MONGODB_URI="your-mongodb-connection-string" 46 | ``` 47 | 48 | ## Usage 49 | 50 | ### Basic Evaluation 51 | 52 | Run the evaluation with default settings (oracle variant, 50 samples): 53 | 54 | ```bash 55 | python evaluate_memorizz.py 56 | ``` 57 | 58 | ### Custom Configuration 59 | 60 | ```bash 61 | python evaluate_memorizz.py \ 62 | --dataset_variant oracle \ 63 | --num_samples 100 \ 64 | --application_mode general \ 65 | --output_dir ./results \ 66 | --verbose 67 | ``` 68 | 69 | ### Parameters 70 | 71 | - `--dataset_variant`: Choose from "oracle", "s", or "m" (default: "oracle") 72 | - `--num_samples`: Number of samples to evaluate (default: 50) 73 | - `--application_mode`: Memorizz application mode to use (default: "general") 74 | - `--output_dir`: Directory to save results (default: "./results") 75 | - `--verbose`: Enable verbose logging 76 | 77 | ### Dataset Variants 78 | 79 | - **oracle**: Contains only the evidence sessions (easier, for testing) 80 | - **s**: Short version with ~40 history sessions (~115k tokens) 81 | - **m**: Medium version with ~500 history sessions (much longer) 82 | 83 | ## Output 84 | 85 | The evaluation script will: 86 | 87 | 1. Load the specified dataset variant 88 | 2. Create fresh Memorizz agents for each sample 89 | 3. Process conversation histories to build memory 90 | 4. Ask evaluation questions and collect responses 91 | 5. Use GPT-4 to evaluate response quality 92 | 6. Save detailed results to JSON files 93 | 94 | Results include: 95 | - Overall accuracy and scores 96 | - Performance by category (IE, MR, KU, TR, ABS) 97 | - Detailed per-sample results 98 | - Processing time statistics 99 | 100 | ## Example Output 101 | 102 | ``` 103 | EVALUATION SUMMARY 104 | ================================================== 105 | Dataset Variant: oracle 106 | Application Mode: general 107 | Samples Evaluated: 50 108 | Overall Accuracy: 0.720 109 | Overall Score: 0.756 110 | Processing Time: 245.67s 111 | 112 | Category Performance: 113 | information_extraction: 0.850 (12 samples) 114 | multi_session_reasoning: 0.667 (15 samples) 115 | knowledge_updates: 0.700 (10 samples) 116 | temporal_reasoning: 0.600 (8 samples) 117 | abstention: 0.800 (5 samples) 118 | 119 | Detailed results saved to: ./results/longmemeval_oracle_general_20241201_143022.json 120 | ``` 121 | 122 | ## Troubleshooting 123 | 124 | ### Dataset Not Found 125 | 126 | If you get a "Dataset file not found" error: 127 | 1. Make sure you've downloaded the dataset files 128 | 2. Check that they're in the correct `data/` directory 129 | 3. Verify the filenames match exactly 130 | 131 | ### Memory Provider Issues 132 | 133 | If MongoDB connection fails, the script will fall back to the default memory provider. For best results, configure a proper MongoDB instance. 134 | 135 | ### API Rate Limits 136 | 137 | The evaluation uses GPT-4 for scoring, which may hit rate limits with large evaluations. 
Consider: 138 | - Using smaller `num_samples` values 139 | - Adding delays between API calls 140 | - Using a higher-tier OpenAI account -------------------------------------------------------------------------------- /src/memorizz/memagent/builders/config_builder.py: -------------------------------------------------------------------------------- 1 | """Configuration builder for MemAgent.""" 2 | 3 | import logging 4 | from typing import Any, Dict 5 | 6 | from ..models import MemAgentConfig 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | class ConfigBuilder: 12 | """ 13 | Builder for MemAgent configuration objects. 14 | 15 | This provides a fluent interface for building complex configurations 16 | that can be reused across multiple agent instances. 17 | """ 18 | 19 | def __init__(self): 20 | """Initialize the config builder.""" 21 | self._config_dict = {} 22 | 23 | def instruction(self, text: str) -> "ConfigBuilder": 24 | """Set the instruction.""" 25 | self._config_dict["instruction"] = text 26 | return self 27 | 28 | def max_steps(self, steps: int) -> "ConfigBuilder": 29 | """Set maximum steps.""" 30 | self._config_dict["max_steps"] = steps 31 | return self 32 | 33 | def tool_access(self, access: str) -> "ConfigBuilder": 34 | """Set tool access level.""" 35 | self._config_dict["tool_access"] = access 36 | return self 37 | 38 | def semantic_cache(self, enabled: bool) -> "ConfigBuilder": 39 | """Enable/disable semantic cache.""" 40 | self._config_dict["semantic_cache"] = enabled 41 | return self 42 | 43 | def application_mode(self, mode: str) -> "ConfigBuilder": 44 | """Set application mode.""" 45 | self._config_dict["application_mode"] = mode 46 | return self 47 | 48 | def verbose(self, enabled: bool) -> "ConfigBuilder": 49 | """Enable/disable verbose logging.""" 50 | self._config_dict["verbose"] = enabled 51 | return self 52 | 53 | def custom(self, key: str, value: Any) -> "ConfigBuilder": 54 | """Add custom configuration parameter.""" 55 | self._config_dict[key] = value 56 | return self 57 | 58 | def build(self) -> MemAgentConfig: 59 | """ 60 | Build the configuration object. 61 | 62 | Returns: 63 | Configured MemAgentConfig instance. 64 | """ 65 | return MemAgentConfig(**self._config_dict) 66 | 67 | def to_dict(self) -> Dict[str, Any]: 68 | """ 69 | Export configuration as dictionary. 70 | 71 | Returns: 72 | Dictionary representation of the configuration. 73 | """ 74 | return self._config_dict.copy() 75 | 76 | 77 | # Preset configurations 78 | class ConfigPresets: 79 | """Predefined configuration presets for common use cases.""" 80 | 81 | @staticmethod 82 | def assistant() -> MemAgentConfig: 83 | """Configuration for general assistant.""" 84 | return ( 85 | ConfigBuilder() 86 | .instruction("You are a helpful AI assistant.") 87 | .max_steps(20) 88 | .application_mode("assistant") 89 | .semantic_cache(False) 90 | .build() 91 | ) 92 | 93 | @staticmethod 94 | def chatbot() -> MemAgentConfig: 95 | """Configuration for conversational chatbot.""" 96 | return ( 97 | ConfigBuilder() 98 | .instruction("You are a friendly conversational chatbot.") 99 | .max_steps(15) 100 | .application_mode("chatbot") 101 | .semantic_cache(True) 102 | .build() 103 | ) 104 | 105 | @staticmethod 106 | def task_agent() -> MemAgentConfig: 107 | """Configuration for task-oriented agent.""" 108 | return ( 109 | ConfigBuilder() 110 | .instruction( 111 | "You are a task-oriented agent focused on completing specific objectives." 
112 | ) 113 | .max_steps(30) 114 | .application_mode("agent") 115 | .tool_access("private") 116 | .semantic_cache(False) 117 | .build() 118 | ) 119 | 120 | @staticmethod 121 | def research_agent() -> MemAgentConfig: 122 | """Configuration for research and analysis agent.""" 123 | return ( 124 | ConfigBuilder() 125 | .instruction( 126 | "You are a research agent specialized in information gathering and analysis." 127 | ) 128 | .max_steps(25) 129 | .application_mode("agent") 130 | .semantic_cache(True) 131 | .verbose(True) 132 | .build() 133 | ) 134 | -------------------------------------------------------------------------------- /=0.26.0: -------------------------------------------------------------------------------- 1 | Requirement already satisfied: sentence-transformers in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (5.1.2) 2 | Requirement already satisfied: transformers in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (4.57.1) 3 | Collecting accelerate 4 | Using cached accelerate-1.10.1-py3-none-any.whl (374 kB) 5 | Requirement already satisfied: huggingface-hub>=0.20.0 in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from sentence-transformers) (0.35.0) 6 | Requirement already satisfied: typing_extensions>=4.5.0 in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from sentence-transformers) (4.15.0) 7 | Requirement already satisfied: scikit-learn in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from sentence-transformers) (1.0.2) 8 | Requirement already satisfied: Pillow in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from sentence-transformers) (9.0.1) 9 | Requirement already satisfied: tqdm in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from sentence-transformers) (4.67.1) 10 | Requirement already satisfied: torch>=1.11.0 in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from sentence-transformers) (2.2.2) 11 | Requirement already satisfied: scipy in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from sentence-transformers) (1.7.3) 12 | Requirement already satisfied: filelock in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from transformers) (3.6.0) 13 | Requirement already satisfied: packaging>=20.0 in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from transformers) (25.0) 14 | Requirement already satisfied: regex!=2019.12.17 in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from transformers) (2022.3.15) 15 | Requirement already satisfied: numpy>=1.17 in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from transformers) (1.21.5) 16 | Requirement already satisfied: tokenizers<=0.23.0,>=0.22.0 in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from transformers) (0.22.1) 17 | Requirement already satisfied: pyyaml>=5.1 in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from transformers) (6.0.2) 18 | Requirement already satisfied: requests in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from transformers) (2.32.5) 19 | Requirement already satisfied: safetensors>=0.4.3 in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from transformers) (0.6.2) 20 | Requirement already satisfied: psutil in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from accelerate) (5.9.4) 21 | Requirement already satisfied: hf-xet<2.0.0,>=1.1.3 in 
/Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from huggingface-hub>=0.20.0->sentence-transformers) (1.1.10) 22 | Requirement already satisfied: fsspec>=2023.5.0 in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from huggingface-hub>=0.20.0->sentence-transformers) (2025.9.0) 23 | Requirement already satisfied: sympy in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from torch>=1.11.0->sentence-transformers) (1.10.1) 24 | Requirement already satisfied: networkx in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from torch>=1.11.0->sentence-transformers) (3.2.1) 25 | Requirement already satisfied: jinja2 in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from torch>=1.11.0->sentence-transformers) (3.1.6) 26 | Requirement already satisfied: MarkupSafe>=2.0 in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from jinja2->torch>=1.11.0->sentence-transformers) (3.0.2) 27 | Requirement already satisfied: idna<4,>=2.5 in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from requests->transformers) (3.3) 28 | Requirement already satisfied: charset_normalizer<4,>=2 in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from requests->transformers) (2.0.4) 29 | Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from requests->transformers) (2.5.0) 30 | Requirement already satisfied: certifi>=2017.4.17 in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from requests->transformers) (2025.8.3) 31 | Requirement already satisfied: threadpoolctl>=2.0.0 in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from scikit-learn->sentence-transformers) (2.2.0) 32 | Requirement already satisfied: joblib>=0.11 in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from scikit-learn->sentence-transformers) (1.1.0) 33 | Requirement already satisfied: mpmath>=0.19 in /Users/richmondalake/opt/anaconda3/lib/python3.9/site-packages (from sympy->torch>=1.11.0->sentence-transformers) (1.2.1) 34 | Installing collected packages: accelerate 35 | Successfully installed accelerate-1.10.1 36 | -------------------------------------------------------------------------------- /tests/unit/test_filesystem_provider.py: -------------------------------------------------------------------------------- 1 | """Tests for the filesystem memory provider.""" 2 | 3 | from pathlib import Path 4 | from typing import List 5 | 6 | import pytest 7 | 8 | from memorizz.enums import MemoryType 9 | from memorizz.memagent import MemAgentModel 10 | from memorizz.memory_provider import FileSystemConfig, FileSystemProvider 11 | 12 | 13 | class DummyEmbeddingProvider: 14 | """Minimal embedding provider used to avoid network calls.""" 15 | 16 | def __init__(self) -> None: 17 | self.calls: List[str] = [] 18 | 19 | def get_embedding(self, text: str) -> List[float]: 20 | self.calls.append(text) 21 | seed = float(sum(ord(ch) for ch in text)) 22 | return [seed, float(len(text) or 1), 0.0] 23 | 24 | def get_provider_info(self) -> str: 25 | return "dummy" 26 | 27 | 28 | def _make_provider(tmp_path, embedding_provider=None) -> FileSystemProvider: 29 | root = Path(tmp_path) / "fs-memory" 30 | config = FileSystemConfig( 31 | root_path=root, embedding_provider=embedding_provider, lazy_vector_indexes=True 32 | ) 33 | return FileSystemProvider(config) 34 | 35 | 36 | def test_store_and_query_documents(tmp_path): 37 | provider = 
_make_provider(tmp_path) 38 | 39 | doc_id = provider.store( 40 | { 41 | "name": "demo", 42 | "content": "hello filesystem memory", 43 | "memory_id": "memory-123", 44 | }, 45 | memory_store_type=MemoryType.LONG_TERM_MEMORY, 46 | ) 47 | 48 | retrieved = provider.retrieve_by_id(doc_id, MemoryType.LONG_TERM_MEMORY) 49 | assert retrieved["content"] == "hello filesystem memory" 50 | 51 | results = provider.retrieve_by_query( 52 | {"memory_id": "memory-123"}, 53 | memory_type=MemoryType.LONG_TERM_MEMORY, 54 | limit=1, 55 | ) 56 | assert results and results[0]["id"] == doc_id 57 | 58 | provider.delete_by_id(doc_id, MemoryType.LONG_TERM_MEMORY) 59 | assert provider.list_all(MemoryType.LONG_TERM_MEMORY) == [] 60 | 61 | 62 | def test_memagent_round_trip(tmp_path): 63 | provider = _make_provider(tmp_path) 64 | 65 | agent = MemAgentModel( 66 | instruction="test agent", 67 | memory_ids=["mem-1"], 68 | application_mode="assistant", 69 | ) 70 | agent_id = provider.store_memagent(agent) 71 | 72 | loaded = provider.retrieve_memagent(agent_id) 73 | assert loaded is not None 74 | assert loaded.memory_ids == ["mem-1"] 75 | 76 | provider.delete_memagent(agent_id) 77 | assert provider.retrieve_memagent(agent_id) is None 78 | 79 | 80 | def test_semantic_query_uses_embedding_provider(tmp_path): 81 | dummy = DummyEmbeddingProvider() 82 | provider = _make_provider(tmp_path, embedding_provider=dummy) 83 | 84 | provider.store( 85 | { 86 | "content": "alpha memory block", 87 | "memory_id": "alpha", 88 | "embedding": dummy.get_embedding("alpha memory block"), 89 | }, 90 | memory_store_type=MemoryType.LONG_TERM_MEMORY, 91 | ) 92 | provider.store( 93 | { 94 | "content": "beta unrelated record", 95 | "memory_id": "beta", 96 | "embedding": dummy.get_embedding("beta unrelated record"), 97 | }, 98 | memory_store_type=MemoryType.LONG_TERM_MEMORY, 99 | ) 100 | 101 | results = provider.retrieve_by_query( 102 | "alpha memory block", 103 | memory_type=MemoryType.LONG_TERM_MEMORY, 104 | limit=1, 105 | memory_id="alpha", 106 | ) 107 | assert results and results[0]["memory_id"] == "alpha" 108 | assert "alpha memory block" in dummy.calls 109 | 110 | 111 | def test_keyword_search_without_embeddings(tmp_path): 112 | provider = _make_provider(tmp_path) 113 | provider.store( 114 | {"content": "remember keyword fallback", "memory_id": "k1"}, 115 | memory_store_type=MemoryType.LONG_TERM_MEMORY, 116 | ) 117 | 118 | # Force keyword path by disabling embedding lookups 119 | provider._embedding_provider = None 120 | provider._get_embedding_provider = lambda: None 121 | 122 | results = provider.retrieve_by_query( 123 | "keyword fallback", memory_type=MemoryType.LONG_TERM_MEMORY, limit=1 124 | ) 125 | assert results and results[0]["memory_id"] == "k1" 126 | 127 | 128 | def test_delete_memagent_cascade_removes_memories(tmp_path): 129 | provider = _make_provider(tmp_path) 130 | 131 | memory_id = "shared-memory" 132 | provider.store( 133 | {"content": "greeting", "memory_id": memory_id}, 134 | memory_store_type=MemoryType.CONVERSATION_MEMORY, 135 | ) 136 | 137 | agent = MemAgentModel( 138 | instruction="cascade", 139 | memory_ids=[memory_id], 140 | application_mode="assistant", 141 | ) 142 | agent_id = provider.store_memagent(agent) 143 | 144 | provider.delete_memagent(agent_id, cascade=True) 145 | assert provider.list_all(MemoryType.CONVERSATION_MEMORY) == [] 146 | -------------------------------------------------------------------------------- /eval/longmemeval/README_evaluation_architectures.md: 
-------------------------------------------------------------------------------- 1 | # LongMemEval Multi-Architecture Evaluation 2 | 3 | This directory contains three evaluation scripts for testing Memorizz's long-term memory capabilities using the LongMemEval benchmark across different agentic architectures. 4 | 5 | ## Available Evaluation Scripts 6 | 7 | ### 1. Single Agent Evaluation (`evaluate_memorizz.py`) 8 | **Architecture**: Single Agent 9 | **Description**: Evaluates a single MemAgent's memory capabilities using traditional single-agent architecture. 10 | 11 | **Key Features**: 12 | - Single agent handles all memory tasks 13 | - Direct conversation processing 14 | - Baseline performance measurement 15 | - Simple architecture for comparison 16 | 17 | **Usage**: 18 | ```bash 19 | python evaluate_memorizz.py --variant oracle --samples 50 --verbose 20 | ``` 21 | 22 | ### 2. Delegate Pattern Evaluation (`evaluate_delegate_pattern.py`) 23 | **Architecture**: Multi-Agent Delegate Pattern 24 | **Description**: Evaluates multi-agent architecture where a root agent delegates tasks to specialized agents working in parallel. 25 | 26 | **Key Features**: 27 | - **Root Agent**: Coordinates and delegates tasks 28 | - **Memory Specialist**: Focuses on memory retrieval and organization 29 | - **Temporal Specialist**: Handles time-based queries and sequencing 30 | - **Context Integrator**: Manages cross-session analysis and patterns 31 | - Parallel task execution 32 | - Flat delegation structure 33 | 34 | **Agent Structure**: 35 | ``` 36 | Root Agent (Coordinator) 37 | ├── Memory Specialist 38 | ├── Temporal Specialist 39 | └── Context Integrator 40 | ``` 41 | 42 | **Usage**: 43 | ```bash 44 | python evaluate_delegate_pattern.py --variant oracle --samples 50 --verbose 45 | ``` 46 | 47 | ### 3. Hierarchical Pattern Evaluation (`evaluate_hierarchical_pattern.py`) 48 | **Architecture**: Multi-Agent Hierarchical Pattern 49 | **Description**: Evaluates hierarchical multi-agent architecture with multiple organizational levels and specialized branches. 
50 | 51 | **Key Features**: 52 | - **Executive Agent**: Top-level strategic coordination 53 | - **Branch Coordinators**: Middle management for specific domains 54 | - **Specialist Agents**: Bottom-level task execution 55 | - Hierarchical task distribution 56 | - Structured command chain 57 | 58 | **Agent Hierarchy**: 59 | ``` 60 | Executive Coordinator (Top Level) 61 | ├── Memory Branch 62 | │ ├── Memory Coordinator (Middle Level) 63 | │ └── Memory Retrieval Specialist (Bottom Level) 64 | └── Analysis Branch 65 | ├── Analysis Coordinator (Middle Level) 66 | ├── Temporal Analysis Specialist (Bottom Level) 67 | └── Context Extraction Specialist (Bottom Level) 68 | ``` 69 | 70 | **Usage**: 71 | ```bash 72 | python evaluate_hierarchical_pattern.py --variant oracle --samples 50 --verbose 73 | ``` 74 | 75 | ## Architecture Comparison 76 | 77 | | Feature | Single Agent | Delegate Pattern | Hierarchical Pattern | 78 | |---------|-------------|------------------|---------------------| 79 | | **Complexity** | Low | Medium | High | 80 | | **Specialization** | None | High | Very High | 81 | | **Coordination** | N/A | Flat | Multi-level | 82 | | **Scalability** | Limited | Good | Excellent | 83 | | **Task Distribution** | None | Parallel | Hierarchical | 84 | | **Command Structure** | Direct | Delegate | Chain of Command | 85 | 86 | ## Evaluation Metrics 87 | 88 | All evaluation scripts measure: 89 | - Response accuracy against ground truth 90 | - Response time performance 91 | - Memory utilization effectiveness 92 | - Architecture-specific metrics 93 | 94 | ## Expected Use Cases 95 | 96 | ### Single Agent 97 | - Baseline performance measurement 98 | - Simple memory tasks 99 | - Resource-constrained environments 100 | 101 | ### Delegate Pattern 102 | - Parallel processing requirements 103 | - Specialized task domains 104 | - Medium complexity scenarios 105 | 106 | ### Hierarchical Pattern 107 | - Complex organizational tasks 108 | - Large-scale coordination 109 | - Enterprise-level scenarios 110 | 111 | ## Running Comparative Analysis 112 | 113 | To compare all three architectures: 114 | 115 | ```bash 116 | # Run all evaluations 117 | python evaluate_memorizz.py --variant oracle --samples 50 --output-dir ./results/single 118 | python evaluate_delegate_pattern.py --variant oracle --samples 50 --output-dir ./results/delegate 119 | python evaluate_hierarchical_pattern.py --variant oracle --samples 50 --output-dir ./results/hierarchical 120 | 121 | # Results will be saved with architecture identifiers for comparison 122 | ``` 123 | 124 | ## Dataset Variants 125 | 126 | All scripts support three LongMemEval variants: 127 | - `oracle`: Full dataset with ground truth 128 | - `s`: Short conversation variant 129 | - `m`: Medium conversation variant 130 | 131 | ## Output Format 132 | 133 | Each evaluation produces JSON results with: 134 | - Architecture identification 135 | - Detailed sample results 136 | - Aggregate performance metrics 137 | - Timestamp and configuration info 138 | 139 | Results are saved in the format: `longmemeval_{architecture}_results_{variant}_{timestamp}.json` -------------------------------------------------------------------------------- /eval/longmemeval/download_dataset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Download script for LongMemEval dataset 4 | 5 | This script downloads the LongMemEval dataset from the official Google Drive source 6 | and extracts it to the correct location for the evaluation 
script. 7 | """ 8 | 9 | import os 10 | import sys 11 | import json 12 | from pathlib import Path 13 | import tarfile 14 | 15 | def install_gdown(): 16 | """Install gdown if not available.""" 17 | try: 18 | import gdown 19 | return gdown 20 | except ImportError: 21 | print("Installing gdown...") 22 | import subprocess 23 | subprocess.check_call([sys.executable, "-m", "pip", "install", "gdown"]) 24 | import gdown 25 | return gdown 26 | 27 | def main(): 28 | """Download LongMemEval dataset.""" 29 | # Get the data directory 30 | script_dir = Path(__file__).parent 31 | data_dir = script_dir / "data" 32 | data_dir.mkdir(exist_ok=True) 33 | 34 | print("LongMemEval Dataset Downloader") 35 | print("=" * 40) 36 | 37 | # Install gdown if needed 38 | try: 39 | gdown = install_gdown() 40 | except Exception as e: 41 | print(f"❌ Failed to install gdown: {e}") 42 | print("Please install manually: pip install gdown") 43 | return 44 | 45 | # Official Google Drive download link 46 | file_id = '1zJgtYRFhOh5zDQzzatiddfjYhFSnyQ80' 47 | url = f'https://drive.google.com/uc?id={file_id}' 48 | file_path = data_dir / 'longmemeval_data.tar.gz' 49 | 50 | print("📥 DOWNLOADING DATASET:") 51 | print(f"Source: Official Google Drive") 52 | print(f"URL: {url}") 53 | print(f"Destination: {file_path}") 54 | print() 55 | 56 | # Download the compressed dataset 57 | if not file_path.exists(): 58 | try: 59 | print("Downloading longmemeval_data.tar.gz...") 60 | gdown.download(url, str(file_path), quiet=False) 61 | print("✅ Download completed!") 62 | except Exception as e: 63 | print(f"❌ Download failed: {e}") 64 | print("You can try downloading manually from:") 65 | print(f"https://drive.google.com/file/d/{file_id}/view") 66 | return 67 | else: 68 | print(f"✅ '{file_path.name}' already exists, skipping download.") 69 | 70 | print() 71 | print("📦 EXTRACTING DATASET:") 72 | 73 | # Check if files already exist 74 | expected_files = [ 75 | 'longmemeval_oracle.json', 76 | 'longmemeval_s.json', 77 | 'longmemeval_m.json' 78 | ] 79 | 80 | files_exist = all((data_dir / filename).exists() for filename in expected_files) 81 | 82 | if not files_exist: 83 | try: 84 | print("Extracting tar.gz file...") 85 | with tarfile.open(file_path, 'r:gz') as tar: 86 | # Extract to data directory 87 | tar.extractall(path=data_dir) 88 | print("✅ Extraction completed!") 89 | except Exception as e: 90 | print(f"❌ Extraction failed: {e}") 91 | return 92 | else: 93 | print("✅ Dataset files already exist, skipping extraction.") 94 | 95 | print() 96 | print("📋 VERIFYING FILES:") 97 | 98 | all_found = True 99 | total_size = 0 100 | 101 | for filename in expected_files: 102 | filepath = data_dir / filename 103 | if filepath.exists(): 104 | size_mb = filepath.stat().st_size / (1024 * 1024) 105 | total_size += size_mb 106 | print(f"✅ {filename} - Found ({size_mb:.1f} MB)") 107 | else: 108 | print(f"❌ {filename} - Not found") 109 | all_found = False 110 | 111 | print() 112 | if all_found: 113 | print(f"🎉 SUCCESS! 
All dataset files downloaded and extracted ({total_size:.1f} MB total)") 114 | print() 115 | print("📊 DATASET VARIANTS:") 116 | print("• longmemeval_oracle.json - Oracle retrieval (easiest, for testing)") 117 | print("• longmemeval_s.json - Short version (~115k tokens, ~40 sessions)") 118 | print("• longmemeval_m.json - Medium version (~500 sessions)") 119 | print() 120 | print("🚀 READY TO RUN EVALUATION:") 121 | print("cd eval/longmemeval") 122 | print("python evaluate_memorizz.py --dataset_variant oracle") 123 | print("python evaluate_memorizz.py --dataset_variant s") 124 | print("python evaluate_memorizz.py --dataset_variant m") 125 | else: 126 | print("⚠️ Some dataset files are missing after extraction.") 127 | print("Please check the extracted files or try downloading again.") 128 | 129 | # Clean up compressed file (optional) 130 | if file_path.exists() and all_found: 131 | try: 132 | file_path.unlink() 133 | print(f"🗑️ Cleaned up compressed file: {file_path.name}") 134 | except: 135 | pass # Don't fail if cleanup doesn't work 136 | 137 | print(f"\n📂 Data directory: {data_dir}") 138 | print("📄 Dataset paper: https://arxiv.org/abs/2410.10813") 139 | 140 | if __name__ == "__main__": 141 | main() -------------------------------------------------------------------------------- /src/memorizz/embeddings/openai/provider.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import Any, Dict, List 3 | 4 | import openai 5 | 6 | from .. import BaseEmbeddingProvider 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | # Suppress httpx logs to reduce noise from API requests 11 | logging.getLogger("httpx").setLevel(logging.WARNING) 12 | 13 | 14 | class OpenAIEmbeddingProvider(BaseEmbeddingProvider): 15 | """OpenAI embedding provider implementation.""" 16 | 17 | # Model configuration with their dimensions 18 | MODEL_DIMENSIONS = { 19 | "text-embedding-3-small": 1536, # Default for 3-small is 1536, but can be reduced 20 | "text-embedding-3-large": 3072, # Default for 3-large is 3072, but can be reduced 21 | "text-embedding-ada-002": 1536, # Fixed dimensions 22 | } 23 | 24 | def __init__(self, config: Dict[str, Any] = None): 25 | """ 26 | Initialize OpenAI embedding provider. 27 | 28 | Parameters: 29 | ----------- 30 | config : Dict[str, Any] 31 | Configuration dictionary with keys: 32 | - model: str (default: "text-embedding-3-small") 33 | - dimensions: int (default: 256, only for text-embedding-3-* models) 34 | - api_key: str (optional, uses env var if not provided) 35 | - base_url: str (optional, for custom endpoints) 36 | """ 37 | super().__init__(config) 38 | 39 | # Set default configuration 40 | self.model = self.config.get("model", "text-embedding-3-small") 41 | self.dimensions = self.config.get("dimensions", 256) 42 | 43 | # Validate model and dimensions 44 | if self.model not in self.MODEL_DIMENSIONS: 45 | raise ValueError( 46 | f"Unsupported OpenAI model: {self.model}. Supported models: {list(self.MODEL_DIMENSIONS.keys())}" 47 | ) 48 | 49 | # For ada-002, dimensions cannot be customized 50 | if self.model == "text-embedding-ada-002" and self.dimensions != 1536: 51 | logger.warning( 52 | f"Model {self.model} has fixed dimensions of 1536. Ignoring custom dimensions parameter." 
53 | ) 54 | self.dimensions = 1536 55 | 56 | # For 3-small and 3-large, validate dimensions are within allowed range 57 | if self.model in ["text-embedding-3-small", "text-embedding-3-large"]: 58 | max_dims = self.MODEL_DIMENSIONS[self.model] 59 | if self.dimensions > max_dims: 60 | raise ValueError( 61 | f"Dimensions {self.dimensions} exceed maximum {max_dims} for model {self.model}" 62 | ) 63 | 64 | # Initialize OpenAI client 65 | client_kwargs = {} 66 | if "api_key" in self.config: 67 | client_kwargs["api_key"] = self.config["api_key"] 68 | if "base_url" in self.config: 69 | client_kwargs["base_url"] = self.config["base_url"] 70 | 71 | self.client = openai.OpenAI(**client_kwargs) 72 | 73 | logger.info( 74 | f"Initialized OpenAI provider with model={self.model}, dimensions={self.dimensions}" 75 | ) 76 | 77 | def get_embedding(self, text: str, **kwargs) -> List[float]: 78 | """ 79 | Generate embedding using OpenAI's API. 80 | 81 | Parameters: 82 | ----------- 83 | text : str 84 | The text to embed 85 | **kwargs 86 | Additional parameters: 87 | - model: str (override default model) 88 | - dimensions: int (override default dimensions) 89 | 90 | Returns: 91 | -------- 92 | List[float] 93 | The embedding vector 94 | """ 95 | # Allow per-call overrides 96 | model = kwargs.get("model", self.model) 97 | dimensions = kwargs.get("dimensions", self.dimensions) 98 | 99 | # Clean the text 100 | text = text.replace("\n", " ") 101 | 102 | try: 103 | # For ada-002, don't pass dimensions parameter 104 | if model == "text-embedding-ada-002": 105 | response = self.client.embeddings.create(input=[text], model=model) 106 | else: 107 | response = self.client.embeddings.create( 108 | input=[text], model=model, dimensions=dimensions 109 | ) 110 | 111 | return response.data[0].embedding 112 | except Exception as e: 113 | logger.error(f"Error generating OpenAI embedding: {str(e)}") 114 | raise 115 | 116 | def get_dimensions(self) -> int: 117 | """Get the dimensionality of embeddings produced by this provider.""" 118 | return self.dimensions 119 | 120 | def get_default_model(self) -> str: 121 | """Get the default model name for this provider.""" 122 | return self.model 123 | 124 | @classmethod 125 | def get_available_models(cls) -> List[str]: 126 | """Get list of available OpenAI embedding models.""" 127 | return list(cls.MODEL_DIMENSIONS.keys()) 128 | 129 | @classmethod 130 | def get_model_max_dimensions(cls, model: str) -> int: 131 | """Get maximum dimensions for a specific model.""" 132 | return cls.MODEL_DIMENSIONS.get(model, 1536) 133 | -------------------------------------------------------------------------------- /src/memorizz/enums/application_mode.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | from typing import List 3 | 4 | from .memory_type import MemoryType 5 | 6 | 7 | class ApplicationMode(Enum): 8 | """ 9 | Application modes define the environment and context the agent operates within, 10 | automatically configuring the appropriate memory types for each scenario. 11 | """ 12 | 13 | # Available application modes 14 | WORKFLOW = "workflow" 15 | DEEP_RESEARCH = "deep_research" 16 | ASSISTANT = "assistant" 17 | 18 | # Default mode 19 | DEFAULT = ASSISTANT 20 | 21 | 22 | class ApplicationModeConfig: 23 | """ 24 | Configuration class that maps application modes to their associated memory types 25 | and provides additional configuration for each mode. 
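    Example (illustrative sketch using only the helpers defined below):

        ApplicationModeConfig.get_memory_types(ApplicationMode.WORKFLOW)
        # -> workflow, toolbox, long-term and short-term memory types

        ApplicationModeConfig.validate_mode("assistant")
        # -> ApplicationMode.ASSISTANT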
26 | """ 27 | 28 | # Memory type mappings for each application mode 29 | MEMORY_TYPE_MAPPINGS = { 30 | ApplicationMode.WORKFLOW: [ 31 | MemoryType.WORKFLOW_MEMORY, 32 | MemoryType.TOOLBOX, 33 | MemoryType.LONG_TERM_MEMORY, # Knowledge base 34 | MemoryType.SHORT_TERM_MEMORY, # For intermediate results 35 | ], 36 | ApplicationMode.DEEP_RESEARCH: [ 37 | MemoryType.TOOLBOX, 38 | MemoryType.SHARED_MEMORY, 39 | MemoryType.LONG_TERM_MEMORY, # Research knowledge base 40 | MemoryType.SHORT_TERM_MEMORY, # For research sessions 41 | MemoryType.SUMMARIES, # For context compression 42 | ], 43 | ApplicationMode.ASSISTANT: [ 44 | MemoryType.CONVERSATION_MEMORY, 45 | MemoryType.LONG_TERM_MEMORY, # Knowledge base 46 | MemoryType.PERSONAS, # For personalization 47 | MemoryType.ENTITY_MEMORY, # Structured entity facts 48 | MemoryType.SHORT_TERM_MEMORY, # For context 49 | MemoryType.SUMMARIES, # For memory compression 50 | ], 51 | } 52 | 53 | # Description for each application mode 54 | MODE_DESCRIPTIONS = { 55 | ApplicationMode.WORKFLOW: "Optimized for structured task execution and process automation", 56 | ApplicationMode.DEEP_RESEARCH: "Designed for intensive research with collaboration capabilities", 57 | ApplicationMode.ASSISTANT: "General-purpose conversational assistant with personalization", 58 | } 59 | 60 | @classmethod 61 | def get_memory_types(cls, mode: ApplicationMode) -> List[MemoryType]: 62 | """ 63 | Get the memory types associated with an application mode. 64 | 65 | Parameters: 66 | ----------- 67 | mode : ApplicationMode 68 | The application mode to get memory types for. 69 | 70 | Returns: 71 | -------- 72 | List[MemoryType] 73 | List of memory types for the specified mode. 74 | """ 75 | return cls.MEMORY_TYPE_MAPPINGS.get( 76 | mode, cls.MEMORY_TYPE_MAPPINGS[ApplicationMode.DEFAULT] 77 | ) 78 | 79 | @classmethod 80 | def get_description(cls, mode: ApplicationMode) -> str: 81 | """ 82 | Get the description for an application mode. 83 | 84 | Parameters: 85 | ----------- 86 | mode : ApplicationMode 87 | The application mode to get description for. 88 | 89 | Returns: 90 | -------- 91 | str 92 | Description of the application mode. 93 | """ 94 | return cls.MODE_DESCRIPTIONS.get(mode, "General-purpose application mode") 95 | 96 | @classmethod 97 | def list_all_modes(cls) -> List[tuple]: 98 | """ 99 | List all available application modes with their descriptions. 100 | 101 | Returns: 102 | -------- 103 | List[tuple] 104 | List of (mode, description) tuples. 105 | """ 106 | return [(mode, cls.get_description(mode)) for mode in ApplicationMode] 107 | 108 | @classmethod 109 | def validate_mode(cls, mode_input) -> ApplicationMode: 110 | """ 111 | Validate and convert a string or enum to ApplicationMode enum. 112 | 113 | Parameters: 114 | ----------- 115 | mode_input : str | ApplicationMode 116 | String representation or enum of the application mode. 117 | 118 | Returns: 119 | -------- 120 | ApplicationMode 121 | The corresponding ApplicationMode enum. 122 | 123 | Raises: 124 | ------- 125 | ValueError 126 | If the mode input is not valid. 127 | """ 128 | # If it's already an ApplicationMode enum, return it directly 129 | if isinstance(mode_input, ApplicationMode): 130 | return mode_input 131 | 132 | # If it's a string, convert it 133 | if isinstance(mode_input, str): 134 | try: 135 | return ApplicationMode(mode_input.lower()) 136 | except ValueError: 137 | valid_modes = [mode.value for mode in ApplicationMode] 138 | raise ValueError( 139 | f"Invalid application mode: '{mode_input}'. 
Valid modes: {valid_modes}" 140 | ) 141 | 142 | # If it's neither string nor enum, raise an error 143 | raise ValueError( 144 | f"Application mode must be a string or ApplicationMode enum, got {type(mode_input)}" 145 | ) 146 | -------------------------------------------------------------------------------- /src/memorizz/memory_provider/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import TYPE_CHECKING, Any, Dict, List, Optional 3 | 4 | # Use TYPE_CHECKING for forward references to avoid circular imports 5 | if TYPE_CHECKING: 6 | from memorizz.memagent import MemAgent 7 | 8 | 9 | class MemoryProvider(ABC): 10 | """Abstract base class for memory providers.""" 11 | 12 | @abstractmethod 13 | def __init__(self, config: Dict[str, Any]): 14 | """Initialize the memory provider with configuration settings.""" 15 | 16 | @abstractmethod 17 | def store( 18 | self, 19 | data: Dict[str, Any] = None, 20 | memory_store_type: str = None, 21 | memory_id: str = None, 22 | memory_unit: Any = None, 23 | ) -> str: 24 | """ 25 | Store data in the memory provider. 26 | 27 | Parameters: 28 | ----------- 29 | data : Dict[str, Any], optional 30 | Data dictionary to store (legacy parameter) 31 | memory_store_type : str, optional 32 | Type of memory store (legacy parameter) 33 | memory_id : str, optional 34 | Memory ID to associate with (new parameter) 35 | memory_unit : MemoryUnit, optional 36 | Memory unit object to store (new parameter) 37 | """ 38 | 39 | @abstractmethod 40 | def retrieve_by_query( 41 | self, 42 | query: Dict[str, Any], 43 | memory_store_type: str = None, 44 | limit: int = 1, 45 | memory_id: str = None, 46 | memory_type: str = None, 47 | **kwargs, 48 | ) -> Optional[Dict[str, Any]]: 49 | """ 50 | Retrieve a document from the memory provider. 
51 | 52 | Parameters: 53 | ----------- 54 | query : Dict[str, Any] or str 55 | Search query (dict for filter queries, str for semantic search) 56 | memory_store_type : str, optional 57 | Type of memory store (legacy parameter name) 58 | memory_type : str or MemoryType, optional 59 | Type of memory store (new parameter name, takes precedence over memory_store_type) 60 | memory_id : str, optional 61 | Filter results to specific memory_id 62 | limit : int 63 | Maximum number of results to return 64 | **kwargs 65 | Additional provider-specific parameters 66 | """ 67 | 68 | @abstractmethod 69 | def retrieve_by_id( 70 | self, id: str, memory_store_type: str 71 | ) -> Optional[Dict[str, Any]]: 72 | """Retrieve a document from the memory provider by id.""" 73 | 74 | @abstractmethod 75 | def retrieve_by_name( 76 | self, name: str, memory_store_type: str 77 | ) -> Optional[Dict[str, Any]]: 78 | """Retrieve a document from the memory provider by name.""" 79 | 80 | @abstractmethod 81 | def delete_by_id(self, id: str, memory_store_type: str) -> bool: 82 | """Delete a document from the memory provider by id.""" 83 | 84 | @abstractmethod 85 | def delete_by_name(self, name: str, memory_store_type: str) -> bool: 86 | """Delete a document from the memory provider by name.""" 87 | 88 | @abstractmethod 89 | def delete_all(self, memory_store_type: str) -> bool: 90 | """Delete all documents within a memory store type in the memory provider.""" 91 | 92 | @abstractmethod 93 | def list_all(self, memory_store_type: str) -> List[Dict[str, Any]]: 94 | """List all documents within a memory store type in the memory provider.""" 95 | 96 | @abstractmethod 97 | def retrieve_conversation_history_ordered_by_timestamp( 98 | self, memory_id: str, memory_type: str = None, limit: int = None 99 | ) -> List[Dict[str, Any]]: 100 | """ 101 | Retrieve the conversation history ordered by timestamp. 
102 | 103 | Parameters: 104 | ----------- 105 | memory_id : str 106 | The memory ID to retrieve history for 107 | memory_type : str or MemoryType, optional 108 | Type of memory (typically CONVERSATION_MEMORY) 109 | limit : int, optional 110 | Maximum number of entries to return 111 | """ 112 | 113 | @abstractmethod 114 | def update_by_id( 115 | self, id: str, data: Dict[str, Any], memory_store_type: str 116 | ) -> bool: 117 | """Update a document in a memory store type in the memory provider by id.""" 118 | 119 | @abstractmethod 120 | def close(self) -> None: 121 | """Close the connection to the memory provider.""" 122 | 123 | @abstractmethod 124 | def store_memagent(self, memagent: "MemAgent") -> str: 125 | """Store a memagent in the memory provider.""" 126 | 127 | @abstractmethod 128 | def delete_memagent(self, agent_id: str, cascade: bool = False) -> bool: 129 | """Delete a memagent from the memory provider.""" 130 | 131 | @abstractmethod 132 | def update_memagent_memory_ids(self, agent_id: str, memory_ids: List[str]) -> bool: 133 | """Update the memory_ids of a memagent in the memory provider.""" 134 | 135 | @abstractmethod 136 | def delete_memagent_memory_ids(self, agent_id: str) -> bool: 137 | """Delete the memory_ids of a memagent in the memory provider.""" 138 | 139 | @abstractmethod 140 | def list_memagents(self) -> List[Dict[str, Any]]: 141 | """List all memagents in the memory provider.""" 142 | -------------------------------------------------------------------------------- /src/memorizz/memagent/managers/workflow_manager.py: -------------------------------------------------------------------------------- 1 | """Workflow management functionality for MemAgent.""" 2 | 3 | import logging 4 | from typing import Any, Dict, List, Optional 5 | 6 | from ...long_term_memory.procedural.workflow.workflow import Workflow, WorkflowOutcome 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | class WorkflowManager: 12 | """ 13 | Manages workflow execution and orchestration for MemAgent. 14 | 15 | This class encapsulates workflow-related functionality that was 16 | previously embedded in the main MemAgent class. 17 | """ 18 | 19 | def __init__(self): 20 | """Initialize the workflow manager.""" 21 | self.active_workflows = {} 22 | self.workflow_history = [] 23 | self._workflow_cache = {} 24 | 25 | def execute_workflow( 26 | self, workflow: Workflow, context: Dict[str, Any] 27 | ) -> WorkflowOutcome: 28 | """ 29 | Execute a workflow. 30 | 31 | Args: 32 | workflow: The Workflow instance to execute. 33 | context: Context dictionary for the workflow. 34 | 35 | Returns: 36 | WorkflowOutcome containing the result. 
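        Example (illustrative; ``my_workflow`` stands in for any constructed Workflow):
            manager = WorkflowManager()
            outcome = manager.execute_workflow(my_workflow, {"topic": "research"})
            # On failure a WorkflowOutcome with status="failed" is returned instead of raising.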
37 | """ 38 | try: 39 | logger.info( 40 | f"Executing workflow: {workflow.name if hasattr(workflow, 'name') else 'unnamed'}" 41 | ) 42 | 43 | # Track active workflow 44 | workflow_id = self._generate_workflow_id() 45 | self.active_workflows[workflow_id] = { 46 | "workflow": workflow, 47 | "context": context, 48 | "status": "running", 49 | } 50 | 51 | # Execute the workflow 52 | outcome = workflow.execute(context) 53 | 54 | # Update tracking 55 | self.active_workflows[workflow_id]["status"] = "completed" 56 | self.active_workflows[workflow_id]["outcome"] = outcome 57 | 58 | # Add to history 59 | self._add_to_history(workflow_id, workflow, context, outcome) 60 | 61 | # Clean up active workflow 62 | del self.active_workflows[workflow_id] 63 | 64 | logger.info( 65 | f"Workflow completed with status: {outcome.status if hasattr(outcome, 'status') else 'unknown'}" 66 | ) 67 | return outcome 68 | 69 | except Exception as e: 70 | logger.error(f"Workflow execution failed: {e}") 71 | return WorkflowOutcome(result=f"Error: {str(e)}", status="failed") 72 | 73 | def get_active_workflows(self) -> Dict[str, Dict[str, Any]]: 74 | """ 75 | Get currently active workflows. 76 | 77 | Returns: 78 | Dictionary of active workflows. 79 | """ 80 | return self.active_workflows.copy() 81 | 82 | def cancel_workflow(self, workflow_id: str) -> bool: 83 | """ 84 | Cancel an active workflow. 85 | 86 | Args: 87 | workflow_id: ID of the workflow to cancel. 88 | 89 | Returns: 90 | True if cancelled, False otherwise. 91 | """ 92 | try: 93 | if workflow_id in self.active_workflows: 94 | self.active_workflows[workflow_id]["status"] = "cancelled" 95 | del self.active_workflows[workflow_id] 96 | logger.info(f"Cancelled workflow: {workflow_id}") 97 | return True 98 | else: 99 | logger.warning(f"Workflow not found for cancellation: {workflow_id}") 100 | return False 101 | 102 | except Exception as e: 103 | logger.error(f"Failed to cancel workflow: {e}") 104 | return False 105 | 106 | def get_workflow_history(self, limit: Optional[int] = None) -> List[Dict[str, Any]]: 107 | """ 108 | Get workflow execution history. 109 | 110 | Args: 111 | limit: Maximum number of history entries to return. 112 | 113 | Returns: 114 | List of workflow history entries. 
115 | """ 116 | if limit: 117 | return self.workflow_history[-limit:] 118 | return self.workflow_history.copy() 119 | 120 | def clear_history(self): 121 | """Clear the workflow execution history.""" 122 | self.workflow_history.clear() 123 | logger.debug("Cleared workflow history") 124 | 125 | def _generate_workflow_id(self) -> str: 126 | """Generate a unique workflow ID.""" 127 | import uuid 128 | 129 | return f"workflow_{uuid.uuid4().hex[:8]}" 130 | 131 | def _add_to_history( 132 | self, 133 | workflow_id: str, 134 | workflow: Workflow, 135 | context: Dict[str, Any], 136 | outcome: WorkflowOutcome, 137 | ): 138 | """Add a workflow execution to history.""" 139 | from datetime import datetime 140 | 141 | history_entry = { 142 | "id": workflow_id, 143 | "workflow_name": getattr(workflow, "name", "unnamed"), 144 | "timestamp": datetime.now().isoformat(), 145 | "context": context, 146 | "outcome": { 147 | "result": outcome.result if hasattr(outcome, "result") else None, 148 | "status": outcome.status if hasattr(outcome, "status") else "unknown", 149 | }, 150 | } 151 | 152 | self.workflow_history.append(history_entry) 153 | 154 | # Limit history size 155 | max_history = 100 156 | if len(self.workflow_history) > max_history: 157 | self.workflow_history = self.workflow_history[-max_history:] 158 | -------------------------------------------------------------------------------- /src/memorizz/memory_unit/summary_component.py: -------------------------------------------------------------------------------- 1 | """ 2 | Summary Component for MemAgent 3 | 4 | Provides a structured approach to working with memory summaries that compress 5 | multiple memory components into emotionally and situationally relevant content. 6 | """ 7 | 8 | import time 9 | from datetime import datetime 10 | from typing import List, Optional 11 | 12 | from pydantic import BaseModel 13 | 14 | 15 | class SummaryComponent(BaseModel): 16 | """ 17 | A structured representation of a memory summary. 18 | 19 | Summaries compress multiple memory components from a time period into 20 | emotionally and situationally relevant content using an LLM. 
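    Example (illustrative values only):

        summary = SummaryComponent(
            memory_id="mem-1",
            agent_id="agent-1",
            summary_content="Weekly recap of customer conversations.",
            period_start=1_700_000_000.0,
            period_end=1_700_604_800.0,  # one week later
            memory_units_count=42,
        )
        summary.period_duration_hours  # -> 168.0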
21 | """ 22 | 23 | memory_id: str 24 | agent_id: str 25 | summary_content: str 26 | period_start: float 27 | period_end: float 28 | memory_units_count: int 29 | created_at: float 30 | embedding: Optional[List[float]] = None 31 | 32 | # Optional metadata 33 | summary_type: str = "automatic" # automatic, manual, scheduled 34 | compression_ratio: Optional[float] = None # original_count / summarized_count 35 | emotional_tags: Optional[List[str]] = None # emotional themes identified 36 | situational_tags: Optional[List[str]] = None # situational contexts 37 | importance_score: Optional[float] = None # 0.0 to 1.0 relevance score 38 | 39 | def __init__(self, **data): 40 | """Initialize summary component with current timestamp if not provided.""" 41 | if "created_at" not in data: 42 | data["created_at"] = time.time() 43 | super().__init__(**data) 44 | 45 | @property 46 | def period_start_datetime(self) -> datetime: 47 | """Get period start as a datetime object.""" 48 | return datetime.fromtimestamp(self.period_start) 49 | 50 | @property 51 | def period_end_datetime(self) -> datetime: 52 | """Get period end as a datetime object.""" 53 | return datetime.fromtimestamp(self.period_end) 54 | 55 | @property 56 | def created_datetime(self) -> datetime: 57 | """Get creation time as a datetime object.""" 58 | return datetime.fromtimestamp(self.created_at) 59 | 60 | @property 61 | def period_duration_hours(self) -> float: 62 | """Get the duration of the summarized period in hours.""" 63 | return (self.period_end - self.period_start) / 3600 64 | 65 | def to_dict(self) -> dict: 66 | """Convert to dictionary for storage.""" 67 | return self.model_dump() 68 | 69 | @classmethod 70 | def from_dict(cls, data: dict) -> "SummaryComponent": 71 | """Create from dictionary loaded from storage.""" 72 | return cls(**data) 73 | 74 | def get_short_preview(self, max_length: int = 100) -> str: 75 | """Get a short preview of the summary content.""" 76 | if len(self.summary_content) <= max_length: 77 | return self.summary_content 78 | return self.summary_content[:max_length] + "..." 79 | 80 | def add_emotional_tag(self, tag: str): 81 | """Add an emotional tag to the summary.""" 82 | if self.emotional_tags is None: 83 | self.emotional_tags = [] 84 | if tag not in self.emotional_tags: 85 | self.emotional_tags.append(tag) 86 | 87 | def add_situational_tag(self, tag: str): 88 | """Add a situational tag to the summary.""" 89 | if self.situational_tags is None: 90 | self.situational_tags = [] 91 | if tag not in self.situational_tags: 92 | self.situational_tags.append(tag) 93 | 94 | def calculate_compression_ratio(self, original_memory_count: int): 95 | """Calculate and set the compression ratio.""" 96 | if original_memory_count > 0: 97 | self.compression_ratio = ( 98 | original_memory_count / 1 99 | ) # Summary is 1 compressed item 100 | 101 | def __str__(self) -> str: 102 | """String representation of the summary.""" 103 | return f"Summary({self.memory_id}, {self.period_start_datetime.strftime('%Y-%m-%d')} to {self.period_end_datetime.strftime('%Y-%m-%d')}, {self.memory_units_count} memories)" 104 | 105 | def __repr__(self) -> str: 106 | """Detailed string representation.""" 107 | return f"SummaryComponent(memory_id='{self.memory_id}', agent_id='{self.agent_id}', period='{self.period_start_datetime}' to '{self.period_end_datetime}', memories={self.memory_units_count})" 108 | 109 | 110 | class SummaryMetrics(BaseModel): 111 | """ 112 | Metrics and analytics for summary generation and usage. 
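    Example (illustrative):

        metrics = SummaryMetrics()
        metrics.add_summary(summary)            # any SummaryComponent instance
        metrics.get_compression_efficiency()    # memories compressed per summary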
113 | """ 114 | 115 | total_summaries: int = 0 116 | total_memories_compressed: int = 0 117 | average_compression_ratio: float = 0.0 118 | most_common_emotional_tags: List[str] = [] 119 | most_common_situational_tags: List[str] = [] 120 | persona_updates_triggered: int = 0 121 | 122 | def add_summary(self, summary: SummaryComponent): 123 | """Add a summary to the metrics.""" 124 | self.total_summaries += 1 125 | self.total_memories_compressed += summary.memory_units_count 126 | 127 | if summary.compression_ratio: 128 | current_total = self.average_compression_ratio * (self.total_summaries - 1) 129 | self.average_compression_ratio = ( 130 | current_total + summary.compression_ratio 131 | ) / self.total_summaries 132 | 133 | def get_compression_efficiency(self) -> float: 134 | """Get overall compression efficiency.""" 135 | if self.total_summaries == 0: 136 | return 0.0 137 | return self.total_memories_compressed / self.total_summaries 138 | -------------------------------------------------------------------------------- /src/memorizz/long_term_memory/episodic/summary_component.py: -------------------------------------------------------------------------------- 1 | """ 2 | Summary Component for MemAgent 3 | 4 | Provides a structured approach to working with memory summaries that compress 5 | multiple memory components into emotionally and situationally relevant content. 6 | """ 7 | 8 | import time 9 | from datetime import datetime 10 | from typing import List, Optional 11 | 12 | from pydantic import BaseModel 13 | 14 | 15 | class SummaryComponent(BaseModel): 16 | """ 17 | A structured representation of a memory summary. 18 | 19 | Summaries compress multiple memory components from a time period into 20 | emotionally and situationally relevant content using an LLM. 
21 | """ 22 | 23 | memory_id: str 24 | agent_id: str 25 | summary_content: str 26 | period_start: float 27 | period_end: float 28 | memory_units_count: int 29 | created_at: float 30 | embedding: Optional[List[float]] = None 31 | 32 | # Optional metadata 33 | summary_type: str = "automatic" # automatic, manual, scheduled 34 | compression_ratio: Optional[float] = None # original_count / summarized_count 35 | emotional_tags: Optional[List[str]] = None # emotional themes identified 36 | situational_tags: Optional[List[str]] = None # situational contexts 37 | importance_score: Optional[float] = None # 0.0 to 1.0 relevance score 38 | 39 | def __init__(self, **data): 40 | """Initialize summary component with current timestamp if not provided.""" 41 | if "created_at" not in data: 42 | data["created_at"] = time.time() 43 | super().__init__(**data) 44 | 45 | @property 46 | def period_start_datetime(self) -> datetime: 47 | """Get period start as a datetime object.""" 48 | return datetime.fromtimestamp(self.period_start) 49 | 50 | @property 51 | def period_end_datetime(self) -> datetime: 52 | """Get period end as a datetime object.""" 53 | return datetime.fromtimestamp(self.period_end) 54 | 55 | @property 56 | def created_datetime(self) -> datetime: 57 | """Get creation time as a datetime object.""" 58 | return datetime.fromtimestamp(self.created_at) 59 | 60 | @property 61 | def period_duration_hours(self) -> float: 62 | """Get the duration of the summarized period in hours.""" 63 | return (self.period_end - self.period_start) / 3600 64 | 65 | def to_dict(self) -> dict: 66 | """Convert to dictionary for storage.""" 67 | return self.model_dump() 68 | 69 | @classmethod 70 | def from_dict(cls, data: dict) -> "SummaryComponent": 71 | """Create from dictionary loaded from storage.""" 72 | return cls(**data) 73 | 74 | def get_short_preview(self, max_length: int = 100) -> str: 75 | """Get a short preview of the summary content.""" 76 | if len(self.summary_content) <= max_length: 77 | return self.summary_content 78 | return self.summary_content[:max_length] + "..." 79 | 80 | def add_emotional_tag(self, tag: str): 81 | """Add an emotional tag to the summary.""" 82 | if self.emotional_tags is None: 83 | self.emotional_tags = [] 84 | if tag not in self.emotional_tags: 85 | self.emotional_tags.append(tag) 86 | 87 | def add_situational_tag(self, tag: str): 88 | """Add a situational tag to the summary.""" 89 | if self.situational_tags is None: 90 | self.situational_tags = [] 91 | if tag not in self.situational_tags: 92 | self.situational_tags.append(tag) 93 | 94 | def calculate_compression_ratio(self, original_memory_count: int): 95 | """Calculate and set the compression ratio.""" 96 | if original_memory_count > 0: 97 | self.compression_ratio = ( 98 | original_memory_count / 1 99 | ) # Summary is 1 compressed item 100 | 101 | def __str__(self) -> str: 102 | """String representation of the summary.""" 103 | return f"Summary({self.memory_id}, {self.period_start_datetime.strftime('%Y-%m-%d')} to {self.period_end_datetime.strftime('%Y-%m-%d')}, {self.memory_units_count} memories)" 104 | 105 | def __repr__(self) -> str: 106 | """Detailed string representation.""" 107 | return f"SummaryComponent(memory_id='{self.memory_id}', agent_id='{self.agent_id}', period='{self.period_start_datetime}' to '{self.period_end_datetime}', memories={self.memory_units_count})" 108 | 109 | 110 | class SummaryMetrics(BaseModel): 111 | """ 112 | Metrics and analytics for summary generation and usage. 
113 | """ 114 | 115 | total_summaries: int = 0 116 | total_memories_compressed: int = 0 117 | average_compression_ratio: float = 0.0 118 | most_common_emotional_tags: List[str] = [] 119 | most_common_situational_tags: List[str] = [] 120 | persona_updates_triggered: int = 0 121 | 122 | def add_summary(self, summary: SummaryComponent): 123 | """Add a summary to the metrics.""" 124 | self.total_summaries += 1 125 | self.total_memories_compressed += summary.memory_units_count 126 | 127 | if summary.compression_ratio: 128 | current_total = self.average_compression_ratio * (self.total_summaries - 1) 129 | self.average_compression_ratio = ( 130 | current_total + summary.compression_ratio 131 | ) / self.total_summaries 132 | 133 | def get_compression_efficiency(self) -> float: 134 | """Get overall compression efficiency.""" 135 | if self.total_summaries == 0: 136 | return 0.0 137 | return self.total_memories_compressed / self.total_summaries 138 | -------------------------------------------------------------------------------- /docs/internet-access/providers.md: -------------------------------------------------------------------------------- 1 | # Internet Access Providers 2 | 3 | MemoRizz treats internet tooling as a first‑class capability for Deep Research agents. This page explains how providers are discovered, how to configure them, and what to expect from the built-in integrations. 4 | 5 | ## Provider Discovery Order 6 | 7 | When a MemAgent is created with `ApplicationMode.DEEP_RESEARCH`, MemoRizz automatically attempts to attach an internet provider: 8 | 9 | 1. **Explicit override** – Any provider passed via `.with_internet_access_provider(...)` takes priority. 10 | 2. **Environment hint** – If `MEMORIZZ_DEFAULT_INTERNET_PROVIDER` is set, MemoRizz instantiates that provider (optionally with `MEMORIZZ_DEFAULT_INTERNET_PROVIDER_API_KEY`). 11 | 3. **Tavily default** – If no override exists but `TAVILY_API_KEY` is present, MemoRizz prefers Tavily for its speed and structured research output. 12 | 4. **Firecrawl fallback** – If Tavily is unavailable yet `FIRECRAWL_API_KEY` exists, MemoRizz creates a Firecrawl provider. 13 | 5. **Offline provider** – When none of the above are configured, MemoRizz falls back to the built-in `offline` provider so the `internet_search` and `open_web_page` tools still exist and inform the agent/user how to enable real access. 14 | 15 | Regardless of provider, every Deep Research agent (root, delegates, synthesis) registers two tools: 16 | 17 | - `internet_search(query: str, max_results: int = 5)` – returns a list of normalized search results (`title`, `snippet`, `url`, `score`, optional metadata + raw payload). 18 | - `open_web_page(url: str)` – fetches a URL and returns parsed content plus metadata (word count, truncation info, raw body when available). 19 | 20 | You can also call `agent.search_internet(...)` or `agent.fetch_url(...)` directly from Python for the same behavior. 21 | 22 | ## Configuring Providers 23 | 24 | | Setting | Purpose | 25 | | --- | --- | 26 | | `MEMORIZZ_DEFAULT_INTERNET_PROVIDER` | Name registered via `register_provider` (e.g., `tavily`, `firecrawl`). | 27 | | `MEMORIZZ_DEFAULT_INTERNET_PROVIDER_API_KEY` | API key passed to the provider constructed from the env hint. | 28 | | `TAVILY_API_KEY` | Shortcut specifically for the Tavily provider (preferred). | 29 | | `FIRECRAWL_API_KEY` | Shortcut specifically for the Firecrawl provider. 
| 30 | 31 | To force a provider in code (e.g., for tests), build it manually and pass it to the builder: 32 | 33 | ```python 34 | from memorizz.internet_access.providers.tavily import TavilyProvider 35 | from memorizz.memagent.builders import create_deep_research_agent 36 | 37 | tavily = TavilyProvider(api_key="sk-...") 38 | agent = (create_deep_research_agent("Web scout", internet_provider=tavily) 39 | .with_memory_provider(memory_provider) 40 | .build()) 41 | ``` 42 | 43 | You can also swap providers on an existing agent via `agent.with_internet_access_provider(new_provider)`. 44 | 45 | ## Tavily Provider (Preferred) 46 | 47 | The Tavily integration is the recommended default. It balances speed and extraction quality, and MemoRizz automatically wires it up for Deep Research agents whenever `TAVILY_API_KEY` exists. 48 | 49 | 1. Export `TAVILY_API_KEY=""`. 50 | 2. Optionally configure `MEMORIZZ_DEFAULT_INTERNET_PROVIDER=tavily` to make every Deep Research agent pick it explicitly. 51 | 3. (Optional) Pass a config dict to tune options such as `search_depth`, `default_max_results`, and `max_content_chars`. 52 | 53 | ```python 54 | TavilyProvider( 55 | api_key="sk-...", 56 | config={ 57 | "search_depth": "advanced", 58 | "default_max_results": 8, 59 | "max_content_chars": 10_000, 60 | "include_raw_page": False, 61 | }, 62 | ) 63 | ``` 64 | 65 | Responses include truncation metadata whenever `max_content_chars` shortens an extracted page so downstream prompts can adapt. 66 | 67 | ## Firecrawl Provider 68 | 69 | The Firecrawl integration gives you search + crawl in a single dependency: 70 | 71 | 1. Install the `firecrawl` extra in your environment (if needed). 72 | 2. Export `FIRECRAWL_API_KEY=""`. 73 | 3. Optionally configure `MEMORIZZ_DEFAULT_INTERNET_PROVIDER=firecrawl` to ensure every Deep Research agent uses Firecrawl by default. 74 | 75 | ### Advanced Configuration 76 | 77 | The provider accepts extra keyword arguments via the config dict: 78 | 79 | ```python 80 | FirecrawlProvider( 81 | api_key="sk-...", 82 | base_url="https://api.firecrawl.dev/v1", 83 | timeout=45, 84 | config={ 85 | "max_content_chars": 12_000, 86 | "max_raw_chars": 2_000, 87 | "include_raw_response": True, 88 | }, 89 | ) 90 | ``` 91 | 92 | When run via env variables, you can set the matching `MEMORIZZ_DEFAULT_INTERNET_PROVIDER_*` keys (or edit the config you pass to `create_internet_access_provider`) to tweak timeouts or base URLs. 93 | 94 | ### Response Shape 95 | 96 | - `internet_search` returns a list of objects containing `url`, `title`, `snippet`, optional `score`, and a `metadata` dict with provider-specific fields (`source`, `published_at`, etc.). 97 | - `open_web_page` returns `title`, parsed `content` (Markdown), `metadata` describing truncation, and a `raw` dict with the full provider payload if `include_raw_response` is enabled. 98 | 99 | MemoRizz automatically trims long documents to keep responses within the model’s context window, flagging truncated responses via `metadata["content_truncated"]`. 100 | 101 | ## Offline Provider 102 | 103 | If neither `MEMORIZZ_DEFAULT_INTERNET_PROVIDER` nor `TAVILY_API_KEY`/`FIRECRAWL_API_KEY` is configured, the `offline` provider keeps the internet tools available but responds with helpful error messages. This ensures Deep Research prompts remain stable even on air-gapped machines, while clearly signaling that live browsing is disabled. 
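As a quick sanity check, the minimal sketch below uses only the calls shown earlier on this page (`create_deep_research_agent`, the builder chain, and `agent.search_internet`); `memory_provider` is assumed to be whichever memory provider you already configured, as in the earlier example. With no search keys in the environment, discovery falls through to the offline provider, so the call should return guidance on enabling live access rather than real results.

```python
import os

from memorizz.memagent.builders import create_deep_research_agent

# Ensure no provider hints are set, so discovery falls through to the offline provider.
for var in (
    "MEMORIZZ_DEFAULT_INTERNET_PROVIDER",
    "MEMORIZZ_DEFAULT_INTERNET_PROVIDER_API_KEY",
    "TAVILY_API_KEY",
    "FIRECRAWL_API_KEY",
):
    os.environ.pop(var, None)

agent = (create_deep_research_agent("Web scout")
         .with_memory_provider(memory_provider)  # assumed configured as in the earlier example
         .build())

# The internet tools still exist; the offline provider answers with instructions
# for enabling Tavily or Firecrawl instead of live search results.
print(agent.search_internet("memorizz memory providers"))
```

Once a `TAVILY_API_KEY` or `FIRECRAWL_API_KEY` is exported, re-running the same snippet picks up the corresponding provider via the discovery order above without any code changes.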
104 | -------------------------------------------------------------------------------- /tests/unit/test_entity_memory.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | from typing import Any, Dict, List, Optional 3 | 4 | import pytest 5 | 6 | from memorizz.enums.memory_type import MemoryType 7 | from memorizz.long_term_memory.semantic.entity_memory import EntityMemory 8 | from memorizz.memagent.managers.entity_memory_manager import EntityMemoryManager 9 | 10 | 11 | class InMemoryEntityProvider: 12 | """Minimal provider that mimics the entity-memory interface.""" 13 | 14 | def __init__(self): 15 | self.records: Dict[str, Dict[str, Any]] = {} 16 | 17 | def supports_entity_memory(self) -> bool: 18 | return True 19 | 20 | def store(self, data: Dict[str, Any], memory_store_type: MemoryType, **_) -> str: 21 | assert memory_store_type == MemoryType.ENTITY_MEMORY 22 | record = dict(data) 23 | record.setdefault("_id", record.get("entity_id", str(uuid.uuid4()))) 24 | entity_id = record["entity_id"] 25 | 26 | self.records[entity_id] = record 27 | 28 | return record["_id"] 29 | 30 | def retrieve_by_query( 31 | self, 32 | query: Any, 33 | memory_type: MemoryType, 34 | limit: int = 5, 35 | memory_id: Optional[str] = None, 36 | **__, 37 | ) -> List[Dict[str, Any]]: 38 | assert memory_type == MemoryType.ENTITY_MEMORY 39 | 40 | if isinstance(query, dict): 41 | candidates = [ 42 | rec 43 | for rec in self.records.values() 44 | if all(rec.get(key) == value for key, value in query.items()) 45 | and (memory_id is None or rec.get("memory_id") == memory_id) 46 | ] 47 | else: 48 | candidates = [ 49 | rec 50 | for rec in self.records.values() 51 | if memory_id is None or rec.get("memory_id") == memory_id 52 | ] 53 | return candidates[:limit] 54 | 55 | def list_all(self, memory_store_type: MemoryType) -> List[Dict[str, Any]]: 56 | assert memory_store_type == MemoryType.ENTITY_MEMORY 57 | return [dict(rec) for rec in self.records.values()] 58 | 59 | 60 | @pytest.fixture() 61 | def provider() -> InMemoryEntityProvider: 62 | return InMemoryEntityProvider() 63 | 64 | 65 | @pytest.fixture(autouse=True) 66 | def mock_embeddings(monkeypatch): 67 | """Use deterministic embeddings so tests don't hit external services.""" 68 | 69 | def _fake_embedding(text: str) -> List[float]: 70 | return [float(len(text or ""))] 71 | 72 | monkeypatch.setattr( 73 | "memorizz.long_term_memory.semantic.entity_memory.entity_memory.get_embedding", 74 | _fake_embedding, 75 | ) 76 | 77 | 78 | @pytest.fixture() 79 | def entity_store(provider: InMemoryEntityProvider) -> EntityMemory: 80 | return EntityMemory(provider) 81 | 82 | 83 | def test_upsert_merges_attributes( 84 | provider: InMemoryEntityProvider, entity_store: EntityMemory 85 | ): 86 | entity_id = entity_store.upsert_entity( 87 | name="Avery", 88 | entity_type="customer", 89 | attributes=[{"name": "language", "value": "English"}], 90 | memory_id="tenant-1", 91 | ) 92 | 93 | entity_store.upsert_entity( 94 | entity_id=entity_id, 95 | attributes=[{"name": "timezone", "value": "PST"}], 96 | memory_id="tenant-1", 97 | ) 98 | 99 | assert len(provider.records) == 1 100 | first_record = next(iter(provider.records.values())) 101 | stored_attrs = {attr["name"]: attr["value"] for attr in first_record["attributes"]} 102 | assert stored_attrs == {"language": "English", "timezone": "PST"} 103 | 104 | 105 | def test_record_attribute_creates_entity( 106 | provider: InMemoryEntityProvider, entity_store: EntityMemory 107 | ): 108 | entity_id = 
entity_store.record_attribute( 109 | entity_name="Nova", 110 | attribute_name="favorite_product", 111 | attribute_value="Nebula Drone", 112 | memory_id="tenant-2", 113 | ) 114 | 115 | stored = next(iter(provider.records.values())) 116 | assert stored["entity_id"] == entity_id 117 | assert stored["attributes"][0]["name"] == "favorite_product" 118 | assert stored["attributes"][0]["value"] == "Nebula Drone" 119 | 120 | 121 | def test_manager_build_context_returns_profiles( 122 | provider: InMemoryEntityProvider, entity_store: EntityMemory 123 | ): 124 | entity_store.upsert_entity( 125 | name="Taylor", 126 | entity_type="analyst", 127 | attributes=[{"name": "role", "value": "Analyst"}], 128 | memory_id="team-7", 129 | ) 130 | 131 | manager = EntityMemoryManager(provider) 132 | profiles = manager.build_context("analyst", memory_id="team-7") 133 | 134 | assert profiles and profiles[0]["attributes"]["role"] == "Analyst" 135 | summary = manager.summarize_for_prompt(profiles) 136 | assert "Taylor" in summary 137 | assert "role: Analyst" in summary 138 | 139 | 140 | def test_manager_lookup_filters_by_memory_id( 141 | provider: InMemoryEntityProvider, entity_store: EntityMemory 142 | ): 143 | entity_store.upsert_entity( 144 | name="Jordan", 145 | entity_type="user", 146 | attributes=[{"name": "tier", "value": "gold"}], 147 | memory_id="org-a", 148 | ) 149 | entity_store.upsert_entity( 150 | name="Riley", 151 | entity_type="user", 152 | attributes=[{"name": "tier", "value": "silver"}], 153 | memory_id="org-b", 154 | ) 155 | 156 | assert len(provider.records) == 2 157 | manager = EntityMemoryManager(provider) 158 | raw_matches = provider.retrieve_by_query( 159 | "user", memory_type=MemoryType.ENTITY_MEMORY, memory_id="org-a" 160 | ) 161 | assert len(raw_matches) == 1 162 | matches = manager.lookup_entities(query="user", memory_id="org-a") 163 | 164 | assert len(matches) == 1 165 | assert matches[0]["name"] == "Jordan" 166 | --------------------------------------------------------------------------------