├── sct.png
├── memory_strategies
│   ├── __pycache__
│   │   ├── utils.cpython-310.pyc
│   │   ├── __init__.cpython-310.pyc
│   │   ├── ai_agent.cpython-310.pyc
│   │   ├── base_memory.cpython-310.pyc
│   │   ├── os_memory.cpython-310.pyc
│   │   ├── graph_memory.cpython-310.pyc
│   │   ├── retrieval_memory.cpython-310.pyc
│   │   ├── compression_memory.cpython-310.pyc
│   │   ├── sequential_memory.cpython-310.pyc
│   │   ├── hierarchical_memory.cpython-310.pyc
│   │   ├── sliding_window_memory.cpython-310.pyc
│   │   ├── summarization_memory.cpython-310.pyc
│   │   └── memory_augmented_memory.cpython-310.pyc
│   ├── base_memory.py
│   ├── __init__.py
│   ├── sequential_memory.py
│   ├── utils.py
│   ├── sliding_window_memory.py
│   ├── ai_agent.py
│   ├── os_memory.py
│   ├── hierarchical_memory.py
│   ├── memory_augmented_memory.py
│   ├── retrieval_memory.py
│   ├── summarization_memory.py
│   ├── graph_memory.py
│   └── compression_memory.py
├── requirements.txt
├── .gitignore
├── README.md
├── example_usage.py
├── LICENSE
├── api.py
└── streamlit_playground.py
/sct.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIAnytime/Agent-Memory-Playground/main/sct.png
--------------------------------------------------------------------------------
/memory_strategies/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIAnytime/Agent-Memory-Playground/main/memory_strategies/__pycache__/utils.cpython-310.pyc
--------------------------------------------------------------------------------
/memory_strategies/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIAnytime/Agent-Memory-Playground/main/memory_strategies/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/memory_strategies/__pycache__/ai_agent.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIAnytime/Agent-Memory-Playground/main/memory_strategies/__pycache__/ai_agent.cpython-310.pyc
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | openai
2 | numpy
3 | faiss-cpu
4 | networkx
5 | tiktoken
6 | python-dotenv
7 | streamlit
8 | fastapi
9 | uvicorn
10 | pydantic
11 | plotly
12 |
--------------------------------------------------------------------------------
/memory_strategies/__pycache__/base_memory.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIAnytime/Agent-Memory-Playground/main/memory_strategies/__pycache__/base_memory.cpython-310.pyc
--------------------------------------------------------------------------------
/memory_strategies/__pycache__/os_memory.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIAnytime/Agent-Memory-Playground/main/memory_strategies/__pycache__/os_memory.cpython-310.pyc
--------------------------------------------------------------------------------
/memory_strategies/__pycache__/graph_memory.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIAnytime/Agent-Memory-Playground/main/memory_strategies/__pycache__/graph_memory.cpython-310.pyc
--------------------------------------------------------------------------------
/memory_strategies/__pycache__/retrieval_memory.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIAnytime/Agent-Memory-Playground/main/memory_strategies/__pycache__/retrieval_memory.cpython-310.pyc
--------------------------------------------------------------------------------
/memory_strategies/__pycache__/compression_memory.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIAnytime/Agent-Memory-Playground/main/memory_strategies/__pycache__/compression_memory.cpython-310.pyc
--------------------------------------------------------------------------------
/memory_strategies/__pycache__/sequential_memory.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIAnytime/Agent-Memory-Playground/main/memory_strategies/__pycache__/sequential_memory.cpython-310.pyc
--------------------------------------------------------------------------------
/memory_strategies/__pycache__/hierarchical_memory.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIAnytime/Agent-Memory-Playground/main/memory_strategies/__pycache__/hierarchical_memory.cpython-310.pyc
--------------------------------------------------------------------------------
/memory_strategies/__pycache__/sliding_window_memory.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIAnytime/Agent-Memory-Playground/main/memory_strategies/__pycache__/sliding_window_memory.cpython-310.pyc
--------------------------------------------------------------------------------
/memory_strategies/__pycache__/summarization_memory.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIAnytime/Agent-Memory-Playground/main/memory_strategies/__pycache__/summarization_memory.cpython-310.pyc
--------------------------------------------------------------------------------
/memory_strategies/__pycache__/memory_augmented_memory.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIAnytime/Agent-Memory-Playground/main/memory_strategies/__pycache__/memory_augmented_memory.cpython-310.pyc
--------------------------------------------------------------------------------
/memory_strategies/base_memory.py:
--------------------------------------------------------------------------------
1 | """
2 | Base Memory Strategy Abstract Class
3 |
4 | This module defines the abstract base class that all memory strategies must implement.
5 | It ensures consistency and interchangeability between different memory optimization techniques.
6 | """
7 |
8 | import abc
9 | from typing import Any, Dict, List, Optional
10 |
11 |
12 | class BaseMemoryStrategy(abc.ABC):
13 | """Abstract base class for all memory strategies."""
14 |
15 | @abc.abstractmethod
16 | def add_message(self, user_input: str, ai_response: str) -> None:
17 | """
18 | Add a new user-AI interaction to the memory storage.
19 |
20 | Args:
21 | user_input: The user's message
22 | ai_response: The AI's response
23 | """
24 | pass
25 |
26 | @abc.abstractmethod
27 | def get_context(self, query: str) -> str:
28 | """
29 | Retrieve and format relevant context from memory for the LLM.
30 |
31 | Args:
32 | query: The current user query to find relevant context for
33 |
34 | Returns:
35 | Formatted context string to send to the LLM
36 | """
37 | pass
38 |
39 | @abc.abstractmethod
40 | def clear(self) -> None:
41 | """
42 | Reset the memory storage, useful for starting new conversations.
43 | """
44 | pass
45 |
46 | def get_memory_stats(self) -> Dict[str, Any]:
47 | """
48 | Get statistics about the current memory usage.
49 |
50 | Returns:
51 | Dictionary containing memory statistics
52 | """
53 | return {
54 | "strategy_type": self.__class__.__name__,
55 | "memory_size": "Unknown"
56 | }
57 |
--------------------------------------------------------------------------------
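A minimal sketch of a custom strategy built on this contract. The class below (a hypothetical EchoMemory, not part of the package) shows the three abstract methods every subclass must supply; it assumes the requirements.txt dependencies are installed, since importing any submodule runs the package __init__.

    from memory_strategies.base_memory import BaseMemoryStrategy

    class EchoMemory(BaseMemoryStrategy):
        """Hypothetical strategy that keeps only the most recent turn."""

        def __init__(self):
            self.last_turn = ""

        def add_message(self, user_input: str, ai_response: str) -> None:
            # Overwrite rather than accumulate: only one turn survives.
            self.last_turn = f"User: {user_input}\nAssistant: {ai_response}"

        def get_context(self, query: str) -> str:
            # 'query' is ignored, as in the simpler built-in strategies.
            return self.last_turn or "No conversation history yet."

        def clear(self) -> None:
            self.last_turn = ""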
/memory_strategies/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Memory Strategies Package
3 |
4 | This package contains 9 different memory optimization techniques for AI agents,
5 | ranging from simple sequential storage to complex operating system-like memory management.
6 | """
7 |
8 | from .base_memory import BaseMemoryStrategy
9 | from .ai_agent import AIAgent
10 | from .utils import generate_text, generate_embedding, count_tokens, get_openai_client
11 |
12 | # Basic Memory Strategies
13 | from .sequential_memory import SequentialMemory
14 | from .sliding_window_memory import SlidingWindowMemory
15 | from .summarization_memory import SummarizationMemory
16 |
17 | # Advanced Memory Strategies
18 | from .retrieval_memory import RetrievalMemory
19 | from .memory_augmented_memory import MemoryAugmentedMemory
20 | from .hierarchical_memory import HierarchicalMemory
21 |
22 | # Complex Memory Strategies
23 | from .graph_memory import GraphMemory
24 | from .compression_memory import CompressionMemory
25 | from .os_memory import OSMemory
26 |
27 | __all__ = [
28 | # Base classes
29 | "BaseMemoryStrategy",
30 | "AIAgent",
31 |
32 | # Utilities
33 | "generate_text",
34 | "generate_embedding",
35 | "count_tokens",
36 | "get_openai_client",
37 |
38 | # Basic strategies
39 | "SequentialMemory",
40 | "SlidingWindowMemory",
41 | "SummarizationMemory",
42 |
43 | # Advanced strategies
44 | "RetrievalMemory",
45 | "MemoryAugmentedMemory",
46 | "HierarchicalMemory",
47 |
48 | # Complex strategies
49 | "GraphMemory",
50 | "CompressionMemory",
51 | "OSMemory"
52 | ]
53 |
54 | # Strategy metadata for easy reference
55 | STRATEGY_INFO = {
56 | "SequentialMemory": {
57 | "complexity": "Basic",
58 | "description": "Stores all conversation history chronologically",
59 | "best_for": "Simple, short-term chatbots"
60 | },
61 | "SlidingWindowMemory": {
62 | "complexity": "Basic",
63 | "description": "Maintains only the most recent N conversations",
64 | "best_for": "Controlled memory usage scenarios"
65 | },
66 | "SummarizationMemory": {
67 | "complexity": "Basic",
68 | "description": "Compresses conversation history using LLM summarization",
69 | "best_for": "Long-term creative conversations"
70 | },
71 | "RetrievalMemory": {
72 | "complexity": "Advanced",
73 | "description": "Uses vector embeddings and similarity search (RAG)",
74 | "best_for": "Accurate long-term recall, industry standard"
75 | },
76 | "MemoryAugmentedMemory": {
77 | "complexity": "Advanced",
78 | "description": "Combines sliding window with persistent memory tokens",
79 | "best_for": "Personal assistants requiring fact retention"
80 | },
81 | "HierarchicalMemory": {
82 | "complexity": "Advanced",
83 | "description": "Multi-layered system with working + long-term memory",
84 | "best_for": "Human-like cognitive patterns"
85 | },
86 | "GraphMemory": {
87 | "complexity": "Complex",
88 | "description": "Treats conversations as nodes with relationship edges",
89 | "best_for": "Expert systems and knowledge bases"
90 | },
91 | "CompressionMemory": {
92 | "complexity": "Complex",
93 | "description": "Intelligent compression and integration of historical data",
94 | "best_for": "Space-constrained environments"
95 | },
96 | "OSMemory": {
97 | "complexity": "Complex",
98 | "description": "Simulates RAM/disk with active/passive memory",
99 | "best_for": "Large-scale systems with unlimited memory needs"
100 | }
101 | }
102 |
--------------------------------------------------------------------------------
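A short sketch of browsing the catalog through these exports (assumes the requirements.txt dependencies are installed; importing the package pulls in every strategy module, but no API key is needed just to import):

    from memory_strategies import STRATEGY_INFO

    for name, info in STRATEGY_INFO.items():
        print(f"{name:<24} {info['complexity']:<8} best for: {info['best_for']}")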
/memory_strategies/sequential_memory.py:
--------------------------------------------------------------------------------
1 | """
2 | Sequential Memory Strategy
3 |
4 | This is the most basic memory strategy that stores the entire conversation
5 | history in chronological order. While it provides perfect recall, it's not
6 | scalable as the context grows linearly with each conversation turn.
7 | """
8 |
9 | from typing import List, Dict, Any
10 | from .base_memory import BaseMemoryStrategy
11 | from .utils import count_tokens
12 |
13 |
14 | class SequentialMemory(BaseMemoryStrategy):
15 | """
16 | Sequential memory strategy that stores all conversation history.
17 |
18 | Advantages:
19 | - Simple implementation
20 | - Perfect recall of all conversations
21 | - Complete context preservation
22 |
23 | Disadvantages:
24 | - Linear token growth with conversation length
25 | - Expensive for long conversations
26 | - May hit token limits quickly
27 | """
28 |
29 | def __init__(self):
30 | """Initialize memory with empty list to store conversation history."""
31 | self.history: List[Dict[str, str]] = []
32 | self.total_content_tokens = 0 # Track cumulative content token usage
33 | self.total_prompt_tokens = 0 # Track cumulative prompt tokens sent to LLM
34 |
35 | def add_message(self, user_input: str, ai_response: str) -> None:
36 | """
37 | Add new user-AI interaction to history.
38 |
39 | Each interaction is stored as two dictionary entries in the list.
40 |
41 | Args:
42 | user_input: User's message
43 | ai_response: AI's response
44 | """
45 | self.history.append({"role": "user", "content": user_input})
46 | self.history.append({"role": "assistant", "content": ai_response})
47 |
48 | # Update content token count (just the message content)
49 | self.total_content_tokens += count_tokens(user_input + ai_response)
50 |
51 | def get_context(self, query: str) -> str:
52 | """
53 | Retrieve entire conversation history formatted as a single string.
54 |
55 | The 'query' parameter is ignored since this strategy always
56 | returns the complete history.
57 |
58 | Args:
59 | query: Current user query (ignored in this strategy)
60 |
61 | Returns:
62 | Complete conversation history as formatted string
63 | """
64 | if not self.history:
65 | return "No conversation history yet."
66 |
67 | # Join all messages into a single string separated by newlines
68 | return "\n".join([
69 | f"{turn['role'].capitalize()}: {turn['content']}"
70 | for turn in self.history
71 | ])
72 |
73 | def clear(self) -> None:
74 | """Reset conversation history by clearing the list."""
75 | self.history = []
76 | self.total_content_tokens = 0
77 | self.total_prompt_tokens = 0
78 | print("Sequential memory cleared.")
79 |
80 | def get_memory_stats(self) -> Dict[str, Any]:
81 | """
82 | Get statistics about current memory usage.
83 |
84 | Returns:
85 | Dictionary containing memory statistics
86 | """
87 | total_messages = len(self.history)
88 | total_turns = total_messages // 2 # Each turn has user + assistant message
89 |
90 | return {
91 | "strategy_type": "SequentialMemory",
92 | "total_messages": total_messages,
93 | "total_turns": total_turns,
94 | "total_content_tokens": self.total_content_tokens,
95 | "total_prompt_tokens": self.total_prompt_tokens,
96 | "memory_size": f"{total_messages} messages",
97 | "advantages": ["Perfect recall", "Simple implementation"],
98 | "disadvantages": ["Linear token growth", "Not scalable"]
99 | }
100 |
--------------------------------------------------------------------------------
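A usage sketch for this strategy in isolation. No API key is needed, since SequentialMemory only touches tiktoken through count_tokens; the messages are made-up example data:

    from memory_strategies.sequential_memory import SequentialMemory

    memory = SequentialMemory()
    memory.add_message("My name is Ada.", "Nice to meet you, Ada!")
    memory.add_message("What do I do?", "You haven't told me yet.")

    # The query is ignored; the full chronological history always comes back.
    print(memory.get_context(query="anything"))

    # total_content_tokens grows with every turn -- the strategy's core trade-off.
    print(memory.get_memory_stats()["total_content_tokens"])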
/memory_strategies/utils.py:
--------------------------------------------------------------------------------
1 | """
2 | Utility Functions for Memory Strategies
3 |
4 | This module provides core utility functions used across all memory strategies,
5 | including text generation, embedding generation, and token counting.
6 | """
7 |
8 | import os
9 | import time
10 | import tiktoken
11 | from typing import List, Optional
12 | from openai import OpenAI
13 |
14 |
15 | # Initialize tokenizer for token counting
16 | tokenizer = tiktoken.get_encoding("cl100k_base")
17 |
18 | # Model configurations
19 | GENERATION_MODEL = "gpt-4o-mini"
20 | EMBEDDING_MODEL = "text-embedding-3-small"
21 |
22 |
23 | def get_openai_client() -> OpenAI:
24 | """
25 | Initialize and return OpenAI client with API key from environment.
26 |
27 | Returns:
28 | Configured OpenAI client instance
29 | """
30 | api_key = os.getenv("OPENAI_API_KEY")
31 | if not api_key:
32 | raise ValueError("OPENAI_API_KEY not found in environment variables")
33 |
34 | return OpenAI(api_key=api_key)
35 |
36 |
37 | def generate_text(system_prompt: str, user_prompt: str, client: Optional[OpenAI] = None) -> str:
38 | """
39 | Generate text response using the LLM API.
40 |
41 | Args:
42 | system_prompt: System instructions defining AI role and behavior
43 | user_prompt: User input that AI should respond to
44 | client: Optional OpenAI client instance
45 |
46 | Returns:
47 | Generated text content from the AI
48 | """
49 | if client is None:
50 | client = get_openai_client()
51 |
52 | try:
53 | response = client.chat.completions.create(
54 | model=GENERATION_MODEL,
55 | messages=[
56 | {"role": "system", "content": system_prompt},
57 | {"role": "user", "content": user_prompt}
58 | ],
59 | temperature=0.7,
60 | max_tokens=1000
61 | )
62 | return response.choices[0].message.content
63 | except Exception as e:
64 | return f"Error generating text: {str(e)}"
65 |
66 |
67 | def generate_embedding(text: str, client: Optional[OpenAI] = None) -> List[float]:
68 | """
69 | Generate embedding vector for given text using the embedding model.
70 |
71 | Args:
72 | text: Input text to convert to embedding vector
73 | client: Optional OpenAI client instance
74 |
75 | Returns:
76 | List of floats representing the embedding vector
77 | """
78 | if client is None:
79 | client = get_openai_client()
80 |
81 | try:
82 | response = client.embeddings.create(
83 | model=EMBEDDING_MODEL,
84 | input=text
85 | )
86 | return response.data[0].embedding
87 | except Exception as e:
88 | print(f"Error generating embedding: {str(e)}")
89 | return []
90 |
91 |
92 | def count_tokens(text: str) -> int:
93 | """
94 | Count the number of tokens in the given text string.
95 |
96 | Args:
97 | text: String to tokenize and count
98 |
99 | Returns:
100 | Integer count of tokens
101 | """
102 | return len(tokenizer.encode(text))
103 |
104 |
105 | def format_conversation_turn(user_input: str, ai_response: str) -> str:
106 | """
107 | Format a conversation turn into a standardized string format.
108 |
109 | Args:
110 | user_input: User's message
111 | ai_response: AI's response
112 |
113 | Returns:
114 | Formatted conversation turn string
115 | """
116 | return f"User: {user_input}\nAssistant: {ai_response}"
117 |
118 |
119 | def measure_time(func):
120 | """
121 | Decorator to measure execution time of functions.
122 |
123 | Args:
124 | func: Function to measure
125 |
126 | Returns:
127 | Wrapper function that measures execution time
128 | """
129 | def wrapper(*args, **kwargs):
130 | start_time = time.time()
131 | result = func(*args, **kwargs)
132 | end_time = time.time()
133 | execution_time = end_time - start_time
134 | print(f"[TIMING] {func.__name__} executed in {execution_time:.4f} seconds")
135 | return result
136 | return wrapper
137 |
--------------------------------------------------------------------------------
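A sketch exercising the helpers that work offline (count_tokens, format_conversation_turn, and the measure_time decorator need no API key; generate_text and generate_embedding do):

    from memory_strategies.utils import count_tokens, format_conversation_turn, measure_time

    @measure_time
    def build_turn(user_input: str, ai_response: str) -> str:
        return format_conversation_turn(user_input, ai_response)

    turn = build_turn("Hello!", "Hi, how can I help?")  # prints a [TIMING] line
    print(count_tokens(turn))  # token count under the cl100k_base encoding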
/memory_strategies/sliding_window_memory.py:
--------------------------------------------------------------------------------
1 | """
2 | Sliding Window Memory Strategy
3 |
4 | This strategy maintains only the most recent N conversation turns using a fixed-size
5 | window. It prevents unbounded context growth but may lose important historical information.
6 | """
7 |
8 | from collections import deque
9 | from typing import List, Dict, Any
10 | from .base_memory import BaseMemoryStrategy
11 | from .utils import count_tokens
12 |
13 |
14 | class SlidingWindowMemory(BaseMemoryStrategy):
15 | """
16 | Sliding window memory strategy that keeps only recent N conversation turns.
17 |
18 | Advantages:
19 | - Controlled memory usage
20 | - Predictable token consumption
21 | - Scalable for long conversations
22 |
23 | Disadvantages:
24 | - Loses old information
25 | - May forget important early context
26 | - Fixed window size may not suit all scenarios
27 | """
28 |
29 | def __init__(self, window_size: int = 4):
30 | """
31 | Initialize memory with fixed-size deque.
32 |
33 | Args:
34 | window_size: Number of conversation turns to retain in memory.
35 | A single turn includes one user message and one AI response.
36 | """
37 | self.window_size = window_size
38 | # Deque with maxlen automatically discards oldest items when full
39 | self.history = deque(maxlen=window_size)
40 | self.total_content_tokens = 0 # Track cumulative content token usage
41 | self.total_prompt_tokens = 0 # Track cumulative prompt tokens sent to LLM
42 |
43 | def add_message(self, user_input: str, ai_response: str) -> None:
44 | """
45 | Add new conversation turn to history.
46 |
47 | If deque is full, the oldest turn is automatically removed.
48 |
49 | Args:
50 | user_input: User's message
51 | ai_response: AI's response
52 | """
53 | # Each turn (user input + AI response) is stored as a single element
54 | # This makes it easy to manage window size by turns
55 | turn_data = [
56 | {"role": "user", "content": user_input},
57 | {"role": "assistant", "content": ai_response}
58 | ]
59 | self.history.append(turn_data)
60 |
61 | # Update content token count (just the message content)
62 | self.total_content_tokens += count_tokens(user_input + ai_response)
63 |
64 | def get_context(self, query: str) -> str:
65 | """
66 | Retrieve conversation history within current window.
67 |
68 | The 'query' parameter is ignored in this strategy.
69 |
70 | Args:
71 | query: Current user query (ignored in this strategy)
72 |
73 | Returns:
74 | Recent conversation history as formatted string
75 | """
76 | if not self.history:
77 | return "No conversation history yet."
78 |
79 | # Create temporary list to hold formatted messages
80 | context_list = []
81 |
82 | # Iterate through each turn stored in the deque
83 | for turn in self.history:
84 | # Iterate through user and assistant messages in the turn
85 | for message in turn:
86 | # Format message and add to our list
87 | context_list.append(f"{message['role'].capitalize()}: {message['content']}")
88 |
89 | # Join all formatted messages into a single string
90 | return "\n".join(context_list)
91 |
92 | def clear(self) -> None:
93 | """Reset conversation history by clearing the deque."""
94 | self.history.clear()
95 | self.total_content_tokens = 0
96 | self.total_prompt_tokens = 0
97 | print("Sliding window memory cleared.")
98 |
99 | def get_memory_stats(self) -> Dict[str, Any]:
100 | """
101 | Get statistics about current memory usage.
102 |
103 | Returns:
104 | Dictionary containing memory statistics
105 | """
106 | current_turns = len(self.history)
107 | total_messages = sum(len(turn) for turn in self.history)
108 |
109 | return {
110 | "strategy_type": "SlidingWindowMemory",
111 | "window_size": self.window_size,
112 | "current_turns": current_turns,
113 | "total_messages": total_messages,
114 | "total_content_tokens": self.total_content_tokens,
115 | "total_prompt_tokens": self.total_prompt_tokens,
116 | "memory_size": f"{current_turns}/{self.window_size} turns",
117 | "advantages": ["Controlled memory", "Predictable tokens", "Scalable"],
118 | "disadvantages": ["Loses old info", "Fixed window size"]
119 | }
120 |
--------------------------------------------------------------------------------
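A sketch of the eviction behavior with example messages (fully offline; the deque's maxlen does the work):

    from memory_strategies.sliding_window_memory import SlidingWindowMemory

    memory = SlidingWindowMemory(window_size=2)
    memory.add_message("Turn 1", "Reply 1")
    memory.add_message("Turn 2", "Reply 2")
    memory.add_message("Turn 3", "Reply 3")  # deque silently evicts Turn 1

    print(memory.get_context("ignored"))             # only Turns 2 and 3 remain
    print(memory.get_memory_stats()["memory_size"])  # -> "2/2 turns"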
/memory_strategies/ai_agent.py:
--------------------------------------------------------------------------------
1 | """
2 | AI Agent Class
3 |
4 | This module contains the core AI Agent that coordinates conversation flow
5 | and works with different memory strategies using the strategy pattern.
6 | """
7 |
8 | import time
9 | from typing import Optional
10 | from openai import OpenAI
11 |
12 | from .base_memory import BaseMemoryStrategy
13 | from .utils import generate_text, count_tokens, get_openai_client
14 |
15 |
16 | class AIAgent:
17 | """
18 | Main AI Agent class designed to work with any memory strategy.
19 |
20 | Uses the strategy pattern to allow switching between different
21 | memory management approaches at runtime.
22 | """
23 |
24 | def __init__(
25 | self,
26 | memory_strategy: BaseMemoryStrategy,
27 | system_prompt: str = "You are a helpful AI assistant.",
28 | client: Optional[OpenAI] = None
29 | ):
30 | """
31 | Initialize the AI agent.
32 |
33 | Args:
34 | memory_strategy: Instance of a class inheriting from BaseMemoryStrategy
35 | system_prompt: Initial instructions for the LLM defining its personality
36 | client: Optional OpenAI client instance
37 | """
38 | self.memory = memory_strategy
39 | self.system_prompt = system_prompt
40 | self.client = client or get_openai_client()
41 | print(f"Agent initialized with {type(memory_strategy).__name__}.")
42 |
43 | def chat(self, user_input: str, verbose: bool = True) -> dict:
44 | """
45 | Process a single conversation turn.
46 |
47 | Args:
48 | user_input: The user's latest message
49 | verbose: Whether to print detailed debug information
50 |
51 | Returns:
52 | Dictionary containing response and performance metrics
53 | """
54 | if verbose:
55 | print(f"\n{'='*25} NEW INTERACTION {'='*25}")
56 | print(f"User > {user_input}")
57 |
58 | # Step 1: Retrieve context from the agent's memory strategy
59 | start_time = time.time()
60 | context = self.memory.get_context(query=user_input)
61 | retrieval_time = time.time() - start_time
62 |
63 | # Step 2: Build complete prompt for the LLM
64 | full_user_prompt = f"### MEMORY CONTEXT\n{context}\n\n### CURRENT REQUEST\n{user_input}"
65 |
66 | # Step 3: Calculate token usage for debugging
67 | prompt_tokens = count_tokens(self.system_prompt + full_user_prompt)
68 |
69 | if verbose:
70 | print("\n--- Agent Debug Info ---")
71 | print(f"Memory Retrieval Time: {retrieval_time:.4f} seconds")
72 | print(f"Estimated Prompt Tokens: {prompt_tokens}")
73 | print(f"\n[Context Retrieved]:\n{context}\n")
74 |
75 | # Step 4: Call LLM to get response
76 | start_time = time.time()
77 | ai_response = generate_text(self.system_prompt, full_user_prompt, self.client)
78 | generation_time = time.time() - start_time
79 |
80 | # Step 5: Update memory with the latest interaction
81 | self.memory.add_message(user_input, ai_response)
82 |
83 | # Step 6: Update prompt token tracking if memory strategy supports it
84 | if hasattr(self.memory, 'total_prompt_tokens'):
85 | self.memory.total_prompt_tokens += prompt_tokens
86 |
87 | # Step 7: Display AI response and performance metrics
88 | if verbose:
89 | print(f"\nAgent > {ai_response}")
90 | print(f"(LLM Generation Time: {generation_time:.4f} seconds)")
91 | print(f"{'='*70}")
92 |
93 | return {
94 | "user_input": user_input,
95 | "ai_response": ai_response,
96 | "retrieval_time": retrieval_time,
97 | "generation_time": generation_time,
98 | "prompt_tokens": prompt_tokens,
99 | "context": context
100 | }
101 |
102 | def get_memory_stats(self) -> dict:
103 | """
104 | Get current memory statistics.
105 |
106 | Returns:
107 | Dictionary containing memory usage statistics
108 | """
109 | return self.memory.get_memory_stats()
110 |
111 | def clear_memory(self) -> None:
112 | """
113 | Clear the agent's memory.
114 | """
115 | self.memory.clear()
116 | print("Agent memory cleared.")
117 |
118 | def set_system_prompt(self, new_prompt: str) -> None:
119 | """
120 | Update the system prompt.
121 |
122 | Args:
123 | new_prompt: New system prompt to use
124 | """
125 | self.system_prompt = new_prompt
126 | print(f"System prompt updated to: {new_prompt[:50]}...")
127 |
--------------------------------------------------------------------------------
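A sketch of the strategy pattern in action (requires OPENAI_API_KEY in the environment, since chat() calls the OpenAI API through generate_text; the prompts are example data):

    from memory_strategies import AIAgent, SlidingWindowMemory

    agent = AIAgent(
        memory_strategy=SlidingWindowMemory(window_size=4),
        system_prompt="You are a concise assistant.",
    )
    result = agent.chat("My favorite color is teal.", verbose=False)
    print(result["ai_response"])
    print(result["prompt_tokens"], result["retrieval_time"])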
/memory_strategies/os_memory.py:
--------------------------------------------------------------------------------
1 | """
2 | Operating System-like Memory Management Strategy
3 |
4 | This strategy simulates how computer operating systems manage memory with
5 | RAM (active memory) and disk (passive memory), implementing paging mechanisms
6 | for intelligent memory management.
7 | """
8 |
9 | from collections import deque
10 | from typing import Dict, Any, Optional, Tuple
11 | from .base_memory import BaseMemoryStrategy
12 |
13 |
14 | class OSMemory(BaseMemoryStrategy):
15 | """
16 | OS-like memory management strategy simulating RAM and disk storage.
17 |
18 | Advantages:
19 | - Scalable memory management
20 | - Intelligent paging system
21 | - Efficient active context
22 | - Nearly unlimited memory capacity
23 |
24 | Disadvantages:
25 | - Complex paging logic
26 | - May miss relevant passive information
27 | - Requires tuning of RAM size
28 | - Page fault overhead
29 | """
30 |
31 | def __init__(self, ram_size: int = 2):
32 | """
33 | Initialize OS-like memory system.
34 |
35 | Args:
36 | ram_size: Maximum number of conversation turns to retain in active memory (RAM)
37 | """
38 | self.ram_size = ram_size
39 |
40 | # 'RAM' is a deque that holds recent turns
41 | self.active_memory: deque = deque()
42 |
43 | # 'Hard disk' is a dictionary for storing paged-out turns
44 | self.passive_memory: Dict[int, str] = {}
45 |
46 | # Counter to give each turn a unique ID
47 | self.turn_count = 0
48 |
49 | def add_message(self, user_input: str, ai_response: str) -> None:
50 | """
51 | Add turn to active memory, page out oldest turn to passive memory if RAM is full.
52 |
53 | Args:
54 | user_input: User's message
55 | ai_response: AI's response
56 | """
57 | turn_id = self.turn_count
58 | turn_data = f"User: {user_input}\nAI: {ai_response}"
59 |
60 | # Check if active memory (RAM) is full
61 | if len(self.active_memory) >= self.ram_size:
62 | # If so, remove least recently used (oldest) item from active memory
63 | lru_turn_id, lru_turn_data = self.active_memory.popleft()
64 |
65 | # Move it to passive memory (hard disk)
66 | self.passive_memory[lru_turn_id] = lru_turn_data
67 | print(f"--- [OS Memory: Paging out Turn {lru_turn_id} to passive storage.] ---")
68 |
69 | # Add new turn to active memory
70 | self.active_memory.append((turn_id, turn_data))
71 | self.turn_count += 1
72 |
73 | def get_context(self, query: str) -> str:
74 | """
75 | Provide RAM context and simulate 'page faults' by pulling from passive memory if needed.
76 |
77 | Args:
78 | query: Current user query
79 |
80 | Returns:
81 | Context from active memory and any paged-in passive memory
82 | """
83 | # Base context is always what's in active memory
84 | active_context = "\n".join([data for _, data in self.active_memory])
85 |
86 | # Simulate page fault: check if any words in query match content in passive memory
87 | paged_in_context = ""
88 | query_words = [word.lower() for word in query.split() if len(word) > 3]
89 |
90 | for turn_id, data in self.passive_memory.items():
91 | # Check for keyword matches in passive memory
92 | if any(word in data.lower() for word in query_words):
93 | paged_in_context += f"\n(Paged in from Turn {turn_id}): {data}"
94 | print(f"--- [OS Memory: Page fault! Paging in Turn {turn_id} from passive storage.] ---")
95 |
96 | # Combine active context with any paged-in context
97 | if paged_in_context:
98 | return f"### Active Memory (RAM):\n{active_context}\n\n### Paged-In from Passive Memory (Disk):\n{paged_in_context}"
99 | else:
100 | return f"### Active Memory (RAM):\n{active_context}" if active_context else "No information in memory yet."
101 |
102 | def clear(self) -> None:
103 | """Clear both active and passive memory storage."""
104 | self.active_memory.clear()
105 | self.passive_memory = {}
106 | self.turn_count = 0
107 | print("OS-like memory cleared.")
108 |
109 | def get_memory_stats(self) -> Dict[str, Any]:
110 | """
111 | Get statistics about current memory usage.
112 |
113 | Returns:
114 | Dictionary containing memory statistics
115 | """
116 | active_turns = len(self.active_memory)
117 | passive_turns = len(self.passive_memory)
118 | total_turns = self.turn_count
119 |
120 | return {
121 | "strategy_type": "OSMemory",
122 | "ram_size": self.ram_size,
123 | "active_turns": active_turns,
124 | "passive_turns": passive_turns,
125 | "total_turns": total_turns,
126 | "memory_size": f"{active_turns} in RAM, {passive_turns} on disk",
127 | "advantages": ["Scalable management", "Intelligent paging", "Unlimited capacity"],
128 | "disadvantages": ["Complex paging", "May miss passive info", "Page fault overhead"]
129 | }
130 |
--------------------------------------------------------------------------------
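A sketch of the paging behavior with example turns (fully offline; OSMemory makes no API calls):

    from memory_strategies.os_memory import OSMemory

    memory = OSMemory(ram_size=2)
    memory.add_message("The project codename is Falcon.", "Noted.")  # turn 0
    memory.add_message("The budget is $10k.", "Understood.")         # turn 1
    memory.add_message("The deadline is Friday.", "Got it.")         # pages out turn 0

    # "project" matches the paged-out turn, so a simulated page fault pulls it back.
    print(memory.get_context("What is the project codename?"))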
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[codz]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py.cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # UV
98 | # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | #uv.lock
102 |
103 | # poetry
104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105 | # This is especially recommended for binary packages to ensure reproducibility, and is more
106 | # commonly ignored for libraries.
107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108 | #poetry.lock
109 | #poetry.toml
110 |
111 | # pdm
112 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
113 | # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
114 | # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
115 | #pdm.lock
116 | #pdm.toml
117 | .pdm-python
118 | .pdm-build/
119 |
120 | # pixi
121 | # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
122 | #pixi.lock
123 | # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
124 | # in the .venv directory. It is recommended not to include this directory in version control.
125 | .pixi
126 |
127 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
128 | __pypackages__/
129 |
130 | # Celery stuff
131 | celerybeat-schedule
132 | celerybeat.pid
133 |
134 | # SageMath parsed files
135 | *.sage.py
136 |
137 | # Environments
138 | .env
139 | .envrc
140 | .venv
141 | env/
142 | venv/
143 | ENV/
144 | env.bak/
145 | venv.bak/
146 |
147 | # Spyder project settings
148 | .spyderproject
149 | .spyproject
150 |
151 | # Rope project settings
152 | .ropeproject
153 |
154 | # mkdocs documentation
155 | /site
156 |
157 | # mypy
158 | .mypy_cache/
159 | .dmypy.json
160 | dmypy.json
161 |
162 | # Pyre type checker
163 | .pyre/
164 |
165 | # pytype static type analyzer
166 | .pytype/
167 |
168 | # Cython debug symbols
169 | cython_debug/
170 |
171 | # PyCharm
172 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
173 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
174 | # and can be added to the global gitignore or merged into this file. For a more nuclear
175 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
176 | #.idea/
177 |
178 | # Abstra
179 | # Abstra is an AI-powered process automation framework.
180 | # Ignore directories containing user credentials, local state, and settings.
181 | # Learn more at https://abstra.io/docs
182 | .abstra/
183 |
184 | # Visual Studio Code
185 | # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
186 | # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
187 | # and can be added to the global gitignore or merged into this file. However, if you prefer,
188 | # you could uncomment the following to ignore the entire vscode folder
189 | # .vscode/
190 |
191 | # Ruff stuff:
192 | .ruff_cache/
193 |
194 | # PyPI configuration file
195 | .pypirc
196 |
197 | # Cursor
198 | # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
199 | # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
200 | # refer to https://docs.cursor.com/context/ignore-files
201 | .cursorignore
202 | .cursorindexingignore
203 |
204 | # Marimo
205 | marimo/_static/
206 | marimo/_lsp/
207 | __marimo__/
208 |
--------------------------------------------------------------------------------
/memory_strategies/hierarchical_memory.py:
--------------------------------------------------------------------------------
1 | """
2 | Hierarchical Memory Strategy
3 |
4 | This strategy combines multiple memory types into a layered system that mimics
5 | human memory patterns with working memory (short-term) and long-term memory layers.
6 | """
7 |
8 | from typing import List, Dict, Any, Optional
9 | from openai import OpenAI
10 | from .base_memory import BaseMemoryStrategy
11 | from .sliding_window_memory import SlidingWindowMemory
12 | from .retrieval_memory import RetrievalMemory
13 | from .utils import get_openai_client
14 |
15 |
16 | class HierarchicalMemory(BaseMemoryStrategy):
17 | """
18 | Hierarchical memory strategy combining working memory and long-term memory.
19 |
20 | Advantages:
21 | - Multi-level information processing
22 | - Intelligent information promotion
23 | - Combines strengths of multiple strategies
24 | - Resembles human cognitive patterns
25 |
26 | Disadvantages:
27 | - Complex implementation
28 | - Multiple memory systems to manage
29 | - Promotion logic may need tuning
30 | - Higher computational overhead
31 | """
32 |
33 | def __init__(
34 | self,
35 | window_size: int = 2,
36 | k: int = 2,
37 | embedding_dim: int = 1536,
38 | client: Optional[OpenAI] = None
39 | ):
40 | """
41 | Initialize hierarchical memory system.
42 |
43 | Args:
44 | window_size: Size of short-term working memory (in turns)
45 | k: Number of documents to retrieve from long-term memory
46 | embedding_dim: Embedding vector dimension for long-term memory
47 | client: Optional OpenAI client instance
48 | """
49 | print("Initializing Hierarchical Memory...")
50 | self.client = client or get_openai_client()
51 |
52 | # Level 1: Fast, short-term working memory using sliding window
53 | self.working_memory = SlidingWindowMemory(window_size=window_size)
54 |
55 | # Level 2: Slower, persistent long-term memory using retrieval system
56 | self.long_term_memory = RetrievalMemory(k=k, embedding_dim=embedding_dim, client=self.client)
57 |
58 | # Simple heuristic: keywords that trigger promotion from working to long-term memory
59 | self.promotion_keywords = ["remember", "rule", "preference", "always", "never", "allergic", "important"]
60 |
61 | def add_message(self, user_input: str, ai_response: str) -> None:
62 | """
63 | Add messages to working memory and conditionally promote to long-term memory.
64 |
65 | Args:
66 | user_input: User's message
67 | ai_response: AI's response
68 | """
69 | # All interactions are added to fast, short-term working memory
70 | self.working_memory.add_message(user_input, ai_response)
71 |
72 | # Promotion logic: check if user input contains keywords indicating
73 | # information is important and should be stored long-term
74 | if any(keyword in user_input.lower() for keyword in self.promotion_keywords):
  75 |             print("--- [Hierarchical Memory: Promoting message to long-term storage.] ---")
76 | # If keywords found, also add interaction to long-term retrieval memory
77 | self.long_term_memory.add_message(user_input, ai_response)
78 |
79 | def get_context(self, query: str) -> str:
80 | """
81 | Construct rich context by combining relevant information from both memory layers.
82 |
83 | Args:
84 | query: Current user query
85 |
86 | Returns:
87 | Combined context from long-term and short-term memory
88 | """
89 | # Get recent context from working memory
90 | working_context = self.working_memory.get_context(query)
91 |
92 | # Retrieve relevant content from long-term memory
93 | long_term_context = self.long_term_memory.get_context(query)
94 |
95 | # If no relevant content in long-term memory, use only working memory
96 | if ("No information in memory yet" in long_term_context or
97 | "Could not find any relevant information" in long_term_context):
98 | return f"### Recent Context:\n{working_context}"
99 | else:
100 | # Otherwise, combine both memory layers
101 | return f"### Long-Term Context:\n{long_term_context}\n\n### Recent Context:\n{working_context}"
102 |
103 | def clear(self) -> None:
104 | """Reset both working memory and long-term memory."""
105 | self.working_memory.clear()
106 | self.long_term_memory.clear()
107 | print("Hierarchical memory cleared.")
108 |
109 | def get_memory_stats(self) -> Dict[str, Any]:
110 | """
111 | Get statistics about current memory usage from both layers.
112 |
113 | Returns:
114 | Dictionary containing memory statistics
115 | """
116 | working_stats = self.working_memory.get_memory_stats()
117 | long_term_stats = self.long_term_memory.get_memory_stats()
118 |
119 | return {
120 | "strategy_type": "HierarchicalMemory",
121 | "promotion_keywords": self.promotion_keywords,
122 | "working_memory_stats": working_stats,
123 | "long_term_memory_stats": long_term_stats,
124 | "memory_size": f"Working: {working_stats['memory_size']}, Long-term: {long_term_stats['memory_size']}",
125 | "advantages": ["Multi-level processing", "Intelligent promotion", "Human-like patterns"],
126 | "disadvantages": ["Complex implementation", "Multiple systems", "Overhead"]
127 | }
128 |
--------------------------------------------------------------------------------
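A sketch of keyword-driven promotion with example messages (requires OPENAI_API_KEY, because the long-term layer embeds promoted turns through the embeddings API):

    from memory_strategies import HierarchicalMemory

    memory = HierarchicalMemory(window_size=2, k=2)
    memory.add_message("Hi there!", "Hello!")  # working memory only
    memory.add_message("Remember: I am allergic to peanuts.",  # "remember"/"allergic"
                       "Noted, no peanuts.")                   # triggers promotion

    # Long-term retrieval should now surface the allergy alongside recent turns.
    print(memory.get_context("What am I allergic to?"))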
/memory_strategies/memory_augmented_memory.py:
--------------------------------------------------------------------------------
1 | """
2 | Memory-Augmented Memory Strategy
3 |
4 | This strategy simulates memory-enhanced transformer behavior by maintaining
5 | a short-term sliding window of recent conversations and a separate list of
6 | "memory tokens" - important facts extracted from conversations.
7 | """
8 |
9 | from typing import List, Dict, Any, Optional
10 | from openai import OpenAI
11 | from .base_memory import BaseMemoryStrategy
12 | from .sliding_window_memory import SlidingWindowMemory
13 | from .utils import generate_text, get_openai_client
14 |
15 |
16 | class MemoryAugmentedMemory(BaseMemoryStrategy):
17 | """
18 | Memory-augmented strategy combining sliding window with persistent memory tokens.
19 |
20 | Advantages:
21 | - Excellent long-term retention of key information
22 | - Suitable for evolving long-term conversations
23 | - Intelligent fact extraction mechanism
24 | - Strong foundation for personal assistants
25 |
26 | Disadvantages:
27 | - More complex implementation
28 | - Additional LLM calls increase cost
29 | - Depends on fact extraction quality
30 | - May increase response time
31 | """
32 |
33 | def __init__(self, window_size: int = 2, client: Optional[OpenAI] = None):
34 | """
35 | Initialize memory-augmented system.
36 |
37 | Args:
38 | window_size: Number of recent turns to retain in short-term memory
39 | client: Optional OpenAI client instance
40 | """
41 | self.client = client or get_openai_client()
42 |
43 | # Use SlidingWindowMemory instance to manage recent conversation history
44 | self.recent_memory = SlidingWindowMemory(window_size=window_size)
45 |
46 | # List to store special, persistent "sticky notes" or key facts
47 | self.memory_tokens: List[str] = []
48 |
49 | def add_message(self, user_input: str, ai_response: str) -> None:
50 | """
51 | Add latest turn to recent memory, then use LLM call to decide
52 | if new persistent memory tokens should be created from this interaction.
53 |
54 | Args:
55 | user_input: User's message
56 | ai_response: AI's response
57 | """
58 | # First, add new interaction to short-term sliding window memory
59 | self.recent_memory.add_message(user_input, ai_response)
60 |
61 | # Construct prompt for LLM to analyze conversation turn and
62 | # determine if it contains core facts worth remembering long-term
63 | fact_extraction_prompt = (
64 | f"Analyze the following conversation turn. Does it contain a core fact, preference, or decision that should be remembered long-term? "
65 | f"Examples include user preferences ('I hate flying'), key decisions ('The budget is $1000'), or important facts ('My user ID is 12345').\n\n"
66 | f"Conversation Turn:\nUser: {user_input}\nAI: {ai_response}\n\n"
67 | f"If it contains such a fact, state the fact concisely in one sentence. Otherwise, respond with 'No important fact.'"
68 | )
69 |
70 | # Call LLM to perform fact extraction
71 | extracted_fact = generate_text(
72 | "You are a fact-extraction expert.",
73 | fact_extraction_prompt,
74 | self.client
75 | )
76 |
77 | # Check if LLM's response indicates an important fact was found
78 | if "no important fact" not in extracted_fact.lower():
79 | # If fact found, print debug message and add to memory tokens list
80 | print(f"--- [Memory Augmentation: New memory token created: '{extracted_fact}'] ---")
81 | self.memory_tokens.append(extracted_fact)
82 |
83 | def get_context(self, query: str) -> str:
84 | """
85 | Construct context by combining short-term recent conversation
86 | with list of all long-term, persistent memory tokens.
87 |
88 | Args:
89 | query: Current user query
90 |
91 | Returns:
92 | Combined context from memory tokens and recent conversation
93 | """
94 | # Get context from short-term sliding window
95 | recent_context = self.recent_memory.get_context(query)
96 |
97 | # Format memory tokens list as readable string
98 | if self.memory_tokens:
99 | memory_token_context = "\n".join([f"- {token}" for token in self.memory_tokens])
100 | return f"### Key Memory Tokens (Long-Term Facts):\n{memory_token_context}\n\n### Recent Conversation:\n{recent_context}"
101 | else:
102 | return f"### Recent Conversation:\n{recent_context}"
103 |
104 | def clear(self) -> None:
105 | """Reset both recent memory and memory tokens."""
106 | self.recent_memory.clear()
107 | self.memory_tokens = []
108 | print("Memory-augmented memory cleared.")
109 |
110 | def get_memory_stats(self) -> Dict[str, Any]:
111 | """
112 | Get statistics about current memory usage.
113 |
114 | Returns:
115 | Dictionary containing memory statistics
116 | """
117 | recent_stats = self.recent_memory.get_memory_stats()
118 | num_tokens = len(self.memory_tokens)
119 |
120 | return {
121 | "strategy_type": "MemoryAugmentedMemory",
122 | "memory_tokens": num_tokens,
123 | "recent_memory_stats": recent_stats,
124 | "memory_size": f"{num_tokens} memory tokens + recent window",
125 | "advantages": ["Long-term retention", "Intelligent extraction", "Personal assistant ready"],
126 | "disadvantages": ["Complex implementation", "Additional LLM calls", "Fact extraction dependent"]
127 | }
128 |
--------------------------------------------------------------------------------
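A sketch of the extraction flow (requires OPENAI_API_KEY; every add_message spends one extra LLM call on fact extraction, and the result depends on what the extractor returns):

    from memory_strategies import MemoryAugmentedMemory

    memory = MemoryAugmentedMemory(window_size=2)
    memory.add_message("The budget is $1000.", "Understood, $1000 total.")

    # If the extractor found a core fact, it now lives in memory.memory_tokens.
    print(memory.memory_tokens)
    print(memory.get_context("What was the budget again?"))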
/memory_strategies/retrieval_memory.py:
--------------------------------------------------------------------------------
1 | """
2 | Retrieval-based Memory Strategy
3 |
4 | This strategy implements the core concept of Retrieval-Augmented Generation (RAG).
5 | It converts conversations into vector embeddings and uses similarity search to find
6 | the most relevant historical interactions for any given query.
7 | """
8 |
9 | import numpy as np
10 | import faiss
11 | from typing import List, Dict, Any, Optional
12 | from openai import OpenAI
13 | from .base_memory import BaseMemoryStrategy
14 | from .utils import generate_embedding, get_openai_client
15 |
16 |
17 | class RetrievalMemory(BaseMemoryStrategy):
18 | """
19 | Retrieval-based memory strategy using vector embeddings and similarity search.
20 |
21 | Advantages:
22 | - Semantic understanding of queries
23 | - Efficient retrieval of relevant information
24 | - Scalable to large conversation histories
25 | - Industry standard for RAG applications
26 |
27 | Disadvantages:
28 | - Complex implementation
29 | - Requires embedding model
30 | - Dependent on embedding quality
31 | - Additional computational overhead
32 | """
33 |
34 | def __init__(self, k: int = 2, embedding_dim: int = 1536, client: Optional[OpenAI] = None):
35 | """
36 | Initialize retrieval memory system.
37 |
38 | Args:
39 | k: Number of most relevant documents to retrieve for a given query
40 | embedding_dim: Dimension of embedding vectors (1536 for text-embedding-3-small)
41 | client: Optional OpenAI client instance
42 | """
43 | self.k = k
44 | self.embedding_dim = embedding_dim
45 | self.client = client or get_openai_client()
46 |
47 | # List to store original text content of each document
48 | self.documents: List[str] = []
49 |
50 | # Initialize FAISS index for similarity search
51 | # IndexFlatL2 uses L2 (Euclidean) distance for exhaustive search
52 | self.index = faiss.IndexFlatL2(self.embedding_dim)
53 |
54 | def add_message(self, user_input: str, ai_response: str) -> None:
55 | """
56 | Add new conversation turn to memory.
57 |
58 | Each part of the turn (user input and AI response) is embedded
59 | and indexed separately for fine-grained retrieval.
60 |
61 | Args:
62 | user_input: User's message
63 | ai_response: AI's response
64 | """
65 | # Store each part of the turn as separate documents for precise matching
66 | docs_to_add = [
67 | f"User said: {user_input}",
68 | f"AI responded: {ai_response}"
69 | ]
70 |
71 | for doc in docs_to_add:
72 | # Generate numerical vector representation of the document
73 | embedding = generate_embedding(doc, self.client)
74 |
75 | # Only proceed if embedding was successfully created
76 | if embedding:
77 | # Store original text - index will correspond to vector index in FAISS
78 | self.documents.append(doc)
79 |
80 | # FAISS requires input vectors to be float32 2D numpy arrays
81 | vector = np.array([embedding], dtype='float32')
82 |
83 | # Add vector to FAISS index, making it searchable
84 | self.index.add(vector)
85 |
86 | def get_context(self, query: str) -> str:
87 | """
88 | Find k most relevant documents from memory based on semantic similarity to query.
89 |
90 | Args:
91 | query: Current user query to find relevant context for
92 |
93 | Returns:
94 | Formatted string containing most relevant retrieved information
95 | """
96 | # If index has no vectors, there's nothing to search
97 | if self.index.ntotal == 0:
98 | return "No information in memory yet."
99 |
100 | # Convert user query to embedding vector
101 | query_embedding = generate_embedding(query, self.client)
102 | if not query_embedding:
103 | return "Could not process query for retrieval."
104 |
105 | # Convert query embedding to format required by FAISS
106 | query_vector = np.array([query_embedding], dtype='float32')
107 |
108 | # Perform search - returns distances and indices of k nearest neighbors
109 | distances, indices = self.index.search(query_vector, self.k)
110 |
111 | # Use returned indices to retrieve original text documents
112 | # Check for i != -1 because FAISS may return -1 for invalid indices
113 | retrieved_docs = [
114 | self.documents[i] for i in indices[0]
115 | if i != -1 and i < len(self.documents)
116 | ]
117 |
118 | if not retrieved_docs:
119 | return "Could not find any relevant information in memory."
120 |
121 | # Format retrieved documents as string for use as context
122 | return "### Relevant Information Retrieved from Memory:\n" + "\n---\n".join(retrieved_docs)
123 |
124 | def clear(self) -> None:
125 | """Reset both document storage and FAISS index."""
126 | self.documents = []
127 | self.index = faiss.IndexFlatL2(self.embedding_dim)
128 | print("Retrieval memory cleared.")
129 |
130 | def get_memory_stats(self) -> Dict[str, Any]:
131 | """
132 | Get statistics about current memory usage.
133 |
134 | Returns:
135 | Dictionary containing memory statistics
136 | """
137 | num_documents = len(self.documents)
138 | num_vectors = self.index.ntotal
139 |
140 | return {
141 | "strategy_type": "RetrievalMemory",
142 | "k": self.k,
143 | "embedding_dim": self.embedding_dim,
144 | "num_documents": num_documents,
145 | "num_vectors": num_vectors,
146 | "memory_size": f"{num_documents} documents, {num_vectors} vectors",
147 | "advantages": ["Semantic search", "Scalable", "Relevant retrieval"],
148 | "disadvantages": ["Complex implementation", "Embedding dependent"]
149 | }
150 |
--------------------------------------------------------------------------------
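A usage sketch with example messages (requires OPENAI_API_KEY; every message and query is embedded with text-embedding-3-small before hitting the FAISS index):

    from memory_strategies import RetrievalMemory

    memory = RetrievalMemory(k=2)
    memory.add_message("My dog's name is Biscuit.", "Cute name!")
    memory.add_message("I work as a geologist.", "Interesting field!")

    # Similarity search should rank the dog-related documents first.
    print(memory.get_context("What is my pet called?"))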
/memory_strategies/summarization_memory.py:
--------------------------------------------------------------------------------
1 | """
2 | Summarization Memory Strategy
3 |
4 | This strategy manages long conversations by periodically summarizing conversation history.
5 | It maintains a buffer of recent messages and triggers summarization when the buffer
6 | reaches a threshold, using LLM to compress historical information intelligently.
7 | """
8 |
9 | from typing import List, Dict, Any, Optional
10 | from openai import OpenAI
11 | from .base_memory import BaseMemoryStrategy
12 | from .utils import generate_text, get_openai_client
13 |
14 |
15 | class SummarizationMemory(BaseMemoryStrategy):
16 | """
17 | Summarization memory strategy that compresses conversation history using LLM.
18 |
19 | Advantages:
20 | - Manages long conversations efficiently
21 | - Retains key information through intelligent compression
22 | - Scalable token usage
23 | - Maintains conversation flow
24 |
25 | Disadvantages:
26 | - May lose details during summarization
27 | - Depends on LLM summarization quality
28 | - Additional LLM calls increase cost
29 | - Information decay over time
30 | """
31 |
32 | def __init__(self, summary_threshold: int = 4, client: Optional[OpenAI] = None):
33 | """
34 | Initialize summarization memory.
35 |
36 | Args:
37 | summary_threshold: Number of messages to accumulate before triggering summary
38 | client: Optional OpenAI client instance
39 | """
40 | self.summary_threshold = summary_threshold
41 | self.client = client or get_openai_client()
42 |
43 | # Store continuously updated summary of conversation so far
44 | self.running_summary = ""
45 |
46 | # Temporary list to hold recent messages before summarization
47 | self.buffer: List[Dict[str, str]] = []
48 |
49 | def add_message(self, user_input: str, ai_response: str) -> None:
50 | """
51 | Add new user-AI interaction to buffer.
52 |
53 | If buffer size reaches threshold, triggers memory consolidation process.
54 |
55 | Args:
56 | user_input: User's message
57 | ai_response: AI's response
58 | """
59 | # Append latest user and AI messages to temporary buffer
60 | self.buffer.append({"role": "user", "content": user_input})
61 | self.buffer.append({"role": "assistant", "content": ai_response})
62 |
63 | # Check if buffer has reached its capacity
64 | if len(self.buffer) >= self.summary_threshold:
65 | # If so, call method to summarize buffer contents
66 | self._consolidate_memory()
67 |
68 | def _consolidate_memory(self) -> None:
69 | """
70 | Use LLM to summarize buffer contents and merge with existing summary.
71 |
72 | This is the core innovation of the summarization strategy.
73 | """
74 | print("\n--- [Memory Consolidation Triggered] ---")
75 |
76 | # Convert buffered message list to single formatted string
77 | buffer_text = "\n".join([
78 | f"{msg['role'].capitalize()}: {msg['content']}"
79 | for msg in self.buffer
80 | ])
81 |
82 | # Construct specific prompt for LLM to perform summarization task
83 | summarization_prompt = (
84 | f"You are a summarization expert. Your task is to create a concise summary of a conversation. "
85 | f"Combine the 'Previous Summary' with the 'New Conversation' into a single, updated summary. "
86 | f"Capture all key facts, names, decisions, and important details.\n\n"
87 | f"### Previous Summary:\n{self.running_summary}\n\n"
88 | f"### New Conversation:\n{buffer_text}\n\n"
89 | f"### Updated Summary:"
90 | )
91 |
92 | # Call LLM with specific system prompt to get new summary
93 | new_summary = generate_text(
94 | "You are an expert summarization engine.",
95 | summarization_prompt,
96 | self.client
97 | )
98 |
99 | # Replace old summary with newly generated merged summary
100 | self.running_summary = new_summary
101 |
102 | # Clear buffer since its contents are now merged into summary
103 | self.buffer = []
104 |
105 | print(f"--- [New Summary Generated] ---")
106 | print(f"Summary: {self.running_summary[:100]}...")
107 |
108 | def get_context(self, query: str) -> str:
109 | """
110 | Construct context to send to LLM by combining long-term summary
111 | with short-term buffer of recent messages.
112 |
113 | Args:
114 | query: Current user query (ignored in this strategy)
115 |
116 | Returns:
117 | Combined context from summary and recent messages
118 | """
119 | # Format current messages in buffer
120 | buffer_text = "\n".join([
121 | f"{msg['role'].capitalize()}: {msg['content']}"
122 | for msg in self.buffer
123 | ])
124 |
125 | # Return combination of historical summary and recent unsummarized messages
126 | if self.running_summary:
127 | return f"### Summary of Past Conversation:\n{self.running_summary}\n\n### Recent Messages:\n{buffer_text}"
128 | else:
129 | return f"### Recent Messages:\n{buffer_text}" if buffer_text else "No conversation history yet."
130 |
131 | def clear(self) -> None:
132 | """Reset both summary and buffer."""
133 | self.running_summary = ""
134 | self.buffer = []
135 | print("Summarization memory cleared.")
136 |
137 | def get_memory_stats(self) -> Dict[str, Any]:
138 | """
139 | Get statistics about current memory usage.
140 |
141 | Returns:
142 | Dictionary containing memory statistics
143 | """
144 | buffer_messages = len(self.buffer)
145 | has_summary = bool(self.running_summary)
146 |
147 | return {
148 | "strategy_type": "SummarizationMemory",
149 | "summary_threshold": self.summary_threshold,
150 | "buffer_messages": buffer_messages,
151 | "has_summary": has_summary,
152 | "summary_length": len(self.running_summary) if has_summary else 0,
153 | "memory_size": f"Summary + {buffer_messages} buffered messages",
154 | "advantages": ["Efficient compression", "Retains key info", "Scalable"],
155 | "disadvantages": ["May lose details", "LLM dependent", "Additional cost"]
156 | }
157 |
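With `summary_threshold=4`, the buffer holds two full turns (four messages) before consolidation fires. A minimal usage sketch (hypothetical dialogue; assumes a valid `OPENAI_API_KEY` so the summarization call can run):

```python
from memory_strategies import SummarizationMemory

memory = SummarizationMemory(summary_threshold=4)
memory.add_message("My name is Alex.", "Nice to meet you, Alex!")
# The second turn pushes the buffer to 4 messages and triggers _consolidate_memory()
memory.add_message("I'm allergic to peanuts.", "I'll remember that.")

# The buffer is now empty, so the context is just the running summary
print(memory.get_context("What do you know about me?"))
```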
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # AI Agent Memory Design & Optimization Playground
2 |
3 | [](https://python.org)
4 | [](https://streamlit.io)
5 | [](https://fastapi.tiangolo.com)
6 | [](LICENSE)
7 |
8 | > **Interactive playground for testing and comparing 9 different AI agent memory optimization strategies**
9 |
10 | 
11 |
12 | ## Overview
13 |
14 | This project implements **9 different memory optimization techniques** for AI agents, providing a comprehensive solution for managing conversation history and context in production AI systems. Each strategy is implemented as a modular, plug-and-play class with a unified interface.
15 |
16 | ### Why Memory Optimization Matters
17 |
18 | - **Token Cost Reduction**: Prevent runaway growth in LLM API costs as conversations lengthen
19 | - **Context Preservation**: Maintain relevant information across conversations
20 | - **Scalability**: Handle long conversations efficiently
21 | - **Performance**: Optimize response times and memory usage
22 |
23 | ## Memory Strategies Implemented
24 |
25 | ### Basic Strategies
26 | 1. **Sequential Memory** - Complete conversation history storage
27 | 2. **Sliding Window Memory** - Fixed-size recent conversation window
28 | 3. **Summarization Memory** - LLM-based conversation compression
29 |
30 | ### Advanced Strategies
31 | 4. **Retrieval Memory (RAG)** - Vector similarity search for semantic retrieval
32 | 5. **Memory-Augmented Memory** - Persistent memory tokens with sliding window
33 | 6. **Hierarchical Memory** - Multi-layered working + long-term memory
34 |
35 | ### Complex Strategies
36 | 7. **Graph Memory** - Knowledge graph with entity relationships
37 | 8. **Compression Memory** - Intelligent compression with importance scoring
38 | 9. **OS-like Memory** - RAM/disk simulation with paging mechanisms
39 |
40 | ## Features
41 |
42 | - **Modular Architecture** - Strategy pattern for easy swapping
43 | - **Interactive Playground** - Streamlit web interface for testing
44 | - **Performance Analytics** - Token usage and response time tracking
45 | - **Batch Comparison** - Test multiple strategies simultaneously
46 | - **Production Ready** - FastAPI endpoints for deployment
47 | - **Real-time Metrics** - Memory statistics and performance monitoring
48 |
49 | ## Installation
50 |
51 | ### Prerequisites
52 | - Python 3.10+
53 | - OpenAI API Key
54 |
55 | ### Setup
56 |
57 | 1. **Clone the repository**
58 | ```bash
59 | git clone https://github.com/AIAnytime/Agent-Memory-Playground.git
60 | cd Agent-Memory-Playground
61 | ```
62 |
63 | 2. **Install dependencies**
64 | ```bash
65 | pip install -r requirements.txt
66 | ```
67 |
68 | 3. **Configure environment**
69 | ```bash
70 | # Create .env file
71 | echo "OPENAI_API_KEY=your_openai_api_key_here" > .env
72 | ```
73 |
74 | ## Quick Start
75 |
76 | ### 1. Interactive Playground (Streamlit)
77 | ```bash
78 | streamlit run streamlit_playground.py
79 | ```
80 | - Open http://localhost:8501 in your browser
81 | - Enter your OpenAI API key in the sidebar
82 | - Select a memory strategy and start testing!
83 |
84 | ### 2. API Server (FastAPI)
85 | ```bash
86 | uvicorn api:app --reload
87 | ```
88 | - API documentation: http://localhost:8000/docs
89 | - Create sessions, chat, and monitor performance via REST API
90 |
91 | ### 3. Command Line Example
92 | ```bash
93 | python example_usage.py
94 | ```
95 | - Interactive CLI for testing all memory strategies
96 | - Detailed memory statistics and performance metrics
97 |
98 | ## Usage Examples
99 |
100 | ### Basic Usage
101 | ```python
102 | from memory_strategies import SequentialMemory, AIAgent
103 |
104 | # Initialize memory strategy
105 | memory = SequentialMemory()
106 | agent = AIAgent(memory_strategy=memory)
107 |
108 | # Chat with the agent
109 | response = agent.chat("Hello! My name is Alex.")
110 | print(response["ai_response"])
111 |
112 | # Memory automatically preserved for next interaction
113 | response = agent.chat("What's my name?")
114 | print(response["ai_response"]) # Will remember "Alex"
115 | ```
116 |
117 | ### Advanced RAG Implementation
118 | ```python
119 | from memory_strategies import RetrievalMemory, AIAgent
120 |
121 | # Initialize RAG-based memory
122 | memory = RetrievalMemory(k=3) # Retrieve top 3 similar conversations
123 | agent = AIAgent(memory_strategy=memory)
124 |
125 | # Build conversation history
126 | agent.chat("I'm a software engineer working on ML projects")
127 | agent.chat("I prefer Python and love coffee")
128 | agent.chat("I'm building a recommendation system")
129 |
130 | # Query with semantic similarity
131 | response = agent.chat("What do you know about my work?")
132 | # Will retrieve relevant context about ML, Python, and recommendation systems
133 | ```
134 |
135 | ### Production API Usage
136 | ```bash
137 | # Create a session with hierarchical memory
138 | curl -X POST "http://localhost:8000/sessions" \
139 | -H "Content-Type: application/json" \
140 | -d '{
141 | "strategy_type": "hierarchical",
142 | "system_prompt": "You are a helpful AI assistant.",
143 | "api_key": "your_openai_key"
144 | }'
145 |
146 | # Chat with the session
147 | curl -X POST "http://localhost:8000/sessions/{session_id}/chat" \
148 | -H "Content-Type: application/json" \
149 | -d '{
150 | "message": "Remember that I prefer concise responses",
151 | "api_key": "your_openai_key"
152 | }'
153 | ```
154 |
155 | ## Performance Comparison
156 |
157 | | Strategy | Token Efficiency | Retrieval Speed | Memory Usage | Best For |
158 | |----------|------------------|-----------------|--------------|----------|
159 | | Sequential | ❌ Low | ⚡ Instant | 📈 High | Short conversations |
160 | | Sliding Window | ✅ High | ⚡ Instant | 📊 Constant | Real-time chat |
161 | | Retrieval (RAG) | ✅ High | 🔍 Fast | 📊 Medium | Production systems |
162 | | Hierarchical | ✅ Very High | 🔍 Fast | 📊 Medium | Complex applications |
163 | | Graph Memory | 🔍 Medium | 🐌 Slow | 📈 High | Knowledge systems |
164 |
165 | ## Architecture
166 |
167 | ### Strategy Pattern Design
168 | ```
169 | AIAgent
170 | ├── BaseMemoryStrategy (Abstract)
171 | │ ├── add_message()
172 | │ ├── get_context()
173 | │ └── clear()
174 | ├── SequentialMemory
175 | ├── SlidingWindowMemory
176 | ├── RetrievalMemory
177 | └── ... (6 more strategies)
178 | ```
179 |
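The abstract base makes it straightforward to plug in your own strategy. Below is a minimal sketch of a custom strategy, assuming `BaseMemoryStrategy` lives in `memory_strategies/base_memory.py` (as the package's internal imports suggest) and that the three methods in the diagram, plus `get_memory_stats()` like the bundled strategies, are all that is required; `KeywordMemory` itself is hypothetical:

```python
from memory_strategies import AIAgent
from memory_strategies.base_memory import BaseMemoryStrategy

class KeywordMemory(BaseMemoryStrategy):
    """Hypothetical strategy: remember only turns mentioning given keywords."""

    def __init__(self, keywords):
        self.keywords = [k.lower() for k in keywords]
        self.turns = []

    def add_message(self, user_input: str, ai_response: str) -> None:
        text = f"{user_input} {ai_response}".lower()
        if any(k in text for k in self.keywords):
            self.turns.append(f"User: {user_input}\nAI: {ai_response}")

    def get_context(self, query: str) -> str:
        return "\n".join(self.turns) or "No conversation history yet."

    def clear(self) -> None:
        self.turns = []

    def get_memory_stats(self) -> dict:
        return {"strategy_type": "KeywordMemory", "num_turns": len(self.turns)}

agent = AIAgent(memory_strategy=KeywordMemory(["allergy", "deadline"]))
```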
180 | ### Key Components
181 | - **Memory Strategies**: Modular memory implementations
182 | - **AI Agent**: Core agent using strategy pattern
183 | - **Utilities**: Token counting, embeddings, LLM integration
184 | - **API Layer**: FastAPI endpoints for production use
185 | - **Playground**: Streamlit interface for testing
186 |
187 | ## Monitoring & Metrics
188 |
189 | Track essential performance metrics:
190 |
191 | ```python
192 | {
193 | "total_content_tokens": 1250, # Raw conversation data
194 | "total_prompt_tokens": 4800, # Actual LLM costs
195 | "average_retrieval_time": 0.15, # Memory access speed
196 | "memory_efficiency": 0.73, # Compression ratio
197 | "context_relevance_score": 0.89 # Quality of retrieved context
198 | }
199 | ```
200 |
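The per-turn dictionaries returned by `agent.chat()` can feed these aggregates. A rough sketch, using only keys demonstrated elsewhere in this repo (`ai_response`, `prompt_tokens`, `generation_time`); the aggregation itself is illustrative, not a built-in API:

```python
from memory_strategies import AIAgent, SlidingWindowMemory

agent = AIAgent(memory_strategy=SlidingWindowMemory(window_size=4))

prompt_tokens, gen_times = [], []
for msg in ["Hi, I'm Alex.", "What's my name?"]:
    result = agent.chat(msg, verbose=False)
    prompt_tokens.append(result["prompt_tokens"])
    gen_times.append(result["generation_time"])

print({
    "total_prompt_tokens": sum(prompt_tokens),
    "average_generation_time": sum(gen_times) / len(gen_times),
})
```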
201 | ## Configuration
202 |
203 | ### Memory Strategy Parameters
204 |
205 | **Sliding Window Memory**
206 | ```python
207 | SlidingWindowMemory(window_size=4) # Keep last 4 conversation turns
208 | ```
209 |
210 | **Retrieval Memory (RAG)**
211 | ```python
212 | RetrievalMemory(k=3) # Retrieve top 3 similar conversations
213 | ```
214 |
215 | **Hierarchical Memory**
216 | ```python
217 | HierarchicalMemory(
218 | window_size=2, # Working memory size
219 | k=3 # Long-term retrieval count
220 | )
221 | ```
222 |
223 | ## Production Deployment
224 |
225 | ### Docker Deployment
226 | ```dockerfile
227 | FROM python:3.10-slim
228 |
229 | WORKDIR /app
230 | COPY requirements.txt .
231 | RUN pip install -r requirements.txt
232 |
233 | COPY . .
234 | EXPOSE 8000
235 |
236 | CMD ["uvicorn", "api:app", "--host", "0.0.0.0", "--port", "8000"]
237 | ```
238 |
239 | ### Environment Variables
240 | ```bash
241 | OPENAI_API_KEY=your_openai_api_key
242 | OPENAI_MODEL=gpt-4o-mini
243 | EMBEDDING_MODEL=text-embedding-3-small
244 | ```
245 |
246 | ## Testing
247 |
248 | Run the test suite:
249 | ```bash
250 | python -m pytest tests/
251 | ```
252 |
253 | Run performance benchmarks:
254 | ```bash
255 | python benchmark.py
256 | ```
257 |
258 | ## Documentation
259 |
260 | - **[Technical Guide](AI_Agent_Memory_Documentation.md)** - Comprehensive implementation details
261 | - **[API Documentation](http://localhost:8000/docs)** - FastAPI interactive docs
262 | - **[Strategy Comparison](docs/strategy-comparison.md)** - Performance analysis
263 | - **[Production Guide](docs/production-guide.md)** - Deployment best practices
264 |
265 | ## Contributing
266 |
267 | We welcome contributions! Please see our [Contributing Guidelines](CONTRIBUTING.md) for details.
268 |
269 | 1. Fork the repository
270 | 2. Create a feature branch (`git checkout -b feature/amazing-feature`)
271 | 3. Commit your changes (`git commit -m 'Add amazing feature'`)
272 | 4. Push to the branch (`git push origin feature/amazing-feature`)
273 | 5. Open a Pull Request
274 |
275 | ## License
276 |
277 | This project is licensed under the Apache 2.0 License - see the [LICENSE](LICENSE) file for details.
278 |
279 | ## Acknowledgments
280 |
281 | - **OpenAI** for providing the GPT models and embeddings
282 | - **Streamlit** for the amazing web framework
283 | - **FastAPI** for the high-performance API framework
284 | - **FAISS** for efficient vector similarity search
285 |
286 | ## Support & Contact
287 |
288 | - **Website**: [aianytime.net](https://aianytime.net)
289 | - **Creator Portfolio**: [sonukumar.site](https://sonukumar.site)
290 | - **YouTube**: [@AIAnytime](https://www.youtube.com/@AIAnytime)
291 | - **Issues**: [GitHub Issues](https://github.com/AIAnytime/Agent-Memory-Playground/issues)
292 |
293 | ---
294 |
295 | Built with ❤️ by AI Anytime
296 |
297 | Star this repo if you find it helpful!
298 |
299 |
--------------------------------------------------------------------------------
/memory_strategies/graph_memory.py:
--------------------------------------------------------------------------------
1 | """
2 | Graph Memory Network Strategy
3 |
4 | This strategy treats conversation elements as nodes and their relationships as edges,
5 | enabling complex reasoning and relationship understanding. Particularly suited for
6 | expert systems and knowledge base applications.
7 | """
8 |
9 | import networkx as nx
10 | from typing import List, Dict, Any, Optional
11 | from openai import OpenAI
12 | from .base_memory import BaseMemoryStrategy
13 | from .utils import generate_text, get_openai_client
14 |
15 |
16 | class GraphMemory(BaseMemoryStrategy):
17 | """
18 | Graph-based memory strategy using NetworkX for relationship modeling.
19 |
20 | Advantages:
21 | - Models complex relationships between information
22 | - Supports logical reasoning queries
23 | - Structured knowledge representation
24 | - Excellent for expert systems
25 |
26 | Disadvantages:
27 | - Complex implementation and maintenance
28 | - Requires relationship extraction
29 | - May be overkill for simple conversations
30 | - Computational overhead for large graphs
31 | """
32 |
33 | def __init__(self, client: Optional[OpenAI] = None):
34 | """
35 | Initialize graph memory system.
36 |
37 | Args:
38 | client: Optional OpenAI client instance
39 | """
40 | self.client = client or get_openai_client()
41 |
42 | # Initialize directed graph to store conversation elements and relationships
43 | self.knowledge_graph = nx.DiGraph()
44 |
45 | # Counter for generating unique node IDs
46 | self.node_counter = 0
47 |
48 | # Store raw conversation history for fallback
49 | self.conversation_history: List[Dict[str, str]] = []
50 |
51 | def add_message(self, user_input: str, ai_response: str) -> None:
52 | """
53 | Add conversation turn to graph by extracting entities and relationships.
54 |
55 | Args:
56 | user_input: User's message
57 | ai_response: AI's response
58 | """
59 | # Store raw conversation for fallback
60 | self.conversation_history.append({
61 | "user": user_input,
62 | "assistant": ai_response,
63 | "turn_id": self.node_counter
64 | })
65 |
66 | # Extract entities and relationships from the conversation turn
67 | self._extract_and_add_entities(user_input, "user", self.node_counter)
68 | self._extract_and_add_entities(ai_response, "assistant", self.node_counter)
69 |
70 | self.node_counter += 1
71 |
72 | def _extract_and_add_entities(self, text: str, speaker: str, turn_id: int) -> None:
73 | """
74 | Extract entities and relationships from text and add to knowledge graph.
75 |
76 | Args:
77 | text: Text to extract entities from
78 | speaker: Who said the text (user/assistant)
79 | turn_id: Turn identifier
80 | """
81 | # Use LLM to extract key entities and relationships
82 | extraction_prompt = (
83 | f"Extract key entities (people, places, concepts, facts) and relationships from this text. "
84 | f"Format as: ENTITIES: entity1, entity2, entity3... RELATIONSHIPS: entity1->relationship->entity2, etc.\n\n"
85 | f"Text: {text}\n\n"
86 | f"If no clear entities or relationships, respond with 'ENTITIES: none RELATIONSHIPS: none'"
87 | )
88 |
89 | extracted_info = generate_text(
90 | "You are an entity and relationship extraction expert.",
91 | extraction_prompt,
92 | self.client
93 | )
94 |
95 | # Parse extracted information and add to graph
96 | self._parse_and_add_to_graph(extracted_info, speaker, turn_id, text)
97 |
98 | def _parse_and_add_to_graph(self, extracted_info: str, speaker: str, turn_id: int, original_text: str) -> None:
99 | """
100 | Parse extracted entities and relationships and add them to the knowledge graph.
101 |
102 | Args:
103 | extracted_info: LLM-extracted entities and relationships
104 | speaker: Who said the text
105 | turn_id: Turn identifier
106 | original_text: Original text for context
107 | """
108 | try:
109 | # Simple parsing of the extraction format
110 | if "ENTITIES:" in extracted_info and "RELATIONSHIPS:" in extracted_info:
111 | parts = extracted_info.split("RELATIONSHIPS:")
112 | entities_part = parts[0].replace("ENTITIES:", "").strip()
113 | relationships_part = parts[1].strip() if len(parts) > 1 else ""
114 |
115 | # Add entities as nodes
116 | if entities_part.lower() != "none":
117 | entities = [e.strip() for e in entities_part.split(",") if e.strip()]
118 | for entity in entities:
119 | if entity:
120 | # Add entity node with metadata
121 | self.knowledge_graph.add_node(
122 | entity,
123 | type="entity",
124 | speaker=speaker,
125 | turn_id=turn_id,
126 | context=original_text[:100] # First 100 chars for context
127 | )
128 |
129 | # Add relationships as edges
130 | if relationships_part.lower() != "none":
131 | relationships = [r.strip() for r in relationships_part.split(",") if r.strip()]
132 | for rel in relationships:
133 | if "->" in rel:
134 | parts = rel.split("->")
135 | if len(parts) == 3:
136 | source, relation, target = [p.strip() for p in parts]
137 | if source and target and relation:
138 | # Add relationship edge
139 | self.knowledge_graph.add_edge(
140 | source, target,
141 | relationship=relation,
142 | turn_id=turn_id,
143 | speaker=speaker
144 | )
145 | except Exception as e:
146 | print(f"Error parsing extracted info: {e}")
147 |
148 | def get_context(self, query: str) -> str:
149 | """
150 | Retrieve relevant context by traversing the knowledge graph.
151 |
152 | Args:
153 | query: Current user query
154 |
155 | Returns:
156 | Relevant context from knowledge graph and conversation history
157 | """
158 | if self.knowledge_graph.number_of_nodes() == 0:
159 | return "No information in memory yet."
160 |
161 | # Extract entities from the query
162 | query_extraction_prompt = (
163 | f"Extract key entities (people, places, concepts) from this query. "
164 | f"List them separated by commas. If no clear entities, respond with 'none'.\n\n"
165 | f"Query: {query}"
166 | )
167 |
168 | query_entities = generate_text(
169 | "You are an entity extraction expert.",
170 | query_extraction_prompt,
171 | self.client
172 | )
173 |
174 | relevant_info = []
175 |
176 | # Find relevant nodes and relationships
177 | if query_entities.lower() != "none":
178 | entities = [e.strip() for e in query_entities.split(",") if e.strip()]
179 |
180 | for entity in entities:
181 | # Find exact matches or similar entities in graph
182 | for node in self.knowledge_graph.nodes():
183 | if entity.lower() in node.lower() or node.lower() in entity.lower():
184 | # Get node information
185 | node_data = self.knowledge_graph.nodes[node]
186 | relevant_info.append(f"Entity: {node} (from {node_data.get('speaker', 'unknown')})")
187 |
188 | # Get relationships involving this node
189 | for neighbor in self.knowledge_graph.neighbors(node):
190 | edge_data = self.knowledge_graph.edges[node, neighbor]
191 | relationship = edge_data.get('relationship', 'related to')
192 | relevant_info.append(f" → {relationship} → {neighbor}")
193 |
194 | # Fallback to recent conversation if no graph matches
195 | if not relevant_info:
196 | recent_turns = self.conversation_history[-3:] # Last 3 turns
197 | for turn in recent_turns:
198 | relevant_info.append(f"Turn {turn['turn_id']}: User: {turn['user']}")
199 | relevant_info.append(f"Turn {turn['turn_id']}: Assistant: {turn['assistant']}")
200 |
201 | return "### Knowledge Graph Context:\n" + "\n".join(relevant_info) if relevant_info else "No relevant information found."
202 |
203 | def clear(self) -> None:
204 | """Reset the knowledge graph and conversation history."""
205 | self.knowledge_graph.clear()
206 | self.conversation_history = []
207 | self.node_counter = 0
208 | print("Graph memory cleared.")
209 |
210 | def get_memory_stats(self) -> Dict[str, Any]:
211 | """
212 | Get statistics about the knowledge graph.
213 |
214 | Returns:
215 | Dictionary containing memory statistics
216 | """
217 | num_nodes = self.knowledge_graph.number_of_nodes()
218 | num_edges = self.knowledge_graph.number_of_edges()
219 | num_turns = len(self.conversation_history)
220 |
221 | return {
222 | "strategy_type": "GraphMemory",
223 | "num_nodes": num_nodes,
224 | "num_edges": num_edges,
225 | "num_turns": num_turns,
226 | "memory_size": f"{num_nodes} nodes, {num_edges} edges, {num_turns} turns",
227 | "advantages": ["Relationship modeling", "Complex reasoning", "Structured knowledge"],
228 | "disadvantages": ["Complex implementation", "Extraction dependent", "Computational overhead"]
229 | }
230 |
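A short usage sketch for the graph strategy above (hypothetical conversation; every `add_message` call issues LLM extraction requests, so an `OPENAI_API_KEY` is required, and the exact nodes/edges depend on the extraction output):

```python
from memory_strategies import GraphMemory

memory = GraphMemory()
memory.add_message(
    "Alice works at Acme and mentors Bob.",
    "Got it: Alice is at Acme and mentors Bob."
)

# Entities like "Alice" become nodes; relations like "mentors" become edges
print(memory.get_memory_stats()["memory_size"])  # e.g. "3 nodes, 2 edges, 1 turns"
print(memory.get_context("Who does Alice mentor?"))
```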
--------------------------------------------------------------------------------
/example_usage.py:
--------------------------------------------------------------------------------
1 | """
2 | AI Agent Memory Design & Optimization - Example Usage
3 |
4 | This file demonstrates how to use multiple memory optimization techniques
5 | in a plug-and-play manner. Each strategy can be easily swapped and tested.
6 | """
7 |
8 | import os
9 | import time
10 | from memory_strategies import (
11 | AIAgent,
12 | SequentialMemory,
13 | SlidingWindowMemory,
14 | SummarizationMemory,
15 | RetrievalMemory,
16 | MemoryAugmentedMemory,
17 | HierarchicalMemory,
18 | GraphMemory,
19 | CompressionMemory,
20 | OSMemory,
21 | STRATEGY_INFO
22 | )
23 |
24 |
25 | def demo_strategy(strategy_class, strategy_name, test_conversations, **kwargs):
26 | """
27 | Demonstrate a specific memory strategy with test conversations.
28 |
29 | Args:
30 | strategy_class: Memory strategy class to test
31 | strategy_name: Name of the strategy for display
32 | test_conversations: List of user inputs to test
33 | **kwargs: Additional arguments for strategy initialization
34 | """
35 | print(f"\n{'='*60}")
36 | print(f"TESTING: {strategy_name}")
37 | print(f"{'='*60}")
38 |
39 | # Display strategy information
40 | info = STRATEGY_INFO.get(strategy_class.__name__, {})
41 | print(f"Complexity: {info.get('complexity', 'Unknown')}")
42 | print(f"Description: {info.get('description', 'No description')}")
43 | print(f"Best for: {info.get('best_for', 'General use')}")
44 | print()
45 |
46 | try:
47 | # Initialize strategy and agent
48 | memory_strategy = strategy_class(**kwargs)
49 | agent = AIAgent(memory_strategy, system_prompt="You are a helpful AI assistant with memory.")
50 |
51 | # Run test conversations
52 | for i, user_input in enumerate(test_conversations, 1):
53 | print(f"\n--- Conversation Turn {i} ---")
54 | result = agent.chat(user_input, verbose=True)
55 |
56 | # Add small delay for readability
57 | time.sleep(0.5)
58 |
59 | # Display memory statistics
60 | print(f"\nMemory Statistics:")
61 | stats = agent.get_memory_stats()
62 | for key, value in stats.items():
63 | if key not in ['advantages', 'disadvantages']:
64 | print(f" {key}: {value}")
65 |
66 | print(f"\nAdvantages: {', '.join(stats.get('advantages', []))}")
67 | print(f"Disadvantages: {', '.join(stats.get('disadvantages', []))}")
68 |
69 | except Exception as e:
70 | print(f"Error testing {strategy_name}: {str(e)}")
71 |
72 | print(f"\n{'='*60}")
73 |
74 |
75 | def run_comprehensive_demo():
76 | """
77 | Run comprehensive demonstration of all memory strategies.
78 | """
79 | print("AI Agent Memory Design & Optimization - Comprehensive Demo")
80 | print("=" * 60)
81 |
82 | # Test conversations that showcase different memory capabilities
83 | test_conversations = [
84 | "Hi! My name is Alex and I'm a software engineer.",
85 | "I'm working on a machine learning project about natural language processing.",
86 | "My favorite programming language is Python, and I prefer coffee over tea.",
87 | "Can you remember what my name is and what I'm working on?",
88 | "What do you know about my preferences?"
89 | ]
90 |
91 | # Test each strategy
92 | strategies_to_test = [
93 | (SequentialMemory, "Sequential Memory", {}),
94 | (SlidingWindowMemory, "Sliding Window Memory", {"window_size": 3}),
95 | (SummarizationMemory, "Summarization Memory", {"summary_threshold": 4}),
96 | (RetrievalMemory, "Retrieval Memory", {"k": 2}),
97 | (MemoryAugmentedMemory, "Memory-Augmented Memory", {"window_size": 2}),
98 | (HierarchicalMemory, "Hierarchical Memory", {"window_size": 2, "k": 2}),
99 | (GraphMemory, "Graph Memory", {}),
100 | (CompressionMemory, "Compression Memory", {"compression_ratio": 0.6}),
101 | (OSMemory, "OS-like Memory", {"ram_size": 2})
102 | ]
103 |
104 | for strategy_class, strategy_name, kwargs in strategies_to_test:
105 | demo_strategy(strategy_class, strategy_name, test_conversations, **kwargs)
106 |
107 |         # Ask user if they want to continue ('n' or 'q' stops the demo)
108 |         user_input = input("\n🤔 Continue to next strategy? (y/n/q to quit): ").lower()
109 |         if user_input in ('q', 'n'):
110 |             break
111 |         # Any other answer (e.g. 'y') moves on to the next strategy
112 |
113 |
114 |
115 | def interactive_strategy_tester():
116 | """
117 | Interactive mode for testing specific strategies.
118 | """
119 | print("\nInteractive Strategy Tester")
120 | print("=" * 40)
121 |
122 | # Display available strategies
123 | strategies = {
124 | "1": (SequentialMemory, "Sequential Memory", {}),
125 | "2": (SlidingWindowMemory, "Sliding Window Memory", {"window_size": 3}),
126 | "3": (SummarizationMemory, "Summarization Memory", {"summary_threshold": 4}),
127 | "4": (RetrievalMemory, "Retrieval Memory", {"k": 2}),
128 | "5": (MemoryAugmentedMemory, "Memory-Augmented Memory", {"window_size": 2}),
129 | "6": (HierarchicalMemory, "Hierarchical Memory", {"window_size": 2, "k": 2}),
130 | "7": (GraphMemory, "Graph Memory", {}),
131 | "8": (CompressionMemory, "Compression Memory", {"compression_ratio": 0.6}),
132 | "9": (OSMemory, "OS-like Memory", {"ram_size": 2})
133 | }
134 |
135 | print("Available Memory Strategies:")
136 | for key, (_, name, _) in strategies.items():
137 | print(f" {key}. {name}")
138 |
139 | while True:
140 | choice = input("\nSelect a strategy (1-9) or 'q' to quit: ").strip()
141 |
142 | if choice.lower() == 'q':
143 | break
144 |
145 | if choice in strategies:
146 | strategy_class, strategy_name, kwargs = strategies[choice]
147 |
148 | try:
149 | # Initialize strategy and agent
150 | memory_strategy = strategy_class(**kwargs)
151 | agent = AIAgent(memory_strategy, system_prompt="You are a helpful AI assistant.")
152 |
153 | print(f"\nNow using: {strategy_name}")
154 | print("Type 'stats' to see memory statistics, 'clear' to clear memory, 'back' to choose another strategy")
155 |
156 | while True:
157 | user_input = input("\nYou: ").strip()
158 |
159 | if user_input.lower() == 'back':
160 | break
161 | elif user_input.lower() == 'stats':
162 | stats = agent.get_memory_stats()
163 | print("\nMemory Statistics:")
164 | for key, value in stats.items():
165 | print(f" {key}: {value}")
166 | elif user_input.lower() == 'clear':
167 | agent.clear_memory()
168 | elif user_input:
169 | result = agent.chat(user_input, verbose=False)
170 | print(f"AI: {result['ai_response']}")
171 | print(f"Response time: {result['generation_time']:.2f}s | Tokens: {result['prompt_tokens']}")
172 |
173 | except Exception as e:
174 | print(f"Error with {strategy_name}: {str(e)}")
175 | else:
176 | print("Invalid choice. Please select 1-9 or 'q'.")
177 |
178 |
179 | def quick_comparison_demo():
180 | """
181 | Quick comparison of key strategies on the same conversation.
182 | """
183 | print("\n⚡ Quick Comparison Demo")
184 | print("=" * 40)
185 |
186 | # Single conversation to test all strategies
187 | test_conversation = [
188 | "Remember this important fact: I am allergic to peanuts.",
189 | "I love traveling and have been to Japan, France, and Italy.",
190 | "My favorite hobby is photography, especially landscape photography.",
191 | "What do you know about my allergy and travel experiences?"
192 | ]
193 |
194 | # Key strategies to compare
195 | comparison_strategies = [
196 | (SequentialMemory, "Sequential", {}),
197 | (SlidingWindowMemory, "Sliding Window", {"window_size": 2}),
198 | (RetrievalMemory, "Retrieval (RAG)", {"k": 2}),
199 | (HierarchicalMemory, "Hierarchical", {"window_size": 2, "k": 2})
200 | ]
201 |
202 | results = {}
203 |
204 | for strategy_class, strategy_name, kwargs in comparison_strategies:
205 | print(f"\nTesting {strategy_name}...")
206 |
207 | try:
208 | memory_strategy = strategy_class(**kwargs)
209 | agent = AIAgent(memory_strategy, system_prompt="You are a helpful assistant.")
210 |
211 | # Run all conversations
212 | for user_input in test_conversation:
213 | result = agent.chat(user_input, verbose=False)
214 |
215 | # Store final result for comparison
216 | results[strategy_name] = {
217 | "final_response": result['ai_response'],
218 | "final_tokens": result['prompt_tokens'],
219 | "memory_stats": agent.get_memory_stats()
220 | }
221 |
222 | except Exception as e:
223 | results[strategy_name] = {"error": str(e)}
224 |
225 | # Display comparison
226 | print(f"\nCOMPARISON RESULTS")
227 | print("=" * 50)
228 |
229 | for strategy_name, data in results.items():
230 | print(f"\n{strategy_name}:")
231 | if "error" in data:
232 | print(f" Error: {data['error']}")
233 | else:
234 | print(f" Response: {data['final_response'][:100]}...")
235 | print(f" Tokens: {data['final_tokens']}")
236 | print(f" Memory: {data['memory_stats'].get('memory_size', 'Unknown')}")
237 |
238 |
239 | def main():
240 | """
241 | Main function with menu-driven interface.
242 | """
243 | print("AI Agent Memory Design & Optimization - Demo Suite")
244 | print("=" * 50)
245 |
246 | # Check if OpenAI API key is available
247 | if not os.getenv("OPENAI_API_KEY"):
248 | print("Error: OPENAI_API_KEY not found in environment variables.")
249 | print("Please set your OpenAI API key in the .env file.")
250 | return
251 |
252 | while True:
253 | print("\nChoose a demo mode:")
254 | print("1. Comprehensive Demo (all strategies)")
255 | print("2. Interactive Tester (choose strategy)")
256 | print("3. Quick Comparison (key strategies)")
257 | print("4. Exit")
258 |
259 | choice = input("\nEnter your choice (1-4): ").strip()
260 |
261 | if choice == "1":
262 | run_comprehensive_demo()
263 | elif choice == "2":
264 | interactive_strategy_tester()
265 | elif choice == "3":
266 | quick_comparison_demo()
267 | elif choice == "4":
268 | print("👋 Goodbye!")
269 | break
270 | else:
271 | print("❌ Invalid choice. Please select 1-4.")
272 |
273 |
274 | if __name__ == "__main__":
275 | main()
276 |
--------------------------------------------------------------------------------
/memory_strategies/compression_memory.py:
--------------------------------------------------------------------------------
1 | """
2 | Memory Compression and Integration Strategy
3 |
4 | This strategy compresses and integrates historical conversations through intelligent
5 | algorithms, significantly reducing storage space and processing overhead while
6 | retaining key information through multi-level compression mechanisms.
7 | """
8 |
9 |
10 | from typing import List, Dict, Any, Optional
11 | from openai import OpenAI
12 | from .base_memory import BaseMemoryStrategy
13 | from .utils import generate_text, get_openai_client, count_tokens
14 |
15 |
16 | class CompressionMemory(BaseMemoryStrategy):
17 | """
18 | Memory compression strategy with intelligent information integration.
19 |
20 | Advantages:
21 | - Significant storage space reduction
22 | - Intelligent information merging
23 | - Dynamic importance scoring
24 | - Automatic redundancy filtering
25 |
26 | Disadvantages:
27 | - Complex compression algorithms
28 | - Potential information loss
29 | - Computational overhead for compression
30 | - Tuning required for optimal performance
31 | """
32 |
33 | def __init__(
34 | self,
35 | compression_ratio: float = 0.5,
36 | importance_threshold: float = 0.7,
37 | client: Optional[OpenAI] = None
38 | ):
39 | """
40 | Initialize compression memory system.
41 |
42 | Args:
43 | compression_ratio: Target compression ratio (0.5 = 50% compression)
44 | importance_threshold: Threshold for importance scoring (0-1)
45 | client: Optional OpenAI client instance
46 | """
47 | self.compression_ratio = compression_ratio
48 | self.importance_threshold = importance_threshold
49 | self.client = client or get_openai_client()
50 |
51 | # Store conversation segments with metadata
52 | self.memory_segments: List[Dict[str, Any]] = []
53 |
54 | # Compressed memory storage
55 | self.compressed_memory: List[Dict[str, Any]] = []
56 |
57 | # Track compression statistics
58 | self.compression_stats = {
59 | "original_tokens": 0,
60 | "compressed_tokens": 0,
61 | "compression_count": 0
62 | }
63 |
64 | def add_message(self, user_input: str, ai_response: str) -> None:
65 | """
66 | Add new conversation turn with importance scoring and compression triggers.
67 |
68 | Args:
69 | user_input: User's message
70 | ai_response: AI's response
71 | """
72 | # Calculate importance score for this conversation turn
73 | importance_score = self._calculate_importance_score(user_input, ai_response)
74 |
75 | # Create memory segment with metadata
76 | segment = {
77 | "user_input": user_input,
78 | "ai_response": ai_response,
79 | "importance_score": importance_score,
80 | "timestamp": len(self.memory_segments),
81 | "token_count": count_tokens(user_input + ai_response),
82 | "compressed": False
83 | }
84 |
85 | self.memory_segments.append(segment)
86 | self.compression_stats["original_tokens"] += segment["token_count"]
87 |
88 | # Trigger compression if we have enough segments
89 | if len(self.memory_segments) >= 6: # Compress every 6 segments
90 | self._compress_memory_segments()
91 |
92 | def _calculate_importance_score(self, user_input: str, ai_response: str) -> float:
93 | """
94 | Calculate importance score for a conversation turn using LLM.
95 |
96 | Args:
97 | user_input: User's message
98 | ai_response: AI's response
99 |
100 | Returns:
101 | Importance score between 0 and 1
102 | """
103 | scoring_prompt = (
104 | f"Rate the importance of this conversation turn on a scale of 0.0 to 1.0. "
105 | f"Consider factors like: factual information, user preferences, decisions, "
106 | f"emotional significance, and future relevance. "
107 | f"Respond with only a number between 0.0 and 1.0.\n\n"
108 | f"User: {user_input}\n"
109 | f"AI: {ai_response}"
110 | )
111 |
112 | try:
113 | score_text = generate_text(
114 | "You are an importance scoring expert.",
115 | scoring_prompt,
116 | self.client
117 | )
118 | # Extract numeric score from response
119 | score = float(score_text.strip())
120 | return max(0.0, min(1.0, score)) # Clamp between 0 and 1
121 |         except Exception:
122 |             return 0.5  # Fall back to moderate importance if the LLM call or parsing fails
123 |
124 | def _compress_memory_segments(self) -> None:
125 | """
126 | Compress memory segments using intelligent algorithms.
127 | """
128 | print("--- [Memory Compression: Compressing memory segments] ---")
129 |
130 | # Separate high and low importance segments
131 | high_importance = [s for s in self.memory_segments if s["importance_score"] >= self.importance_threshold]
132 | low_importance = [s for s in self.memory_segments if s["importance_score"] < self.importance_threshold]
133 |
134 | # Compress low importance segments
135 | if low_importance:
136 | compressed_segment = self._semantic_compression(low_importance)
137 | self.compressed_memory.append(compressed_segment)
138 |
139 | # Keep high importance segments with minimal compression
140 | for segment in high_importance:
141 | segment["compressed"] = True
142 | self.compressed_memory.append({
143 | "type": "high_importance",
144 | "content": f"User: {segment['user_input']}\nAI: {segment['ai_response']}",
145 | "importance_score": segment["importance_score"],
146 | "timestamp": segment["timestamp"]
147 | })
148 |
149 | # Clear processed segments
150 | self.memory_segments = []
151 | self.compression_stats["compression_count"] += 1
152 |
153 | def _semantic_compression(self, segments: List[Dict[str, Any]]) -> Dict[str, Any]:
154 | """
155 | Perform semantic-level compression on low importance segments.
156 |
157 | Args:
158 | segments: List of memory segments to compress
159 |
160 | Returns:
161 | Compressed segment dictionary
162 | """
163 | # Combine all low importance conversations
164 | combined_text = "\n".join([
165 | f"User: {s['user_input']}\nAI: {s['ai_response']}"
166 | for s in segments
167 | ])
168 |
169 | # Use LLM to create compressed summary
170 | compression_prompt = (
171 | f"Compress the following conversations into a concise summary that retains "
172 | f"the key information while reducing length by approximately {int(self.compression_ratio * 100)}%. "
173 | f"Focus on facts, decisions, and context that might be relevant later.\n\n"
174 | f"Conversations:\n{combined_text}\n\n"
175 | f"Compressed Summary:"
176 | )
177 |
178 | compressed_content = generate_text(
179 | "You are a memory compression expert.",
180 | compression_prompt,
181 | self.client
182 | )
183 |
184 | compressed_tokens = count_tokens(compressed_content)
185 | original_tokens = sum(s["token_count"] for s in segments)
186 |
187 | self.compression_stats["compressed_tokens"] += compressed_tokens
188 |
189 | return {
190 | "type": "compressed",
191 | "content": compressed_content,
192 | "original_segments": len(segments),
193 | "compression_ratio": compressed_tokens / original_tokens if original_tokens > 0 else 0,
194 | "timestamp_range": (segments[0]["timestamp"], segments[-1]["timestamp"])
195 | }
196 |
197 | def get_context(self, query: str) -> str:
198 | """
199 | Retrieve relevant context from both active segments and compressed memory.
200 |
201 | Args:
202 | query: Current user query
203 |
204 | Returns:
205 | Relevant context from compressed and active memory
206 | """
207 | context_parts = []
208 |
209 | # Add relevant compressed memory
210 | for compressed_segment in self.compressed_memory:
211 | if self._is_relevant_to_query(compressed_segment["content"], query):
212 | context_parts.append(f"[Compressed Memory]: {compressed_segment['content']}")
213 |
214 | # Add recent active segments
215 | for segment in self.memory_segments[-3:]: # Last 3 active segments
216 | context_parts.append(f"User: {segment['user_input']}\nAI: {segment['ai_response']}")
217 |
218 | if not context_parts:
219 | return "No relevant information in memory yet."
220 |
221 | return "### Memory Context:\n" + "\n---\n".join(context_parts)
222 |
223 | def _is_relevant_to_query(self, content: str, query: str) -> bool:
224 | """
225 | Simple relevance check based on keyword overlap.
226 |
227 | Args:
228 | content: Memory content to check
229 | query: User query
230 |
231 | Returns:
232 | True if content is relevant to query
233 | """
234 | query_words = set(query.lower().split())
235 | content_words = set(content.lower().split())
236 |
237 | # Check for word overlap (simple heuristic)
238 | overlap = len(query_words.intersection(content_words))
239 | return overlap >= 2 # At least 2 words in common
240 |
241 | def clear(self) -> None:
242 | """Reset all memory storage and statistics."""
243 | self.memory_segments = []
244 | self.compressed_memory = []
245 | self.compression_stats = {
246 | "original_tokens": 0,
247 | "compressed_tokens": 0,
248 | "compression_count": 0
249 | }
250 | print("Compression memory cleared.")
251 |
252 | def get_memory_stats(self) -> Dict[str, Any]:
253 | """
254 | Get comprehensive statistics about memory compression.
255 |
256 | Returns:
257 | Dictionary containing memory statistics
258 | """
259 | active_segments = len(self.memory_segments)
260 | compressed_segments = len(self.compressed_memory)
261 |
262 | overall_compression_ratio = (
263 | self.compression_stats["compressed_tokens"] / self.compression_stats["original_tokens"]
264 | if self.compression_stats["original_tokens"] > 0 else 0
265 | )
266 |
267 | return {
268 | "strategy_type": "CompressionMemory",
269 | "compression_ratio_target": self.compression_ratio,
270 | "importance_threshold": self.importance_threshold,
271 | "active_segments": active_segments,
272 | "compressed_segments": compressed_segments,
273 | "compression_stats": self.compression_stats,
274 | "overall_compression_ratio": overall_compression_ratio,
275 | "memory_size": f"{active_segments} active + {compressed_segments} compressed",
276 | "advantages": ["Space reduction", "Intelligent merging", "Redundancy filtering"],
277 | "disadvantages": ["Complex algorithms", "Information loss", "Computational overhead"]
278 | }
279 |
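Compression only fires once six segments accumulate, so short sessions stay uncompressed. A sketch exercising the trigger (hypothetical inputs; the importance scores, and therefore the high/low split, come from LLM calls and will vary, so an `OPENAI_API_KEY` is required):

```python
from memory_strategies import CompressionMemory

memory = CompressionMemory(compression_ratio=0.6, importance_threshold=0.7)

# The sixth segment reaches the compression trigger; low-importance ones get merged
for i in range(6):
    memory.add_message(f"Small talk number {i}.", "Sure, noted.")

stats = memory.get_memory_stats()
print(stats["memory_size"])                # e.g. "0 active + 2 compressed"
print(stats["overall_compression_ratio"])  # compressed tokens / original tokens
```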
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/api.py:
--------------------------------------------------------------------------------
1 | """
2 | FastAPI Application for AI Agent Memory Strategies
3 |
4 | This API provides endpoints to interact with all 9 memory optimization techniques
5 | through RESTful endpoints. Each strategy can be used independently via the API.
6 | """
7 |
8 | import os
9 | import uuid
10 | from typing import Dict, List, Optional, Any
11 | from fastapi import FastAPI, HTTPException, Depends
12 | from fastapi.middleware.cors import CORSMiddleware
13 | from pydantic import BaseModel
14 | from contextlib import asynccontextmanager
15 |
16 | from memory_strategies import (
17 | AIAgent,
18 | SequentialMemory,
19 | SlidingWindowMemory,
20 | SummarizationMemory,
21 | RetrievalMemory,
22 | MemoryAugmentedMemory,
23 | HierarchicalMemory,
24 | GraphMemory,
25 | CompressionMemory,
26 | OSMemory,
27 | STRATEGY_INFO,
28 | get_openai_client
29 | )
30 |
31 |
32 | # Pydantic models for API requests/responses
33 | class ChatRequest(BaseModel):
34 | message: str
35 | api_key: Optional[str] = None
36 |
37 |
38 | class ChatResponse(BaseModel):
39 | response: str
40 | user_input: str
41 | retrieval_time: float
42 | generation_time: float
43 | prompt_tokens: int
44 | session_id: str
45 | strategy_type: str
46 |
47 |
48 | class SessionCreateRequest(BaseModel):
49 | strategy_type: str
50 |     strategy_config: Dict[str, Any] = {}
51 | system_prompt: Optional[str] = "You are a helpful AI assistant."
52 | api_key: Optional[str] = None
53 |
54 |
55 | class SessionResponse(BaseModel):
56 | session_id: str
57 | strategy_type: str
58 | strategy_config: Dict[str, Any]
59 | created: bool
60 |
61 |
62 | class MemoryStatsResponse(BaseModel):
63 | session_id: str
64 | strategy_type: str
65 | memory_stats: Dict[str, Any]
66 |
67 |
68 | class StrategyInfoResponse(BaseModel):
69 | strategy_name: str
70 | complexity: str
71 | description: str
72 | best_for: str
73 | default_config: Dict[str, Any]
74 |
75 |
76 | # Global storage for active sessions
77 | active_sessions: Dict[str, AIAgent] = {}
78 |
79 | # Available strategies with their default configurations
80 | AVAILABLE_STRATEGIES = {
81 | "sequential": {
82 | "class": SequentialMemory,
83 | "default_config": {},
84 | "description": "Stores all conversation history chronologically"
85 | },
86 | "sliding_window": {
87 | "class": SlidingWindowMemory,
88 | "default_config": {"window_size": 4},
89 | "description": "Maintains only the most recent N conversations"
90 | },
91 | "summarization": {
92 | "class": SummarizationMemory,
93 | "default_config": {"summary_threshold": 4},
94 | "description": "Compresses conversation history using LLM summarization"
95 | },
96 | "retrieval": {
97 | "class": RetrievalMemory,
98 | "default_config": {"k": 2, "embedding_dim": 1536},
99 | "description": "Uses vector embeddings and similarity search (RAG)"
100 | },
101 | "memory_augmented": {
102 | "class": MemoryAugmentedMemory,
103 | "default_config": {"window_size": 2},
104 | "description": "Combines sliding window with persistent memory tokens"
105 | },
106 | "hierarchical": {
107 | "class": HierarchicalMemory,
108 | "default_config": {"window_size": 2, "k": 2, "embedding_dim": 1536},
109 | "description": "Multi-layered system with working + long-term memory"
110 | },
111 | "graph": {
112 | "class": GraphMemory,
113 | "default_config": {},
114 | "description": "Treats conversations as nodes with relationship edges"
115 | },
116 | "compression": {
117 | "class": CompressionMemory,
118 | "default_config": {"compression_ratio": 0.5, "importance_threshold": 0.7},
119 | "description": "Intelligent compression and integration of historical data"
120 | },
121 | "os_memory": {
122 | "class": OSMemory,
123 | "default_config": {"ram_size": 2},
124 | "description": "Simulates RAM/disk with active/passive memory"
125 | }
126 | }
127 |
128 |
129 | @asynccontextmanager
130 | async def lifespan(app: FastAPI):
131 | """Application lifespan manager."""
132 | # Startup
133 | print("Starting AI Agent Memory Strategies API...")
134 | yield
135 | # Shutdown
136 | print("Shutting down API...")
137 | active_sessions.clear()
138 |
139 |
140 | # Initialize FastAPI app
141 | app = FastAPI(
142 | title="AI Agent Memory Design & Optimization API",
143 | description="RESTful API for testing and using multiple AI agent memory optimization techniques",
144 | version="1.0.0",
145 | lifespan=lifespan
146 | )
147 |
148 | # Add CORS middleware
149 | app.add_middleware(
150 | CORSMiddleware,
151 | allow_origins=["*"],
152 | allow_credentials=True,
153 | allow_methods=["*"],
154 | allow_headers=["*"],
155 | )
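# NOTE: allow_origins=["*"] is convenient for a local playground, but the
# origins should be narrowed before exposing this API publicly.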
156 |
157 |
158 | def get_openai_client_with_key(api_key: Optional[str] = None):
159 | """Get OpenAI client with provided API key or from environment."""
160 | if api_key:
161 | from openai import OpenAI
162 | return OpenAI(api_key=api_key)
163 | return get_openai_client()
164 |
165 |
166 | @app.get("/")
167 | async def root():
168 | """Root endpoint with API information."""
169 | return {
170 | "message": "AI Agent Memory Strategies API",
171 | "version": "1.0.0",
172 | "available_strategies": list(AVAILABLE_STRATEGIES.keys()),
173 | "endpoints": {
174 | "GET /strategies": "List all available memory strategies",
175 | "POST /sessions": "Create a new chat session with a memory strategy",
176 | "POST /sessions/{session_id}/chat": "Send a message to a specific session",
177 | "GET /sessions/{session_id}/stats": "Get memory statistics for a session",
178 | "DELETE /sessions/{session_id}": "Delete a session",
179 | "GET /sessions": "List all active sessions"
180 | }
181 | }
182 |
183 |
184 | @app.get("/strategies", response_model=List[StrategyInfoResponse])
185 | async def list_strategies():
186 | """List all available memory strategies with their information."""
187 | strategies = []
188 |
189 | for strategy_key, strategy_data in AVAILABLE_STRATEGIES.items():
190 | strategy_class = strategy_data["class"]
191 | strategy_name = strategy_class.__name__
192 |
193 | # Get strategy info from STRATEGY_INFO
194 | info = STRATEGY_INFO.get(strategy_name, {})
195 |
196 | strategies.append(StrategyInfoResponse(
197 | strategy_name=strategy_key,
198 | complexity=info.get("complexity", "Unknown"),
199 | description=info.get("description", strategy_data["description"]),
200 | best_for=info.get("best_for", "General use"),
201 | default_config=strategy_data["default_config"]
202 | ))
203 |
204 | return strategies
205 |
206 |
207 | @app.post("/sessions", response_model=SessionResponse)
208 | async def create_session(request: SessionCreateRequest):
209 | """Create a new chat session with specified memory strategy."""
210 | if request.strategy_type not in AVAILABLE_STRATEGIES:
211 | raise HTTPException(
212 | status_code=400,
213 | detail=f"Invalid strategy type. Available: {list(AVAILABLE_STRATEGIES.keys())}"
214 | )
215 |
216 | try:
217 | # Get strategy configuration
218 | strategy_info = AVAILABLE_STRATEGIES[request.strategy_type]
219 | strategy_class = strategy_info["class"]
220 |
221 | # Merge default config with user config
222 | config = {**strategy_info["default_config"], **request.strategy_config}
223 |
224 | # Get OpenAI client
225 | client = get_openai_client_with_key(request.api_key)
226 |
227 |         # Pass the OpenAI client to strategies whose constructor accepts one
228 |         import inspect
229 |         sig = inspect.signature(strategy_class.__init__)
230 |         if 'client' in sig.parameters:
231 |             config['client'] = client
233 |
234 | # Initialize memory strategy
235 | memory_strategy = strategy_class(**config)
236 |
237 | # Create AI agent
238 | agent = AIAgent(
239 | memory_strategy=memory_strategy,
240 | system_prompt=request.system_prompt,
241 | client=client
242 | )
243 |
244 | # Generate session ID and store
245 | session_id = str(uuid.uuid4())
246 | active_sessions[session_id] = agent
247 |
248 | return SessionResponse(
249 | session_id=session_id,
250 | strategy_type=request.strategy_type,
251 |             strategy_config={k: v for k, v in config.items() if k != "client"},  # exclude the non-serializable client object
252 | created=True
253 | )
254 |
255 | except Exception as e:
256 | raise HTTPException(status_code=500, detail=f"Error creating session: {str(e)}")
257 |
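# Hypothetical usage (assuming the server is running on localhost:8000):
#   curl -X POST localhost:8000/sessions \
#        -H "Content-Type: application/json" \
#        -d '{"strategy_type": "retrieval", "strategy_config": {"k": 3}}'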
258 |
259 | @app.post("/sessions/{session_id}/chat", response_model=ChatResponse)
260 | async def chat_with_session(session_id: str, request: ChatRequest):
261 | """Send a message to a specific session."""
262 | if session_id not in active_sessions:
263 | raise HTTPException(status_code=404, detail="Session not found")
264 |
265 | try:
266 | agent = active_sessions[session_id]
267 |
268 | # Process the chat message
269 | result = agent.chat(request.message, verbose=False)
270 |
271 | return ChatResponse(
272 | response=result["ai_response"],
273 | user_input=result["user_input"],
274 | retrieval_time=result["retrieval_time"],
275 | generation_time=result["generation_time"],
276 | prompt_tokens=result["prompt_tokens"],
277 | session_id=session_id,
278 | strategy_type=type(agent.memory).__name__
279 | )
280 |
281 | except Exception as e:
282 | raise HTTPException(status_code=500, detail=f"Error processing chat: {str(e)}")
283 |
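# Hypothetical usage, chaining on the session_id returned by POST /sessions:
#   curl -X POST localhost:8000/sessions/<session_id>/chat \
#        -H "Content-Type: application/json" \
#        -d '{"message": "Hello!"}'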
284 |
285 | @app.get("/sessions/{session_id}/stats", response_model=MemoryStatsResponse)
286 | async def get_session_stats(session_id: str):
287 | """Get memory statistics for a specific session."""
288 | if session_id not in active_sessions:
289 | raise HTTPException(status_code=404, detail="Session not found")
290 |
291 | try:
292 | agent = active_sessions[session_id]
293 | stats = agent.get_memory_stats()
294 |
295 | return MemoryStatsResponse(
296 | session_id=session_id,
297 | strategy_type=type(agent.memory).__name__,
298 | memory_stats=stats
299 | )
300 |
301 | except Exception as e:
302 | raise HTTPException(status_code=500, detail=f"Error getting stats: {str(e)}")
303 |
304 |
305 | @app.delete("/sessions/{session_id}")
306 | async def delete_session(session_id: str):
307 | """Delete a specific session."""
308 | if session_id not in active_sessions:
309 | raise HTTPException(status_code=404, detail="Session not found")
310 |
311 | del active_sessions[session_id]
312 | return {"message": f"Session {session_id} deleted successfully"}
313 |
314 |
315 | @app.get("/sessions")
316 | async def list_sessions():
317 | """List all active sessions."""
318 | sessions = []
319 | for session_id, agent in active_sessions.items():
320 | sessions.append({
321 | "session_id": session_id,
322 | "strategy_type": type(agent.memory).__name__,
323 | "system_prompt": agent.system_prompt[:50] + "..." if len(agent.system_prompt) > 50 else agent.system_prompt
324 | })
325 |
326 | return {"active_sessions": len(sessions), "sessions": sessions}
327 |
328 |
329 | @app.post("/sessions/{session_id}/clear")
330 | async def clear_session_memory(session_id: str):
331 | """Clear memory for a specific session."""
332 | if session_id not in active_sessions:
333 | raise HTTPException(status_code=404, detail="Session not found")
334 |
335 | try:
336 | agent = active_sessions[session_id]
337 | agent.clear_memory()
338 | return {"message": f"Memory cleared for session {session_id}"}
339 |
340 | except Exception as e:
341 | raise HTTPException(status_code=500, detail=f"Error clearing memory: {str(e)}")
342 |
343 |
344 | # Health check endpoint
345 | @app.get("/health")
346 | async def health_check():
347 | """Health check endpoint."""
348 | return {
349 | "status": "healthy",
350 | "active_sessions": len(active_sessions),
351 | "openai_configured": bool(os.getenv("OPENAI_API_KEY"))
352 | }
353 |
354 |
355 | if __name__ == "__main__":
356 | import uvicorn
357 | uvicorn.run(app, host="0.0.0.0", port=8000)
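# During development, the server can also be started with auto-reload:
#   uvicorn api:app --reload --port 8000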
358 |
--------------------------------------------------------------------------------
/streamlit_playground.py:
--------------------------------------------------------------------------------
1 | """
2 | AI Agent Memory Design & Optimization - Streamlit Playground
3 | """
4 |
5 | import streamlit as st
6 | import plotly.express as px
8 | import pandas as pd
11 | from typing import Dict, Any, List
12 | import os
13 |
14 | # Import memory strategies
15 | from memory_strategies import (
16 | AIAgent,
17 | SequentialMemory,
18 | SlidingWindowMemory,
19 | SummarizationMemory,
20 | RetrievalMemory,
21 | MemoryAugmentedMemory,
22 | HierarchicalMemory,
23 | GraphMemory,
24 | CompressionMemory,
25 | OSMemory,
26 | STRATEGY_INFO
27 | )
28 |
29 | # Page configuration
30 | st.set_page_config(
31 | page_title="AI Agent Memory Design & Optimization",
32 | page_icon="🤖",
33 | layout="wide",
34 | initial_sidebar_state="expanded"
35 | )
36 |
37 | # Custom CSS for modern styling
38 | st.markdown("""
39 | <style>/* custom CSS for the header, strategy cards, chat bubbles, and metric cards (rules elided in this dump) */</style>
92 | """, unsafe_allow_html=True)
93 |
94 | # Initialize session state
95 | def initialize_session_state():
96 | """Initialize Streamlit session state variables."""
97 | if 'agents' not in st.session_state:
98 | st.session_state.agents = {}
99 | if 'chat_history' not in st.session_state:
100 | st.session_state.chat_history = {}
101 | if 'performance_metrics' not in st.session_state:
102 | st.session_state.performance_metrics = {}
103 | if 'api_key_set' not in st.session_state:
104 | st.session_state.api_key_set = False
105 |
106 | def setup_openai_client(api_key: str):
107 | """Setup OpenAI client with provided API key."""
108 | if api_key:
109 | os.environ["OPENAI_API_KEY"] = api_key
110 | st.session_state.api_key_set = True
111 | return True
112 | return False
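# NOTE: this only checks that a key was provided; the key is not validated
# against the OpenAI API until the first actual request is made.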
113 |
114 | def get_strategy_class_and_config(strategy_name: str) -> tuple:
115 | """Get strategy class and default configuration."""
116 | strategy_mapping = {
117 | "Sequential Memory": (SequentialMemory, {}),
118 | "Sliding Window Memory": (SlidingWindowMemory, {"window_size": 4}),
119 | "Summarization Memory": (SummarizationMemory, {"summary_threshold": 4}),
120 | "Retrieval Memory (RAG)": (RetrievalMemory, {"k": 2}),
121 | "Memory-Augmented Memory": (MemoryAugmentedMemory, {"window_size": 2}),
122 | "Hierarchical Memory": (HierarchicalMemory, {"window_size": 2, "k": 2}),
123 | "Graph Memory": (GraphMemory, {}),
124 | "Compression Memory": (CompressionMemory, {"compression_ratio": 0.5}),
125 | "OS-like Memory": (OSMemory, {"ram_size": 2})
126 | }
127 | return strategy_mapping.get(strategy_name, (SequentialMemory, {}))
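# Unknown strategy names fall back to SequentialMemory with an empty config.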
128 |
129 | def create_agent(strategy_name: str, config: Dict[str, Any]) -> AIAgent:
130 | """Create an AI agent with specified strategy and configuration."""
131 | strategy_class, default_config = get_strategy_class_and_config(strategy_name)
132 |
133 | # Merge configurations
134 | final_config = {**default_config, **config}
135 |
136 | # Create strategy instance
137 | memory_strategy = strategy_class(**final_config)
138 |
139 | # Create agent
140 | agent = AIAgent(
141 | memory_strategy=memory_strategy,
142 | system_prompt="You are a helpful AI assistant with advanced memory capabilities."
143 | )
144 |
145 | return agent
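# Hypothetical standalone usage (requires OPENAI_API_KEY in the environment):
#   agent = create_agent("Retrieval Memory (RAG)", {"k": 3})
#   result = agent.chat("Hello!", verbose=False)
#   print(result["ai_response"])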
146 |
147 | def render_sidebar():
148 | """Render the sidebar with API key input and strategy information."""
149 |     st.sidebar.markdown("""
150 |     <!-- sidebar header markup (elided in this dump) -->
154 |     """, unsafe_allow_html=True)
155 |
156 | # API Key input
157 | api_key = st.sidebar.text_input(
158 | "OpenAI API Key",
159 | type="password",
160 | placeholder="sk-...",
161 | help="Enter your OpenAI API key. This is required for all memory strategies."
162 | )
163 |
164 |     if api_key:
165 |         setup_openai_client(api_key)
166 |         st.sidebar.success("✅ API Key configured successfully!")
169 | elif not st.session_state.api_key_set:
170 | st.sidebar.warning("⚠️ Please enter your OpenAI API key to continue")
171 | return False
172 |
173 | st.sidebar.markdown("---")
174 |
175 | # Strategy information
176 |     st.sidebar.markdown("""
177 |     <!-- strategy info header markup (elided in this dump) -->
181 |     """, unsafe_allow_html=True)
182 |
183 | # Strategy complexity legend
184 | st.sidebar.markdown("### Complexity Levels")
185 | st.sidebar.markdown("🟢 **Basic** - Simple implementation")
186 | st.sidebar.markdown("🟡 **Advanced** - Moderate complexity")
187 | st.sidebar.markdown("🔴 **Complex** - High complexity")
188 |
189 | # Attribution section
190 | st.sidebar.markdown("---")
191 | st.sidebar.markdown(
192 | """
193 |         <!-- attribution markup (elided in this dump) -->
199 | """,
200 | unsafe_allow_html=True
201 | )
202 |
203 | return True
204 |
205 | def render_main_header():
206 | """Render the main header."""
207 |     st.markdown("""
208 |     <div>
209 |     <h1>AI Agent Memory Design & Optimization Playground</h1>
210 |     <p>Interactive testing environment for multiple memory optimization techniques</p>
211 |     </div>
212 |     """, unsafe_allow_html=True)
213 |
214 | def render_strategy_overview():
215 | """Render strategy overview cards."""
216 | st.markdown("## Available Memory Strategies")
217 |
218 | # Create columns for strategy cards
219 | col1, col2, col3 = st.columns(3)
220 |
221 | strategies = [
222 | ("Sequential Memory", "🟢", "Stores all conversation history"),
223 | ("Sliding Window Memory", "🟢", "Recent N conversations only"),
224 | ("Summarization Memory", "🟢", "LLM-based compression"),
225 | ("Retrieval Memory (RAG)", "🟡", "Vector similarity search"),
226 | ("Memory-Augmented Memory", "🟡", "Persistent memory tokens"),
227 | ("Hierarchical Memory", "🟡", "Multi-layered memory"),
228 | ("Graph Memory", "🔴", "Relationship modeling"),
229 | ("Compression Memory", "🔴", "Intelligent compression"),
230 | ("OS-like Memory", "🔴", "RAM/disk simulation")
231 | ]
232 |
233 | for i, (name, complexity, desc) in enumerate(strategies):
234 | col = [col1, col2, col3][i % 3]
235 | with col:
236 |             st.markdown(f"""
237 |             <div>
238 |             <h4>{complexity} {name}</h4>
239 |             <p>{desc}</p>
240 |             </div>
241 |             """, unsafe_allow_html=True)
242 |
243 | def render_single_strategy_tester():
244 | """Render single strategy testing interface."""
245 | st.markdown("## Single Strategy Tester")
246 |
247 | col1, col2 = st.columns([1, 2])
248 |
249 | with col1:
250 | # Strategy selection
251 | strategy_name = st.selectbox(
252 | "Choose Memory Strategy",
253 | [
254 | "Sequential Memory",
255 | "Sliding Window Memory",
256 | "Summarization Memory",
257 | "Retrieval Memory (RAG)",
258 | "Memory-Augmented Memory",
259 | "Hierarchical Memory",
260 | "Graph Memory",
261 | "Compression Memory",
262 | "OS-like Memory"
263 | ]
264 | )
265 |
266 | # Strategy configuration
267 | st.markdown("### Configuration")
268 | config = {}
269 |
270 | if strategy_name == "Sliding Window Memory":
271 | config["window_size"] = st.slider("Window Size", 1, 10, 4)
272 | elif strategy_name == "Summarization Memory":
273 | config["summary_threshold"] = st.slider("Summary Threshold", 2, 10, 4)
274 | elif strategy_name == "Retrieval Memory (RAG)":
275 | config["k"] = st.slider("Retrieval Count (k)", 1, 5, 2)
276 | elif strategy_name == "Memory-Augmented Memory":
277 | config["window_size"] = st.slider("Window Size", 1, 5, 2)
278 | elif strategy_name == "Hierarchical Memory":
279 | config["window_size"] = st.slider("Working Memory Size", 1, 5, 2)
280 | config["k"] = st.slider("Long-term Retrieval (k)", 1, 5, 2)
281 | elif strategy_name == "Compression Memory":
282 | config["compression_ratio"] = st.slider("Compression Ratio", 0.1, 0.9, 0.5)
283 | elif strategy_name == "OS-like Memory":
284 | config["ram_size"] = st.slider("RAM Size", 1, 5, 2)
285 |
286 | # Initialize agent button
287 | if st.button("🚀 Initialize Agent", type="primary"):
288 | try:
289 | agent = create_agent(strategy_name, config)
290 | st.session_state.agents[strategy_name] = agent
291 | st.session_state.chat_history[strategy_name] = []
292 | st.session_state.performance_metrics[strategy_name] = []
293 | st.success(f"✅ {strategy_name} agent initialized!")
294 | except Exception as e:
295 | st.error(f"❌ Error initializing agent: {str(e)}")
296 |
297 | with col2:
298 | if strategy_name in st.session_state.agents:
299 | # Chat interface
300 | st.markdown("### Chat Interface")
301 |
302 | # Display chat history
303 | chat_container = st.container()
304 | with chat_container:
305 | for msg in st.session_state.chat_history[strategy_name]:
306 | if msg["role"] == "user":
307 |                     st.markdown(f"""
308 |                     <div>
309 |                     <strong>You:</strong> {msg["content"]}
310 |                     </div>
311 |                     """, unsafe_allow_html=True)
312 |                 else:
313 |                     st.markdown(f"""
314 |                     <div>
315 |                     <strong>AI:</strong> {msg["content"]}<br>
316 |                     <small>⏱️ {msg.get('time', 0):.2f}s | 🔢 {msg.get('tokens', 0)} tokens</small>
317 |                     </div>
318 |                     """, unsafe_allow_html=True)
319 |
320 | # Chat input
321 | user_input = st.text_input("Your message:", key=f"input_{strategy_name}")
322 |
323 | col_send, col_clear = st.columns([1, 1])
324 |
325 | with col_send:
326 | if st.button("Send", key=f"send_{strategy_name}"):
327 | if user_input:
328 | try:
329 | agent = st.session_state.agents[strategy_name]
330 | result = agent.chat(user_input, verbose=False)
331 |
332 | # Add to chat history
333 | st.session_state.chat_history[strategy_name].extend([
334 | {"role": "user", "content": user_input},
335 | {
336 | "role": "assistant",
337 | "content": result["ai_response"],
338 | "time": result["generation_time"],
339 | "tokens": result["prompt_tokens"]
340 | }
341 | ])
342 |
343 | # Add to performance metrics
344 | st.session_state.performance_metrics[strategy_name].append({
345 | "turn": len(st.session_state.performance_metrics[strategy_name]) + 1,
346 | "tokens": result["prompt_tokens"],
347 | "retrieval_time": result["retrieval_time"],
348 | "generation_time": result["generation_time"]
349 | })
350 |
351 | st.rerun()
352 |
353 | except Exception as e:
354 | st.error(f"❌ Error: {str(e)}")
355 |
356 | with col_clear:
357 | if st.button("🗑️ Clear", key=f"clear_{strategy_name}"):
358 | if strategy_name in st.session_state.agents:
359 | st.session_state.agents[strategy_name].clear_memory()
360 | st.session_state.chat_history[strategy_name] = []
361 | st.session_state.performance_metrics[strategy_name] = []
362 | st.success("🧹 Memory cleared!")
363 | st.rerun()
364 |
365 | # Memory statistics
366 | if strategy_name in st.session_state.agents:
367 | st.markdown("### Memory Statistics")
368 | try:
369 | stats = st.session_state.agents[strategy_name].get_memory_stats()
370 |
371 | # Display key metrics
372 | metric_cols = st.columns(3)
373 | with metric_cols[0]:
374 |                 st.markdown(f"""
375 |                 <div>
376 |                 <h4>Strategy Type</h4>
377 |                 <p>{stats.get('strategy_type', 'Unknown')}</p>
378 |                 </div>
379 |                 """, unsafe_allow_html=True)
380 |
381 | with metric_cols[1]:
382 |                 st.markdown(f"""
383 |                 <div>
384 |                 <h4>Memory Size</h4>
385 |                 <p>{stats.get('memory_size', 'Unknown')}</p>
386 |                 </div>
387 |                 """, unsafe_allow_html=True)
388 |
389 | with metric_cols[2]:
390 | turns = len(st.session_state.chat_history[strategy_name]) // 2
391 |                 st.markdown(f"""
392 |                 <div>
393 |                 <h4>Conversation Turns</h4>
394 |                 <p>{turns}</p>
395 |                 </div>
396 |                 """, unsafe_allow_html=True)
397 |
398 | # Detailed stats
399 | with st.expander("📈 Detailed Statistics"):
400 | st.json(stats)
401 |
402 | except Exception as e:
403 | st.error(f"Error getting stats: {str(e)}")
404 | else:
405 | st.info("👈 Please initialize an agent first to start chatting!")
406 |
407 | def render_performance_dashboard():
408 | """Render performance comparison dashboard."""
409 | st.markdown("## Performance Dashboard")
410 |
411 | if not st.session_state.performance_metrics:
412 | st.info("No performance data yet. Test some strategies to see metrics!")
413 | return
414 |
415 | # Create performance comparison charts
416 | col1, col2 = st.columns(2)
417 |
418 | with col1:
419 | st.markdown("### Token Usage Over Time")
420 |
421 | # Prepare data for token usage chart
422 | token_data = []
423 | for strategy, metrics in st.session_state.performance_metrics.items():
424 | for metric in metrics:
425 | token_data.append({
426 | "Strategy": strategy,
427 | "Turn": metric["turn"],
428 | "Tokens": metric["tokens"]
429 | })
430 |
431 | if token_data:
432 | df_tokens = pd.DataFrame(token_data)
433 | fig_tokens = px.line(
434 | df_tokens,
435 | x="Turn",
436 | y="Tokens",
437 | color="Strategy",
438 | title="Token Usage Comparison",
439 | color_discrete_sequence=px.colors.qualitative.Set3
440 | )
441 | fig_tokens.update_layout(
442 | plot_bgcolor='rgba(0,0,0,0)',
443 | paper_bgcolor='rgba(0,0,0,0)'
444 | )
445 | st.plotly_chart(fig_tokens, use_container_width=True)
446 |
447 | with col2:
448 | st.markdown("### Response Time Analysis")
449 |
450 | # Prepare data for response time chart
451 | time_data = []
452 | for strategy, metrics in st.session_state.performance_metrics.items():
453 | for metric in metrics:
454 | time_data.append({
455 | "Strategy": strategy,
456 | "Turn": metric["turn"],
457 | "Generation Time": metric["generation_time"],
458 | "Retrieval Time": metric["retrieval_time"]
459 | })
460 |
461 |         if time_data:
462 |             df_times = pd.DataFrame(time_data).groupby("Strategy")[["Generation Time", "Retrieval Time"]].mean().reset_index()  # average per strategy, matching the chart title
463 |             fig_times = px.bar(
464 |                 df_times,
465 |                 x="Strategy",
466 |                 y=["Generation Time", "Retrieval Time"],
467 |                 title="Average Response Times",
468 |                 color_discrete_sequence=["#667eea", "#764ba2"]
469 |             )
470 | fig_times.update_layout(
471 | plot_bgcolor='rgba(0,0,0,0)',
472 | paper_bgcolor='rgba(0,0,0,0)'
473 | )
474 | st.plotly_chart(fig_times, use_container_width=True)
475 |
476 | def render_batch_tester():
477 | """Render batch testing interface for comparing multiple strategies."""
478 | st.markdown("## Batch Strategy Comparison")
479 |
480 | col1, col2 = st.columns([1, 2])
481 |
482 | with col1:
483 | st.markdown("### Test Configuration")
484 |
485 | # Strategy selection
486 | strategies_to_test = st.multiselect(
487 | "Select Strategies to Compare",
488 | [
489 | "Sequential Memory",
490 | "Sliding Window Memory",
491 | "Retrieval Memory (RAG)",
492 | "Hierarchical Memory"
493 | ],
494 | default=["Sequential Memory", "Retrieval Memory (RAG)"]
495 | )
496 |
497 | # Test conversations
498 | st.markdown("### Test Conversations")
499 | test_conversations = st.text_area(
500 | "Enter test messages (one per line)",
501 | value="Hi! My name is Alex and I'm a software engineer.\nI'm working on a machine learning project.\nI prefer Python and love coffee.\nWhat do you remember about me?",
502 | height=150
503 | ).split('\n')
504 |
505 | if st.button("🚀 Run Batch Test", type="primary"):
506 | if strategies_to_test and test_conversations:
507 | run_batch_test(strategies_to_test, test_conversations)
508 |
509 | with col2:
510 | if 'batch_results' in st.session_state:
511 | st.markdown("### Batch Test Results")
512 | display_batch_results()
513 |
514 | def run_batch_test(strategies: List[str], conversations: List[str]):
515 | """Run batch test on multiple strategies."""
516 | results = {}
517 |
518 | progress_bar = st.progress(0)
519 | status_text = st.empty()
520 |
521 | total_steps = len(strategies) * len(conversations)
522 | current_step = 0
523 |
524 | for strategy_name in strategies:
525 | status_text.text(f"Testing {strategy_name}...")
526 |
527 | try:
528 | # Create agent
529 | agent = create_agent(strategy_name, {})
530 |
531 | strategy_results = {
532 | "responses": [],
533 | "metrics": [],
534 | "final_stats": {}
535 | }
536 |
537 | # Run conversations
538 | for i, conversation in enumerate(conversations):
539 | if conversation.strip():
540 | result = agent.chat(conversation.strip(), verbose=False)
541 |
542 | strategy_results["responses"].append({
543 | "turn": i + 1,
544 | "user": conversation.strip(),
545 | "ai": result["ai_response"],
546 | "tokens": result["prompt_tokens"],
547 | "time": result["generation_time"]
548 | })
549 |
550 | strategy_results["metrics"].append({
551 | "turn": i + 1,
552 | "tokens": result["prompt_tokens"],
553 | "generation_time": result["generation_time"],
554 | "retrieval_time": result["retrieval_time"]
555 | })
556 |
557 | current_step += 1
558 | progress_bar.progress(current_step / total_steps)
559 |
560 | # Get final memory stats
561 | strategy_results["final_stats"] = agent.get_memory_stats()
562 | results[strategy_name] = strategy_results
563 |
564 | except Exception as e:
565 | st.error(f"Error testing {strategy_name}: {str(e)}")
566 |
567 | st.session_state.batch_results = results
568 | status_text.text("✅ Batch test completed!")
569 | progress_bar.progress(1.0)
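# Each batch run builds fresh agents via create_agent, so results are not
# influenced by any state left over from the single-strategy tester.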
570 |
571 | def display_batch_results():
572 | """Display batch test results."""
573 | results = st.session_state.batch_results
574 |
575 | # Summary metrics
576 | st.markdown("#### Summary Metrics")
577 |
578 | summary_data = []
579 | for strategy, data in results.items():
580 | if data["metrics"]:
581 | avg_tokens = sum(m["tokens"] for m in data["metrics"]) / len(data["metrics"])
582 | avg_time = sum(m["generation_time"] for m in data["metrics"]) / len(data["metrics"])
583 |
584 | summary_data.append({
585 | "Strategy": strategy,
586 | "Avg Tokens": f"{avg_tokens:.0f}",
587 | "Avg Response Time": f"{avg_time:.2f}s",
588 | "Memory Size": data["final_stats"].get("memory_size", "Unknown")
589 | })
590 |
591 | if summary_data:
592 | df_summary = pd.DataFrame(summary_data)
593 | st.dataframe(df_summary, use_container_width=True)
594 |
595 | # Detailed results
596 | for strategy, data in results.items():
597 | with st.expander(f"{strategy} - Detailed Results"):
598 | st.markdown("**Final Response:**")
599 | if data["responses"]:
600 | final_response = data["responses"][-1]
601 | st.markdown(f"*User:* {final_response['user']}")
602 | st.markdown(f"*AI:* {final_response['ai']}")
603 |
604 | st.markdown("**Memory Statistics:**")
605 | st.json(data["final_stats"])
606 |
607 | def main():
608 | """Main application function."""
609 | initialize_session_state()
610 |
611 | # Render sidebar
612 | if not render_sidebar():
613 | st.warning("⚠️ Please configure your OpenAI API key in the sidebar to continue.")
614 | return
615 |
616 | # Render main content
617 | render_main_header()
618 |
619 | # Navigation tabs
620 | tab1, tab2, tab3, tab4 = st.tabs([
621 | "Overview",
622 | "Single Tester",
623 | "Batch Comparison",
624 | "Performance Dashboard"
625 | ])
626 |
627 | with tab1:
628 | render_strategy_overview()
629 |
630 | st.markdown("## Getting Started")
631 | st.markdown("""
632 | 1. **Configure API Key**: Enter your OpenAI API key in the sidebar
633 | 2. **Choose Strategy**: Select a memory strategy to test
634 | 3. **Configure Settings**: Adjust strategy parameters as needed
635 | 4. **Start Chatting**: Initialize an agent and begin testing
636 | 5. **Compare Performance**: Use batch testing to compare strategies
637 | """)
638 |
639 | st.markdown("## Strategy Guide")
640 |
641 | guide_col1, guide_col2 = st.columns(2)
642 |
643 | with guide_col1:
644 | st.markdown("""
645 | **🟢 Basic Strategies (Easy to implement)**
646 | - **Sequential**: Perfect recall, but expensive for long chats
647 | - **Sliding Window**: Fixed memory, loses old information
648 | - **Summarization**: Compresses history, may lose details
649 | """)
650 |
651 | with guide_col2:
652 | st.markdown("""
653 | **🟡🔴 Advanced Strategies (Production-ready)**
654 | - **Retrieval (RAG)**: Industry standard, semantic search
655 | - **Hierarchical**: Human-like memory patterns
656 | - **Graph**: Complex relationship modeling
657 | """)
658 |
659 | with tab2:
660 | render_single_strategy_tester()
661 |
662 | with tab3:
663 | render_batch_tester()
664 |
665 | with tab4:
666 | render_performance_dashboard()
667 |
668 | if __name__ == "__main__":
669 | main()
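# Launch the playground with: streamlit run streamlit_playground.py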
670 |
--------------------------------------------------------------------------------