├── .nixpacksignore ├── .cursorignore ├── .gitattributes ├── automem ├── utils │ ├── __init__.py │ ├── graph.py │ ├── tags.py │ ├── text.py │ ├── validation.py │ ├── time.py │ └── scoring.py ├── __init__.py ├── embedding │ ├── __init__.py │ ├── provider.py │ ├── placeholder.py │ ├── openai.py │ └── fastembed.py ├── stores │ ├── vector_store.py │ └── graph_store.py ├── api │ ├── enrichment.py │ ├── consolidation.py │ └── health.py └── config.py ├── test-live-server-auto.sh ├── pytest.ini ├── .dockerignore ├── railway.toml ├── requirements-dev.txt ├── mcp-sse-server ├── Dockerfile ├── package.json ├── railway.json ├── README.md └── test │ └── server.test.js ├── railway.json ├── requirements.txt ├── .gitignore ├── Dockerfile ├── .railway ├── falkordb.Dockerfile └── backup-falkordb.sh ├── scripts ├── Dockerfile.health-monitor ├── reenrich_batch.py ├── deduplicate_qdrant.py ├── recover_from_qdrant.py ├── reembed_embeddings.py ├── reclassify_with_llm.py └── cleanup_memory_types.py ├── run-integration-tests.sh ├── tests ├── benchmarks │ ├── BENCHMARK_2025-11-08.md │ ├── BENCHMARK_2025-12-02.md │ ├── test_multihop_quick.py │ ├── BENCHMARK_2025-10-15.md │ └── BENCHMARK_2025-11-20.md ├── test_enrichment.py ├── conftest.py └── test_consolidation_engine.py ├── .env.example ├── LICENSE ├── .github └── workflows │ ├── release-please.yml │ ├── backup.yml │ └── ci.yml ├── .pre-commit-config.yaml ├── test-live-server.sh ├── docs ├── AGENT_TEMPLATE.md ├── API.md ├── MCP_SSE.md └── MIGRATIONS.md ├── AGENTS.md ├── docker-compose.yml ├── .secrets.baseline ├── Makefile └── test-locomo-benchmark.sh /.nixpacksignore: -------------------------------------------------------------------------------- 1 | * 2 | -------------------------------------------------------------------------------- /.cursorignore: -------------------------------------------------------------------------------- 1 | !.env 2 | !.env.example 3 | !.cursor* 4 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /automem/utils/__init__.py: -------------------------------------------------------------------------------- 1 | """Utility subpackage for small, pure helper functions.""" 2 | -------------------------------------------------------------------------------- /test-live-server-auto.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Backwards-compatible wrapper for non-interactive live test run 3 | set -e 4 | cd "$(dirname "$0")" 5 | ./test-live-server.sh --non-interactive "$@" 6 | -------------------------------------------------------------------------------- /automem/__init__.py: -------------------------------------------------------------------------------- 1 | """Automem internal modules package. 2 | 3 | Holds refactored modules extracted from app.py to reduce surface area and 4 | improve maintainability without changing behavior. 
5 | """ 6 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | filterwarnings = 3 | ignore::DeprecationWarning:spacy.* 4 | ignore::DeprecationWarning:weasel.* 5 | ignore:Importing 'parser.split_arg_string' is deprecated.*:DeprecationWarning 6 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | venv 2 | .git 3 | __pycache__ 4 | .pytest_cache 5 | backups 6 | .cursor 7 | .claude 8 | .vscode 9 | *.pyc 10 | *.pyo 11 | *.pyd 12 | *.log 13 | *.swp 14 | *.swo 15 | *.tmp 16 | node_modules 17 | dist 18 | build 19 | *.egg-info 20 | -------------------------------------------------------------------------------- /railway.toml: -------------------------------------------------------------------------------- 1 | # railway.toml - Remove the startCommand completely 2 | [build] 3 | builder = "DOCKERFILE" 4 | 5 | [deploy] 6 | # Remove startCommand - let Docker image use its default 7 | restartPolicyType = "ON_FAILURE" 8 | restartPolicyMaxRetries = 10 9 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | # requirements-dev.txt - Development dependencies 2 | -r requirements.txt 3 | 4 | # Development tools 5 | pytest==8.3.4 6 | black==24.8.0 7 | flake8==7.1.1 8 | isort==5.13.2 9 | pre-commit==4.0.1 10 | 11 | # Benchmark evaluation (LoCoMo official metrics) 12 | nltk==3.9.1 13 | -------------------------------------------------------------------------------- /mcp-sse-server/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:18-slim 2 | 3 | WORKDIR /app 4 | 5 | # Copy package files 6 | COPY package*.json ./ 7 | 8 | # Install dependencies 9 | RUN npm ci --only=production 10 | 11 | # Copy application code 12 | COPY server.js ./ 13 | 14 | # Railway injects PORT automatically 15 | EXPOSE 8080 16 | 17 | CMD ["node", "server.js"] 18 | -------------------------------------------------------------------------------- /mcp-sse-server/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "automem-mcp-sse-server", 3 | "version": "0.2.0", 4 | "private": true, 5 | "type": "module", 6 | "scripts": { 7 | "start": "node server.js", 8 | "test": "node --test" 9 | }, 10 | "dependencies": { 11 | "@modelcontextprotocol/sdk": "^1.20.0", 12 | "express": "^4.19.2" 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /railway.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://railway.app/railway.schema.json", 3 | "build": { 4 | "builder": "DOCKERFILE", 5 | "dockerfilePath": "Dockerfile" 6 | }, 7 | "deploy": { 8 | "numReplicas": 1, 9 | "restartPolicyType": "ON_FAILURE", 10 | "restartPolicyMaxRetries": 10, 11 | "healthcheckPath": "/health", 12 | "healthcheckTimeout": 100 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /mcp-sse-server/railway.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://railway.app/railway.schema.json", 3 | "build": { 4 | "builder": "DOCKERFILE", 5 | "dockerfilePath": 
"mcp-sse-server/Dockerfile" 6 | }, 7 | "deploy": { 8 | "numReplicas": 1, 9 | "restartPolicyType": "ON_FAILURE", 10 | "restartPolicyMaxRetries": 10, 11 | "healthcheckPath": "/health", 12 | "healthcheckTimeout": 100 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # requirements.txt - Updated versions for 2024/2025 2 | flask==3.0.3 3 | falkordb==1.0.9 4 | qdrant-client==1.11.3 5 | python-dotenv==1.0.1 6 | python-dateutil==2.9.0 7 | openai==1.55.3 8 | spacy==3.8.7 9 | requests==2.31.0 10 | fastembed==0.4.2 11 | onnxruntime<1.20 # Pin to avoid issues with fastembed 0.4.2 12 | en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0.tar.gz 13 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .claude/settings.local.json 2 | *.code-workspace 3 | .env 4 | **/__pycache__ 5 | automation_hub_dashboard/.env 6 | automation_hub_dashboard/.venv/ 7 | reports/ 8 | venv/ 9 | /.cursor 10 | 11 | # Local backups (use S3 for persistent backups) 12 | backups/ 13 | 14 | # Log files 15 | *.log 16 | 17 | tests/benchmarks/locomo/ 18 | .DS_Store 19 | /.venv 20 | /automem/.venv 21 | 22 | # Node.js dependencies 23 | /mcp-sse-server/node_modules/ 24 | node_modules/ 25 | # Experiment results (promote notable runs to tests/benchmarks/results/) 26 | /tests/benchmarks/experiments/results_*/ 27 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Dockerfile - Flask API runtime image 2 | FROM python:3.11-slim 3 | 4 | ENV PYTHONDONTWRITEBYTECODE=1 \ 5 | PYTHONUNBUFFERED=1 6 | 7 | WORKDIR /app 8 | 9 | # Install system deps (none currently, but keep hook for Falkor client libs if needed) 10 | RUN apt-get update && apt-get install -y --no-install-recommends \ 11 | build-essential \ 12 | && rm -rf /var/lib/apt/lists/* 13 | 14 | COPY requirements.txt ./ 15 | RUN pip install --no-cache-dir -r requirements.txt 16 | 17 | # Copy the full application source into the image 18 | COPY . . 
19 | 20 | EXPOSE 8001 21 | 22 | CMD ["python", "app.py"] 23 | -------------------------------------------------------------------------------- /.railway/falkordb.Dockerfile: -------------------------------------------------------------------------------- 1 | # FalkorDB with persistence and backup support 2 | FROM falkordb/falkordb:latest 3 | 4 | # Add backup script 5 | COPY .railway/backup-falkordb.sh /usr/local/bin/backup-falkordb.sh 6 | RUN chmod +x /usr/local/bin/backup-falkordb.sh 7 | 8 | # Configure persistence 9 | ENV REDIS_ARGS="--save 900 1 --save 300 10 --save 60 10000 --appendonly yes --dir /data" 10 | 11 | # Expose ports 12 | EXPOSE 6379 13 | 14 | # Health check 15 | HEALTHCHECK --interval=30s --timeout=3s --start-period=30s --retries=3 \ 16 | CMD redis-cli ping || exit 1 17 | 18 | # Volume for persistent data 19 | VOLUME ["/data"] 20 | 21 | CMD ["redis-server", "--loadmodule", "/usr/lib/redis/modules/libgraphcontext.so"] 22 | -------------------------------------------------------------------------------- /scripts/Dockerfile.health-monitor: -------------------------------------------------------------------------------- 1 | # Dockerfile for AutoMem Health Monitor Service 2 | FROM python:3.11-slim 3 | 4 | ENV PYTHONDONTWRITEBYTECODE=1 \ 5 | PYTHONUNBUFFERED=1 6 | 7 | WORKDIR /app 8 | 9 | # Install dependencies 10 | RUN apt-get update && apt-get install -y --no-install-recommends \ 11 | build-essential \ 12 | && rm -rf /var/lib/apt/lists/* 13 | 14 | COPY requirements.txt ./ 15 | RUN pip install --no-cache-dir -r requirements.txt 16 | 17 | # Copy application files 18 | COPY scripts/health_monitor.py scripts/ 19 | COPY scripts/recover_from_qdrant.py scripts/ 20 | 21 | # Run health monitor (alert-only mode by default for safety) 22 | # Override with --auto-recover if you want automatic recovery 23 | CMD ["python", "scripts/health_monitor.py", "--interval", "300"] 24 | -------------------------------------------------------------------------------- /run-integration-tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Script to run integration tests with proper environment setup 3 | 4 | set -e 5 | 6 | # Ensure we're in the project directory 7 | cd "$(dirname "$0")" 8 | 9 | # Activate virtual environment 10 | source venv/bin/activate 11 | 12 | # Set required environment variables 13 | export AUTOMEM_RUN_INTEGRATION_TESTS=1 14 | export AUTOMEM_TEST_API_TOKEN=test-token 15 | export AUTOMEM_TEST_ADMIN_TOKEN=test-admin-token 16 | 17 | # Start Docker services with proper tokens 18 | echo "🐳 Starting Docker services..." 19 | AUTOMEM_API_TOKEN=test-token ADMIN_API_TOKEN=test-admin-token docker compose up -d 20 | 21 | # Wait for services to be ready 22 | echo "⏳ Waiting for services to be ready..." 23 | sleep 5 24 | 25 | # Run the tests 26 | echo "🧪 Running integration tests..." 27 | python -m pytest tests/test_integration.py -v "$@" 28 | 29 | echo "✅ Integration tests completed!" 
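# Tip: extra arguments are forwarded straight to pytest via "$@", e.g.
#   ./run-integration-tests.sh -x --maxfail=1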
30 | -------------------------------------------------------------------------------- /tests/benchmarks/BENCHMARK_2025-11-08.md: -------------------------------------------------------------------------------- 1 | # AutoMem Benchmark Results 2 | 3 | ## LoCoMo Benchmark (Long-term Conversational Memory) 4 | 5 | **Benchmark Version**: LoCoMo-10 (1,986 questions across 10 conversations) 6 | **Date**: November 8, 2025 7 | **AutoMem Version**: Latest (as of benchmark) 8 | 9 | ============================================================ 10 | 📊 FINAL RESULTS 11 | ============================================================ 12 | 13 | 🎯 Overall Accuracy: 76.08% (1511/1986) 14 | ⏱️ Total Time: 1500.0s 15 | 💾 Total Memories Stored: 5882 16 | 17 | 📈 Category Breakdown: 18 | Single-hop Recall : 59.22% (167/282) 19 | Temporal Understanding : 70.40% (226/321) 20 | Multi-hop Reasoning : 22.92% ( 22/ 96) 21 | Open Domain : 77.41% (651/841) 22 | Complex Reasoning : 99.78% (445/446) 23 | 24 | 🏆 Comparison with CORE (SOTA): 25 | CORE: 88.24% 26 | AutoMem: 76.08% 27 | 📉 AutoMem is 12.16% behind CORE 28 | -------------------------------------------------------------------------------- /automem/embedding/__init__.py: -------------------------------------------------------------------------------- 1 | """Embedding provider module for AutoMem. 2 | 3 | Provides abstraction over different embedding backends: 4 | - OpenAI (API-based, requires key) 5 | - FastEmbed (local model, no API key needed) 6 | - Placeholder (hash-based fallback) 7 | """ 8 | 9 | from .provider import EmbeddingProvider 10 | 11 | # Optional backends: guard imports to avoid hard dependencies at import time 12 | try: 13 | from .openai import OpenAIEmbeddingProvider # type: ignore 14 | except ImportError: 15 | OpenAIEmbeddingProvider = None # type: ignore[assignment] 16 | try: 17 | from .fastembed import FastEmbedProvider # type: ignore 18 | except ImportError: 19 | FastEmbedProvider = None # type: ignore[assignment] 20 | from .placeholder import PlaceholderEmbeddingProvider 21 | 22 | __all__ = [ 23 | "EmbeddingProvider", 24 | "FastEmbedProvider", 25 | "OpenAIEmbeddingProvider", 26 | "PlaceholderEmbeddingProvider", 27 | ] 28 | -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- 1 | # Copy this file to ~/.config/automem/.env or export the values manually. 
2 | FALKORDB_HOST=localhost 3 | FALKORDB_PORT=6379 4 | FALKORDB_GRAPH=memories 5 | QDRANT_URL= 6 | QDRANT_API_KEY= 7 | QDRANT_COLLECTION=memories 8 | VECTOR_SIZE=768 9 | PORT=8001 10 | OPENAI_API_KEY= 11 | AUTOMEM_API_TOKEN= 12 | ADMIN_API_TOKEN= 13 | 14 | # --- Testing / CI (optional) --- 15 | # Enable integration test suite (defaults to disabled) 16 | # AUTOMEM_RUN_INTEGRATION_TESTS=1 17 | # Start/stop Docker Compose automatically for integration tests 18 | # AUTOMEM_START_DOCKER=1 19 | # AUTOMEM_STOP_DOCKER=1 20 | # Override API base URL for integration tests (default http://localhost:8001) 21 | # AUTOMEM_TEST_BASE_URL=http://localhost:8001 22 | # Allow tests to run against a non-local host (requires explicit opt-in) 23 | # AUTOMEM_ALLOW_LIVE=0 24 | # Tokens the integration tests will use when calling the API 25 | # AUTOMEM_TEST_API_TOKEN= 26 | # AUTOMEM_TEST_ADMIN_TOKEN= 27 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Jack Arturo 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /.railway/backup-falkordb.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Automated FalkorDB backup script 3 | # Run via cron or Railway scheduled task 4 | 5 | set -e 6 | 7 | BACKUP_DIR="${BACKUP_DIR:-/data/backups}" 8 | RETENTION_DAYS="${RETENTION_DAYS:-7}" 9 | TIMESTAMP=$(date +%Y%m%d_%H%M%S) 10 | 11 | mkdir -p "$BACKUP_DIR" 12 | 13 | echo "🔄 Starting FalkorDB backup at $TIMESTAMP" 14 | 15 | # Trigger Redis SAVE 16 | redis-cli SAVE 17 | 18 | # Copy RDB file 19 | if [ -f /data/dump.rdb ]; then 20 | cp /data/dump.rdb "$BACKUP_DIR/dump_${TIMESTAMP}.rdb" 21 | echo "✅ Backup created: dump_${TIMESTAMP}.rdb" 22 | 23 | # Compress old backups 24 | find "$BACKUP_DIR" -name "dump_*.rdb" -mtime +1 -exec gzip {} \; 25 | 26 | # Clean old backups 27 | find "$BACKUP_DIR" -name "dump_*.rdb.gz" -mtime +${RETENTION_DAYS} -delete 28 | echo "🧹 Cleaned backups older than ${RETENTION_DAYS} days" 29 | else 30 | echo "⚠️ No dump.rdb found" 31 | exit 1 32 | fi 33 | 34 | # Optional: Upload to S3 if credentials available 35 | if [ -n "$AWS_ACCESS_KEY_ID" ] && [ -n "$S3_BACKUP_BUCKET" ]; then 36 | aws s3 cp "$BACKUP_DIR/dump_${TIMESTAMP}.rdb" \ 37 | "s3://${S3_BACKUP_BUCKET}/automem/falkordb/dump_${TIMESTAMP}.rdb" 38 | echo "☁️ Uploaded to S3" 39 | fi 40 | 41 | echo "✅ Backup complete" 42 | -------------------------------------------------------------------------------- /mcp-sse-server/README.md: -------------------------------------------------------------------------------- 1 | # AutoMem MCP SSE Server 2 | 3 | Express service that bridges the AutoMem HTTP API to MCP over SSE and now exposes a lightweight Alexa skill endpoint. 4 | 5 | ## Endpoints 6 | - `GET /mcp/sse` and `POST /mcp/messages`: MCP over SSE (tools map to AutoMem HTTP API). 7 | - `POST /alexa`: Alexa Custom skill hook; supports `RememberIntent` (store) and `RecallIntent` (recall). 8 | - `GET /health`: Basic health probe. 9 | 10 | ## Env Vars 11 | - `AUTOMEM_ENDPOINT` (default `http://127.0.0.1:8001`) – AutoMem HTTP base URL. 12 | - `AUTOMEM_API_TOKEN` – Bearer token for AutoMem HTTP calls (required for Alexa and MCP). 13 | - `PORT` (optional) – Listener port (default 8080). 14 | 15 | Overrides for testing (Alexa endpoint): 16 | - `?endpoint=` query param or `endpoint` field in the POST body will override `AUTOMEM_ENDPOINT`. 17 | - `api_key` query param or `Authorization: Bearer ...` / `X-API-Key` header will override `AUTOMEM_API_TOKEN`. 18 | 19 | ## Alexa Notes 20 | - Alexa cannot send custom headers; keep the AutoMem token in `AUTOMEM_API_TOKEN`. 21 | - Sample utterances (Custom model): `remember {note}`, `store {note}`, `recall {query}`, `what do you remember about {query}`. 22 | - Tags applied automatically: `alexa`, plus `user:{userId}` and `device:{deviceId}` when present in the request. 
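## Example: Alexa smoke test

A minimal sketch for exercising `POST /alexa` during local testing. The envelope below assumes the standard Alexa `IntentRequest` shape; the exact fields `server.js` reads may differ, so treat the payload as illustrative:

```python
import requests

BASE_URL = "http://127.0.0.1:8080"  # assumed local default PORT
TOKEN = "your-automem-api-token"    # placeholder - substitute your real token

# Hypothetical minimal IntentRequest envelope for RememberIntent
payload = {
    "version": "1.0",
    "session": {"user": {"userId": "test-user"}},
    "request": {
        "type": "IntentRequest",
        "intent": {
            "name": "RememberIntent",
            "slots": {"note": {"value": "ship the backup workflow"}},
        },
    },
}

# The Authorization header overrides AUTOMEM_API_TOKEN, per the testing notes above
resp = requests.post(
    f"{BASE_URL}/alexa",
    json=payload,
    headers={"Authorization": f"Bearer {TOKEN}"},
    timeout=10,
)
print(resp.status_code, resp.text)
```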
23 | -------------------------------------------------------------------------------- /automem/utils/graph.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import Any, Dict 4 | 5 | from automem.utils.scoring import _parse_metadata_field 6 | 7 | 8 | def _serialize_node(node: Any) -> Dict[str, Any]: 9 | properties = getattr(node, "properties", None) 10 | if isinstance(properties, dict): 11 | data = dict(properties) 12 | elif isinstance(node, dict): 13 | data = dict(node) 14 | else: 15 | return {"value": node} 16 | 17 | if "metadata" in data: 18 | data["metadata"] = _parse_metadata_field(data["metadata"]) 19 | 20 | return data 21 | 22 | 23 | def _summarize_relation_node(data: Dict[str, Any]) -> Dict[str, Any]: 24 | summary: Dict[str, Any] = {} 25 | 26 | for key in ("id", "type", "timestamp", "summary", "importance", "confidence"): 27 | if key in data: 28 | summary[key] = data[key] 29 | 30 | content = data.get("content") 31 | if "summary" not in summary and isinstance(content, str): 32 | snippet = content.strip() 33 | if len(snippet) > 160: 34 | snippet = snippet[:157].rsplit(" ", 1)[0] + "…" 35 | summary["content"] = snippet 36 | 37 | tags = data.get("tags") 38 | if isinstance(tags, list) and tags: 39 | summary["tags"] = tags[:5] 40 | 41 | return summary 42 | -------------------------------------------------------------------------------- /automem/stores/vector_store.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import List, Optional 4 | 5 | from qdrant_client import models as qdrant_models 6 | 7 | from automem.utils.tags import _prepare_tag_filters 8 | 9 | 10 | def _build_qdrant_tag_filter( 11 | tags: Optional[List[str]], 12 | mode: str = "any", 13 | match: str = "exact", 14 | ): 15 | """Build a Qdrant filter for tag constraints, supporting mode/match semantics. 16 | 17 | Extracted for reuse by Qdrant interactions. 18 | """ 19 | normalized_tags = _prepare_tag_filters(tags) 20 | if not normalized_tags: 21 | return None 22 | 23 | target_key = "tag_prefixes" if match == "prefix" else "tags" 24 | normalized_mode = "all" if mode == "all" else "any" 25 | 26 | if normalized_mode == "any": 27 | return qdrant_models.Filter( 28 | must=[ 29 | qdrant_models.FieldCondition( 30 | key=target_key, 31 | match=qdrant_models.MatchAny(any=normalized_tags), 32 | ) 33 | ] 34 | ) 35 | 36 | must_conditions = [ 37 | qdrant_models.FieldCondition( 38 | key=target_key, 39 | match=qdrant_models.MatchValue(value=tag), 40 | ) 41 | for tag in normalized_tags 42 | ] 43 | 44 | return qdrant_models.Filter(must=must_conditions) 45 | -------------------------------------------------------------------------------- /automem/stores/graph_store.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | 4 | def _build_graph_tag_predicate(tag_mode: str, tag_match: str) -> str: 5 | """Construct a Cypher predicate for tag filtering with mode/match semantics. 6 | 7 | Mirrors the implementation in app.py. 
8 | """ 9 | normalized_mode = "all" if tag_mode == "all" else "any" 10 | normalized_match = "prefix" if tag_match == "prefix" else "exact" 11 | tags_expr = "[tag IN coalesce(m.tags, []) | toLower(tag)]" 12 | 13 | if normalized_match == "exact": 14 | if normalized_mode == "all": 15 | return f"ALL(req IN $tag_filters WHERE req IN {tags_expr})" 16 | return f"ANY(tag IN {tags_expr} WHERE tag IN $tag_filters)" 17 | 18 | prefixes_expr = "coalesce(m.tag_prefixes, [])" 19 | prefix_any = f"ANY(req IN $tag_filters WHERE req IN {prefixes_expr})" 20 | prefix_all = f"ALL(req IN $tag_filters WHERE req IN {prefixes_expr})" 21 | fallback_any = ( 22 | f"ANY(req IN $tag_filters WHERE ANY(tag IN {tags_expr} WHERE tag STARTS WITH req))" 23 | ) 24 | fallback_all = ( 25 | f"ALL(req IN $tag_filters WHERE ANY(tag IN {tags_expr} WHERE tag STARTS WITH req))" 26 | ) 27 | 28 | if normalized_mode == "all": 29 | return ( 30 | f"((size({prefixes_expr}) > 0 AND {prefix_all}) " 31 | f"OR (size({prefixes_expr}) = 0 AND {fallback_all}))" 32 | ) 33 | 34 | return ( 35 | f"((size({prefixes_expr}) > 0 AND {prefix_any}) " 36 | f"OR (size({prefixes_expr}) = 0 AND {fallback_any}))" 37 | ) 38 | -------------------------------------------------------------------------------- /.github/workflows/release-please.yml: -------------------------------------------------------------------------------- 1 | # Release Please - Automated versioning and releases 2 | # 3 | # How it works: 4 | # 1. Every push to main with conventional commits (feat:, fix:, etc.) 5 | # updates a "Release PR" that tracks all unreleased changes 6 | # 2. When you merge the Release PR, it: 7 | # - Updates version in pyproject.toml/setup.py 8 | # - Updates CHANGELOG.md 9 | # - Creates a GitHub Release with tag (v0.9.3, etc.) 10 | # - The release triggers any publish workflows 11 | # 12 | # Commit message format: 13 | # feat: add new feature → minor version bump (0.9.0 → 0.10.0) 14 | # fix: fix a bug → patch version bump (0.9.0 → 0.9.1) 15 | # feat!: breaking change → major version bump (0.9.0 → 1.0.0) 16 | # chore: update deps → no version bump (goes in next release) 17 | # docs: update readme → no version bump 18 | # 19 | # See: https://github.com/googleapis/release-please 20 | 21 | name: Release Please 22 | 23 | on: 24 | push: 25 | branches: 26 | - main 27 | 28 | permissions: 29 | contents: write 30 | pull-requests: write 31 | 32 | jobs: 33 | release-please: 34 | runs-on: ubuntu-latest 35 | outputs: 36 | release_created: ${{ steps.release.outputs.release_created }} 37 | tag_name: ${{ steps.release.outputs.tag_name }} 38 | steps: 39 | - uses: googleapis/release-please-action@v4 40 | id: release 41 | with: 42 | release-type: python 43 | # Optional: specify package name if different from repo 44 | # package-name: automem 45 | -------------------------------------------------------------------------------- /.github/workflows/backup.yml: -------------------------------------------------------------------------------- 1 | name: AutoMem Backup 2 | 3 | on: 4 | schedule: 5 | # Every 6 hours at :00 6 | - cron: "0 */6 * * *" 7 | workflow_dispatch: # Allow manual trigger 8 | 9 | jobs: 10 | backup: 11 | runs-on: ubuntu-latest 12 | timeout-minutes: 30 13 | 14 | steps: 15 | - name: Checkout code 16 | uses: actions/checkout@v4 17 | 18 | - name: Set up Python 19 | uses: actions/setup-python@v4 20 | with: 21 | python-version: "3.11" 22 | 23 | - name: Install dependencies 24 | run: | 25 | pip install --no-cache-dir -r requirements.txt boto3 26 | 27 | - name: Run backup 28 | env: 29 | 
FALKORDB_HOST: ${{ secrets.FALKORDB_HOST }} 30 | FALKORDB_PORT: ${{ secrets.FALKORDB_PORT }} 31 | FALKORDB_PASSWORD: ${{ secrets.FALKORDB_PASSWORD }} 32 | QDRANT_URL: ${{ secrets.QDRANT_URL }} 33 | QDRANT_API_KEY: ${{ secrets.QDRANT_API_KEY }} 34 | AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} 35 | AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 36 | AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }} 37 | run: | 38 | python scripts/backup_automem.py \ 39 | --s3-bucket automem-backups \ 40 | --cleanup --keep 14 41 | 42 | - name: Backup summary 43 | if: always() 44 | run: | 45 | echo "✅ Backup completed at $(date)" 46 | ls -lh backups/ || echo "Local backup directory not found" 47 | -------------------------------------------------------------------------------- /automem/utils/tags.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import re 4 | from typing import Any, List, Optional, Set 5 | 6 | 7 | def _normalize_tag_list(raw: Any) -> List[str]: 8 | if raw is None: 9 | return [] 10 | if isinstance(raw, str): 11 | if not raw.strip(): 12 | return [] 13 | return [part.strip() for part in raw.split(",") if part.strip()] 14 | if isinstance(raw, (list, tuple, set)): 15 | tags: List[str] = [] 16 | for item in raw: 17 | if isinstance(item, str) and item.strip(): 18 | tags.append(item.strip()) 19 | return tags 20 | return [] 21 | 22 | 23 | def _expand_tag_prefixes(tag: str) -> List[str]: 24 | """Expand a tag into all prefixes using ':' as the canonical delimiter.""" 25 | parts = re.split(r"[:/]", tag) 26 | prefixes: List[str] = [] 27 | accumulator: List[str] = [] 28 | for part in parts: 29 | if not part: 30 | continue 31 | accumulator.append(part) 32 | prefixes.append(":".join(accumulator)) 33 | return prefixes 34 | 35 | 36 | def _compute_tag_prefixes(tags: List[str]) -> List[str]: 37 | """Compute unique, lowercased tag prefixes for fast prefix filtering.""" 38 | seen: Set[str] = set() 39 | prefixes: List[str] = [] 40 | for tag in tags or []: 41 | normalized = (tag or "").strip().lower() 42 | if not normalized: 43 | continue 44 | for prefix in _expand_tag_prefixes(normalized): 45 | if prefix not in seen: 46 | seen.add(prefix) 47 | prefixes.append(prefix) 48 | return prefixes 49 | 50 | 51 | def _prepare_tag_filters(tag_filters: Optional[List[str]]) -> List[str]: 52 | """Normalize incoming tag filters for matching and persistence.""" 53 | return [ 54 | tag.strip().lower() for tag in (tag_filters or []) if isinstance(tag, str) and tag.strip() 55 | ] 56 | -------------------------------------------------------------------------------- /automem/embedding/provider.py: -------------------------------------------------------------------------------- 1 | """Base embedding provider interface.""" 2 | 3 | from abc import ABC, abstractmethod 4 | from typing import List 5 | 6 | 7 | class EmbeddingProvider(ABC): 8 | """Abstract base class for embedding providers. 9 | 10 | Provides a common interface for generating embeddings from text, 11 | allowing AutoMem to support multiple embedding backends. 12 | """ 13 | 14 | @abstractmethod 15 | def generate_embedding(self, text: str) -> List[float]: 16 | """Generate an embedding for a single text. 
17 | 18 | Args: 19 | text: The text to embed 20 | 21 | Returns: 22 | A list of floats representing the embedding vector 23 | 24 | Raises: 25 | Exception: If embedding generation fails 26 | """ 27 | pass 28 | 29 | @abstractmethod 30 | def generate_embeddings_batch(self, texts: List[str]) -> List[List[float]]: 31 | """Generate embeddings for multiple texts in a single batch. 32 | 33 | Args: 34 | texts: List of texts to embed 35 | 36 | Returns: 37 | List of embedding vectors, one per input text 38 | 39 | Raises: 40 | Exception: If batch embedding generation fails 41 | """ 42 | pass 43 | 44 | @abstractmethod 45 | def dimension(self) -> int: 46 | """Return the dimensionality of embeddings produced by this provider. 47 | 48 | Returns: 49 | The number of dimensions in the embedding vectors 50 | """ 51 | pass 52 | 53 | @abstractmethod 54 | def provider_name(self) -> str: 55 | """Return a human-readable name for this provider. 56 | 57 | Returns: 58 | Provider name (e.g., "openai", "fastembed:bge-base-en-v1.5") 59 | """ 60 | pass 61 | 62 | def __repr__(self) -> str: 63 | return f"{self.__class__.__name__}(dimension={self.dimension()}, provider={self.provider_name()})" 64 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # Pre-commit hooks for AutoMem 2 | # Install: pip install pre-commit && pre-commit install 3 | # Run manually: pre-commit run --all-files 4 | 5 | repos: 6 | # Code formatting 7 | - repo: https://github.com/psf/black 8 | rev: 24.4.2 9 | hooks: 10 | - id: black 11 | args: [--line-length=100] 12 | 13 | # Import sorting 14 | - repo: https://github.com/pycqa/isort 15 | rev: 5.13.2 16 | hooks: 17 | - id: isort 18 | args: [--profile=black, --line-length=100] 19 | 20 | # Linting - catch syntax errors and undefined names (fast, blocking) 21 | - repo: https://github.com/pycqa/flake8 22 | rev: 7.1.1 23 | hooks: 24 | - id: flake8 25 | args: ["--select=E9,F63,F7,F82", "--show-source", "--max-line-length=100"] 26 | 27 | # Conventional commits - enforce commit message format 28 | - repo: https://github.com/compilerla/conventional-pre-commit 29 | rev: v3.2.0 30 | hooks: 31 | - id: conventional-pre-commit 32 | stages: [commit-msg] 33 | args: [feat, fix, docs, style, refactor, perf, test, build, ci, chore, revert] 34 | 35 | # Trailing whitespace, end of file, YAML syntax 36 | - repo: https://github.com/pre-commit/pre-commit-hooks 37 | rev: v4.6.0 38 | hooks: 39 | - id: trailing-whitespace 40 | - id: end-of-file-fixer 41 | - id: check-yaml 42 | - id: check-added-large-files 43 | args: [--maxkb=500] 44 | - id: check-merge-conflict 45 | - id: debug-statements 46 | 47 | # Security - check for hardcoded secrets 48 | - repo: https://github.com/Yelp/detect-secrets 49 | rev: v1.5.0 50 | hooks: 51 | - id: detect-secrets 52 | args: [--baseline, .secrets.baseline] 53 | exclude: (tests/|\.md$|\.json$) 54 | 55 | # Run tests before push (optional, can be slow) 56 | # Uncomment to enable: 57 | # - repo: local 58 | # hooks: 59 | # - id: pytest 60 | # name: pytest 61 | # entry: pytest tests/ --ignore=tests/benchmarks -x -q 62 | # language: system 63 | # pass_filenames: false 64 | # stages: [push] 65 | -------------------------------------------------------------------------------- /test-live-server.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Script to run integration tests against the live Railway deployment 3 | 4 
| set -e 5 | 6 | NON_INTERACTIVE=0 7 | PYTEST_ARGS=() 8 | 9 | for arg in "$@"; do 10 | case "$arg" in 11 | --non-interactive) 12 | NON_INTERACTIVE=1 13 | ;; 14 | *) 15 | PYTEST_ARGS+=("$arg") 16 | ;; 17 | esac 18 | done 19 | 20 | # Ensure we're in the project directory 21 | cd "$(dirname "$0")" 22 | 23 | # Activate virtual environment if present 24 | if [ -f "venv/bin/activate" ]; then 25 | source venv/bin/activate 26 | fi 27 | 28 | # Get Railway environment variables 29 | echo "🔍 Fetching Railway configuration..." 30 | LIVE_URL=$(railway variables --json | jq -r '.RAILWAY_PUBLIC_DOMAIN // empty' | sed 's/^/https:\/\//') 31 | LIVE_API_TOKEN=$(railway variables --json | jq -r '.AUTOMEM_API_TOKEN // empty') 32 | LIVE_ADMIN_TOKEN=$(railway variables --json | jq -r '.ADMIN_API_TOKEN // empty') 33 | 34 | if [ -z "$LIVE_URL" ] || [ -z "$LIVE_API_TOKEN" ]; then 35 | echo "❌ Error: Could not fetch Railway configuration" 36 | echo " Make sure you're linked to the Railway project: railway link" 37 | exit 1 38 | fi 39 | 40 | echo "🌐 Live server URL: $LIVE_URL" 41 | echo "" 42 | 43 | if [ "$NON_INTERACTIVE" -eq 0 ]; then 44 | # Confirm before running against live 45 | echo "⚠️ WARNING: This will run integration tests against the LIVE production server!" 46 | echo " The tests will create and delete test memories tagged with 'test' and 'integration'." 47 | echo "" 48 | read -p "Are you sure you want to continue? (y/N) " -n 1 -r 49 | echo "" 50 | if [[ ! $REPLY =~ ^[Yy]$ ]]; then 51 | echo "❌ Aborted" 52 | exit 1 53 | fi 54 | fi 55 | 56 | # Set required environment variables 57 | export AUTOMEM_RUN_INTEGRATION_TESTS=1 58 | export AUTOMEM_TEST_BASE_URL="$LIVE_URL" 59 | export AUTOMEM_TEST_API_TOKEN="$LIVE_API_TOKEN" 60 | export AUTOMEM_TEST_ADMIN_TOKEN="$LIVE_ADMIN_TOKEN" 61 | export AUTOMEM_ALLOW_LIVE=1 62 | 63 | # Run the tests 64 | echo "" 65 | echo "🧪 Running integration tests against live server..." 66 | python -m pytest tests/test_integration.py -v "${PYTEST_ARGS[@]}" 67 | 68 | echo "" 69 | echo "✅ Live server tests completed!" 
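# Tip: pytest flags pass through unchanged, and --non-interactive skips the
# confirmation prompt, e.g. ./test-live-server.sh --non-interactive -x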
70 | -------------------------------------------------------------------------------- /automem/utils/text.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import re 4 | from typing import List 5 | 6 | # Common stopwords to exclude from search tokens 7 | SEARCH_STOPWORDS = { 8 | "the", 9 | "and", 10 | "for", 11 | "with", 12 | "that", 13 | "this", 14 | "from", 15 | "into", 16 | "using", 17 | "have", 18 | "will", 19 | "your", 20 | "about", 21 | "after", 22 | "before", 23 | "when", 24 | "then", 25 | "than", 26 | "also", 27 | "just", 28 | "very", 29 | "more", 30 | "less", 31 | "over", 32 | "under", 33 | } 34 | 35 | # Entity-level stopwords and blocklist for extraction filtering 36 | ENTITY_STOPWORDS = { 37 | "you", 38 | "your", 39 | "yours", 40 | "whatever", 41 | "today", 42 | "tomorrow", 43 | "project", 44 | "projects", 45 | "office", 46 | "session", 47 | "meeting", 48 | } 49 | 50 | # Common error codes and technical strings to exclude from entity extraction 51 | ENTITY_BLOCKLIST = { 52 | # HTTP errors 53 | "bad request", 54 | "not found", 55 | "unauthorized", 56 | "forbidden", 57 | "internal server error", 58 | "service unavailable", 59 | "gateway timeout", 60 | # Network errors 61 | "econnreset", 62 | "econnrefused", 63 | "etimedout", 64 | "enotfound", 65 | "enetunreach", 66 | "ehostunreach", 67 | "epipe", 68 | "eaddrinuse", 69 | # Common error patterns 70 | "error", 71 | "warning", 72 | "exception", 73 | "failed", 74 | "failure", 75 | } 76 | 77 | 78 | def _extract_keywords(text: str) -> List[str]: 79 | """Convert a raw query string into normalized keyword tokens.""" 80 | if not text: 81 | return [] 82 | 83 | words = re.findall(r"[A-Za-z0-9_\-]+", text.lower()) 84 | keywords: List[str] = [] 85 | seen: set[str] = set() 86 | 87 | for word in words: 88 | cleaned = word.strip("-_") 89 | if len(cleaned) < 3: 90 | continue 91 | if cleaned in SEARCH_STOPWORDS: 92 | continue 93 | if cleaned in seen: 94 | continue 95 | seen.add(cleaned) 96 | keywords.append(cleaned) 97 | 98 | return keywords 99 | -------------------------------------------------------------------------------- /automem/embedding/placeholder.py: -------------------------------------------------------------------------------- 1 | """Placeholder embedding provider using deterministic hash-based embeddings.""" 2 | 3 | import hashlib 4 | import random 5 | from typing import List 6 | 7 | from automem.embedding.provider import EmbeddingProvider 8 | 9 | 10 | class PlaceholderEmbeddingProvider(EmbeddingProvider): 11 | """Generates deterministic embeddings from content hash. 12 | 13 | This provider creates embeddings without semantic meaning, useful as a 14 | fallback when no real embedding model is available. Embeddings are 15 | deterministic (same content always produces same embedding) but have no 16 | semantic similarity properties. 17 | """ 18 | 19 | def __init__(self, dimension: int = 768): 20 | """Initialize placeholder provider. 21 | 22 | Args: 23 | dimension: Number of dimensions for embedding vectors 24 | """ 25 | self._dimension = dimension 26 | 27 | def generate_embedding(self, text: str) -> List[float]: 28 | """Generate a deterministic embedding from text hash. 
29 | 30 | Args: 31 | text: The text to embed 32 | 33 | Returns: 34 | A deterministic vector based on content hash 35 | """ 36 | digest = hashlib.sha256(text.encode("utf-8")).digest() 37 | seed = int.from_bytes(digest[:8], "little", signed=False) 38 | rng = random.Random( 39 | seed 40 | ) # nosec: B311 - Deterministic RNG is intentional for placeholder embeddings 41 | return [rng.random() for _ in range(self._dimension)] 42 | 43 | def generate_embeddings_batch(self, texts: List[str]) -> List[List[float]]: 44 | """Generate embeddings for multiple texts. 45 | 46 | Args: 47 | texts: List of texts to embed 48 | 49 | Returns: 50 | List of deterministic vectors 51 | """ 52 | return [self.generate_embedding(text) for text in texts] 53 | 54 | def dimension(self) -> int: 55 | """Return embedding dimensionality. 56 | 57 | Returns: 58 | The number of dimensions in the embedding vectors 59 | """ 60 | return self._dimension 61 | 62 | def provider_name(self) -> str: 63 | """Return provider name. 64 | 65 | Returns: 66 | Provider identifier 67 | """ 68 | return "placeholder" 69 | -------------------------------------------------------------------------------- /automem/api/enrichment.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import Any, Callable 4 | 5 | from flask import Blueprint, abort, jsonify, request 6 | 7 | 8 | def create_enrichment_blueprint( 9 | require_admin_token: Callable[[], None], 10 | state: Any, 11 | enqueue_enrichment: Callable[..., None], 12 | max_attempts: int, 13 | ) -> Blueprint: 14 | bp = Blueprint("enrichment", __name__) 15 | 16 | @bp.route("/enrichment/status", methods=["GET"]) 17 | def enrichment_status() -> Any: 18 | queue_size = state.enrichment_queue.qsize() if state.enrichment_queue else 0 19 | thread_alive = bool(state.enrichment_thread and state.enrichment_thread.is_alive()) 20 | 21 | with state.enrichment_lock: 22 | pending = len(state.enrichment_pending) 23 | inflight = len(state.enrichment_inflight) 24 | 25 | response = { 26 | "status": "running" if thread_alive else "stopped", 27 | "queue_size": queue_size, 28 | "pending": pending, 29 | "inflight": inflight, 30 | "max_attempts": max_attempts, 31 | "stats": state.enrichment_stats.to_dict(), 32 | } 33 | return jsonify(response) 34 | 35 | @bp.route("/enrichment/reprocess", methods=["POST"]) 36 | def enrichment_reprocess() -> Any: 37 | require_admin_token() 38 | 39 | payload = request.get_json(silent=True) or {} 40 | ids: set[str] = set() 41 | 42 | raw_ids = payload.get("ids") or request.args.get("ids") 43 | if isinstance(raw_ids, str): 44 | ids.update(part.strip() for part in raw_ids.split(",") if part.strip()) 45 | elif isinstance(raw_ids, list): 46 | for item in raw_ids: 47 | if isinstance(item, str) and item.strip(): 48 | ids.add(item.strip()) 49 | 50 | if not ids: 51 | abort(400, description="No memory ids provided for reprocessing") 52 | 53 | for memory_id in ids: 54 | enqueue_enrichment(memory_id, forced=True) 55 | 56 | return ( 57 | jsonify( 58 | { 59 | "status": "queued", 60 | "count": len(ids), 61 | "ids": sorted(ids), 62 | } 63 | ), 64 | 202, 65 | ) 66 | 67 | return bp 68 | -------------------------------------------------------------------------------- /docs/AGENT_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | # Long-Haul Agent Template 2 | 3 | Reusable playbook for long-running coding sessions with GitHub PRs and CodeRabbit reviews.
Copy into repo-level `AGENTS.md` and fill the placeholders. 4 | 5 | ## Scope & Guardrails 6 | - Repos in scope: `<repos>`. Allowed commands: `<commands>`. Secrets: env vars only; never log or commit secrets. 7 | - Stop conditions: