├── CLAUDE.md ├── src └── aijournal │ ├── py.typed │ ├── __init__.py │ ├── services │ ├── __init__.py │ ├── capture │ │ ├── stages │ │ │ ├── __init__.py │ │ │ ├── stage1_normalize.py │ │ │ ├── stage8_pack.py │ │ │ ├── stage2_summarize.py │ │ │ ├── stage3_facts.py │ │ │ └── stage6_index.py │ │ └── results.py │ ├── microfacts │ │ ├── __init__.py │ │ └── snapshot.py │ └── embedding.py │ ├── api │ ├── __init__.py │ ├── capture.py │ └── chat.py │ ├── common │ ├── __init__.py │ ├── types.py │ ├── base.py │ ├── constants.py │ ├── meta.py │ └── command_runner.py │ ├── io │ ├── __init__.py │ └── yaml_io.py │ ├── utils │ ├── __init__.py │ ├── coercion.py │ ├── time.py │ └── text.py │ ├── commands │ └── __init__.py │ ├── pipelines │ ├── __init__.py │ ├── advise.py │ └── summarize.py │ ├── models │ ├── __init__.py │ ├── base.py │ ├── claim_atoms.py │ ├── derived.py │ └── authoritative.py │ ├── domain │ ├── __init__.py │ ├── evidence.py │ ├── enums.py │ ├── persona.py │ ├── packs.py │ ├── chat.py │ ├── journal.py │ ├── advice.py │ ├── claims.py │ ├── index.py │ ├── chat_sessions.py │ ├── facts.py │ └── events.py │ ├── simulator │ └── __init__.py │ ├── _version.py │ └── schema.py ├── .python-version ├── .envrc ├── tests ├── ci │ └── __init__.py ├── __init__.py ├── domain │ └── __init__.py ├── fixtures │ ├── miniwk │ │ ├── __init__.py │ │ ├── expected_retrieval.json │ │ ├── data │ │ │ ├── manifest │ │ │ │ └── ingested.yaml │ │ │ └── normalized │ │ │ │ ├── 2025-02-03 │ │ │ │ └── 2025-02-03-focus.yaml │ │ │ │ ├── 2025-02-04 │ │ │ │ └── 2025-02-04-reflection.yaml │ │ │ │ └── 2025-02-05 │ │ │ │ └── 2025-02-05-planning.yaml │ │ └── config.yaml │ └── __init__.py ├── io_tests │ ├── __init__.py │ └── test_artifacts.py ├── scripts │ ├── __init__.py │ └── test_check_structured_metrics.py ├── simulator │ ├── __init__.py │ └── test_human_simulator.py ├── commands │ ├── __init__.py │ └── test_microfact_prompts.py ├── pipelines │ ├── __init__.py │ ├── test_advise.py │ ├── test_summarize.py │ └── test_persona.py ├── common │ ├── __init__.py │ └── test_meta.py ├── prompts │ ├── __init__.py │ └── test_metadata_filtering.py ├── services │ ├── __init__.py │ ├── capture │ │ ├── __init__.py │ │ ├── test_summary_policy.py │ │ ├── test_graceful_profile_update.py │ │ ├── test_stage_summarize.py │ │ ├── test_stage_persona.py │ │ └── test_stage_facts.py │ └── test_claim_id_generation.py ├── test_sanity.py ├── test_coercion.py ├── test_schema_validation.py ├── test_cli_version.py ├── test_yaml_io.py ├── README.md ├── test_api_capture.py ├── test_cli_simulator.py ├── conftest.py ├── test_claim_atoms.py ├── test_cli_ollama_health.py └── test_cli_microfacts.py ├── scripts └── __init__.py ├── schemas └── core │ ├── aijournal.models.base.AijournalModel.json │ ├── aijournal.domain.evidence.SourceRef.json │ ├── aijournal.api.chat.ChatCitationRef.json │ ├── aijournal.domain.packs.TrimmedFile.json │ ├── aijournal.domain.advice.AdviceReference.json │ ├── aijournal.domain.journal.NormalizedEntity.json │ ├── aijournal.domain.claims.Scope.json │ ├── aijournal.domain.packs.PackEntry.json │ ├── aijournal.services.retriever.RetrievalMeta.json │ ├── aijournal.domain.chat.ChatTelemetry.json │ ├── aijournal.domain.facts.MicrofactConsolidationSummary.json │ ├── aijournal.domain.facts.DailySummary.json │ ├── aijournal.domain.events.FeedbackAdjustmentEvent.json │ ├── aijournal.domain.journal.Section.json │ ├── aijournal.domain.persona.InterviewQuestion.json │ ├── aijournal.domain.events.ClaimSignaturePayload.json │ ├── 
aijournal.domain.prompts.PromptMicroFact.json │ ├── aijournal.services.consolidator.ClaimSignature.json │ ├── aijournal.services.retriever.RetrievalFilters.json │ ├── aijournal.models.derived.ProfileUpdateInput.json │ ├── aijournal.domain.packs.PackMeta.json │ ├── aijournal.api.chat.ChatCitation.json │ ├── aijournal.models.authoritative.JournalEntry.json │ ├── aijournal.domain.claims.Provenance.json │ ├── aijournal.domain.prompts.PromptFacetItem.json │ ├── aijournal.domain.facts.MicrofactConsolidationLog.json │ ├── aijournal.domain.facts.MicroFact.json │ ├── aijournal.domain.advice.AdviceRecommendation.json │ ├── aijournal.domain.persona.InterviewSet.json │ ├── aijournal.api.chat.ChatResponse.json │ ├── aijournal.models.authoritative.SelfProfile.json │ ├── aijournal.common.meta.LLMResult.json │ ├── aijournal.domain.facts.ConsolidatedMicroFact.json │ ├── aijournal.domain.index.Chunk.json │ ├── aijournal.domain.events.FeedbackBatch.json │ ├── aijournal.domain.index.RetrievedChunk.json │ ├── aijournal.domain.chat_sessions.ChatLearningEntry.json │ ├── aijournal.common.meta.ArtifactMeta.json │ ├── aijournal.domain.changes.ClaimAtomInput.json │ ├── aijournal.models.authoritative.ManifestEntry.json │ ├── aijournal.domain.chat_sessions.ChatSessionSummary.json │ ├── aijournal.api.chat.ChatRequest.json │ ├── aijournal.domain.chat_sessions.ChatTranscriptTurn.json │ ├── aijournal.services.consolidator.ClaimConflict.json │ ├── aijournal.domain.events.ClaimConflictPayload.json │ ├── aijournal.domain.changes.FacetChange.json │ ├── aijournal.domain.packs.PackBundle.json │ ├── aijournal.domain.index.ChunkBatch.json │ └── aijournal.domain.facts.ConsolidatedMicrofactsFile.json ├── .githooks └── pre-push ├── .gitignore ├── prompts └── examples │ ├── extract_facts.json │ ├── summarize.json │ ├── interview.json │ ├── profile_update.json │ └── advise.json ├── .github └── workflows │ ├── schema.yml │ ├── ci.yml │ └── release.yml ├── .pre-commit-config.yaml ├── justfile └── docs ├── prompt_improvement_request.md ├── archive └── 2025-10-29_CLI_MIGRATION.md └── design └── profile_update_inventory.md /CLAUDE.md: -------------------------------------------------------------------------------- 1 | AGENTS.md -------------------------------------------------------------------------------- /src/aijournal/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.12 2 | -------------------------------------------------------------------------------- /src/aijournal/__init__.py: -------------------------------------------------------------------------------- 1 | """aijournal package exports.""" 2 | -------------------------------------------------------------------------------- /.envrc: -------------------------------------------------------------------------------- 1 | source .venv/bin/activate 2 | AIJOURNAL_WORKSPACE=workspace 3 | -------------------------------------------------------------------------------- /src/aijournal/services/__init__.py: -------------------------------------------------------------------------------- 1 | """Service utilities for aijournal.""" 2 | -------------------------------------------------------------------------------- /src/aijournal/api/__init__.py: -------------------------------------------------------------------------------- 1 | """Public API schemas exposed by 
aijournal services.""" 2 | -------------------------------------------------------------------------------- /src/aijournal/common/__init__.py: -------------------------------------------------------------------------------- 1 | """Common primitives shared across aijournal modules.""" 2 | -------------------------------------------------------------------------------- /src/aijournal/io/__init__.py: -------------------------------------------------------------------------------- 1 | """I/O helpers for YAML, artifacts, and related formats.""" 2 | -------------------------------------------------------------------------------- /tests/ci/__init__.py: -------------------------------------------------------------------------------- 1 | """Mark CI-focused tests as a concrete package for importers.""" 2 | -------------------------------------------------------------------------------- /scripts/__init__.py: -------------------------------------------------------------------------------- 1 | """Expose helper scripts as an importable package for tooling hooks.""" 2 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Explicitly package the test suite so imports resolve consistently.""" 2 | -------------------------------------------------------------------------------- /tests/domain/__init__.py: -------------------------------------------------------------------------------- 1 | """Bundle domain-level tests into a package to simplify discovery.""" 2 | -------------------------------------------------------------------------------- /tests/fixtures/miniwk/__init__.py: -------------------------------------------------------------------------------- 1 | """Package marker for the mini workspace fixture bundle.""" 2 | -------------------------------------------------------------------------------- /tests/io_tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Mark IO tests as a concrete package for linting purposes.""" 2 | -------------------------------------------------------------------------------- /tests/scripts/__init__.py: -------------------------------------------------------------------------------- 1 | """Group scripted test helpers into a package for import clarity.""" 2 | -------------------------------------------------------------------------------- /tests/simulator/__init__.py: -------------------------------------------------------------------------------- 1 | """Group simulator tests into a concrete package for discovery.""" 2 | -------------------------------------------------------------------------------- /src/aijournal/utils/__init__.py: -------------------------------------------------------------------------------- 1 | """Stateless helpers shared across CLI, services, and pipelines.""" 2 | -------------------------------------------------------------------------------- /tests/fixtures/__init__.py: -------------------------------------------------------------------------------- 1 | """Publish fixture helpers as a package for cleaner relative imports.""" 2 | -------------------------------------------------------------------------------- /tests/commands/__init__.py: -------------------------------------------------------------------------------- 1 | """Keep command tests in a real package so shared helpers import cleanly.""" 2 | 
-------------------------------------------------------------------------------- /tests/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | """Keep pipeline tests grouped in a package for consistent discovery.""" 2 | -------------------------------------------------------------------------------- /tests/common/__init__.py: -------------------------------------------------------------------------------- 1 | """Treat shared test utilities as a true package for straightforward imports.""" 2 | -------------------------------------------------------------------------------- /tests/prompts/__init__.py: -------------------------------------------------------------------------------- 1 | """Allow prompt tests to act as a package and avoid implicit-namespace lint.""" 2 | -------------------------------------------------------------------------------- /tests/services/__init__.py: -------------------------------------------------------------------------------- 1 | """Expose the services tests as a package so linting tools resolve imports.""" 2 | -------------------------------------------------------------------------------- /src/aijournal/commands/__init__.py: -------------------------------------------------------------------------------- 1 | """Command modules provide the Typer-facing orchestration for each feature.""" 2 | -------------------------------------------------------------------------------- /tests/services/capture/__init__.py: -------------------------------------------------------------------------------- 1 | """Capture-service tests live here as a proper package for importer parity.""" 2 | -------------------------------------------------------------------------------- /src/aijournal/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | """Pipeline modules orchestrate end-to-end workflows for specific CLI commands.""" 2 | -------------------------------------------------------------------------------- /src/aijournal/services/capture/stages/__init__.py: -------------------------------------------------------------------------------- 1 | """Capture stage implementations exposed to the pipeline runner.""" 2 | -------------------------------------------------------------------------------- /tests/test_sanity.py: -------------------------------------------------------------------------------- 1 | def test_sanity() -> None: 2 | """Placeholder to keep pytest happy until real tests exist.""" 3 | assert True 4 | -------------------------------------------------------------------------------- /src/aijournal/common/types.py: -------------------------------------------------------------------------------- 1 | """Common typing aliases shared across the project.""" 2 | 3 | ISODateStr = str # 'YYYY-MM-DD' 4 | TimestampStr = str # ISO8601 string 5 | -------------------------------------------------------------------------------- /tests/fixtures/miniwk/expected_retrieval.json: -------------------------------------------------------------------------------- 1 | { 2 | "query": "focus blocks", 3 | "top": 5, 4 | "expected_chunk_ids": [ 5 | "2025-02-04-reflection#c0" 6 | ] 7 | } 8 | -------------------------------------------------------------------------------- /src/aijournal/models/__init__.py: -------------------------------------------------------------------------------- 1 | """Pydantic model package; import from explicit submodules (authoritative, derived, claim_atoms).""" 2 | 3 | # 
Intentionally left empty to avoid implicit re-exports. 4 | -------------------------------------------------------------------------------- /schemas/core/aijournal.models.base.AijournalModel.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Project-specific base model that inherits strict settings.", 3 | "properties": {}, 4 | "title": "AijournalModel", 5 | "type": "object" 6 | } 7 | -------------------------------------------------------------------------------- /src/aijournal/domain/__init__.py: -------------------------------------------------------------------------------- 1 | """Domain models live in explicit submodules (changes, evidence, facts, persona, etc.).""" 2 | 3 | # This package intentionally avoids re-export magic; import directly from the submodule you need. 4 | -------------------------------------------------------------------------------- /.githooks/pre-push: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -euo pipefail 4 | 5 | echo "Running schema guard..." 6 | uv run python scripts/check_schemas.py 7 | 8 | echo "Running pytest..." 9 | uv run pytest -q 10 | 11 | echo "Running pre-commit hooks..." 12 | pre-commit run --all-files 13 | -------------------------------------------------------------------------------- /src/aijournal/models/base.py: -------------------------------------------------------------------------------- 1 | """Shared base model for aijournal Pydantic schemas.""" 2 | 3 | from __future__ import annotations 4 | 5 | from aijournal.common.base import StrictModel 6 | 7 | 8 | class AijournalModel(StrictModel): 9 | """Project-specific base model that inherits strict settings.""" 10 | -------------------------------------------------------------------------------- /tests/fixtures/miniwk/data/manifest/ingested.yaml: -------------------------------------------------------------------------------- 1 | - id: 2025-02-03-focus 2 | hash: hash-2025-02-03-focus 3 | source_type: journal 4 | - id: 2025-02-04-reflection 5 | hash: hash-2025-02-04-reflection 6 | source_type: journal 7 | - id: 2025-02-05-planning 8 | hash: hash-2025-02-05-planning 9 | source_type: journal 10 | -------------------------------------------------------------------------------- /src/aijournal/domain/evidence.py: -------------------------------------------------------------------------------- 1 | """Domain models for evidence source references.""" 2 | 3 | from __future__ import annotations 4 | 5 | from aijournal.common.base import StrictModel 6 | 7 | 8 | class SourceRef(StrictModel): 9 | """Reference to a normalized entry that supports a claim or fact.""" 10 | 11 | entry_id: str 12 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Python/uv 2 | .venv/ 3 | __pycache__/ 4 | *.py[cod] 5 | .ruff_cache/ 6 | .mypy_cache/ 7 | .pytest_cache/ 8 | 9 | # Local tooling 10 | .code/ 11 | 12 | # Data directories (optional to track; ignored by default for privacy) 13 | data/ 14 | derived/ 15 | profile/ 16 | workspace/ 17 | 18 | # OS noise 19 | .DS_Store 20 | Thumbs.db 21 | -------------------------------------------------------------------------------- /prompts/examples/extract_facts.json: -------------------------------------------------------------------------------- 1 | { 2 | "facts": [ 3 | { 4 | "id": "focus-morning-block", 5 | "statement": "Morning 
focus blocks improve deep-work output.", 6 | "confidence": 0.82, 7 | "evidence_entry": "2025-10-25-focus-log", 8 | "evidence_para": 0 9 | } 10 | ], 11 | "claim_proposals": [] 12 | } 13 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.evidence.SourceRef.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Reference to a normalized entry that supports a claim or fact.", 3 | "properties": { 4 | "entry_id": { 5 | "title": "Entry Id", 6 | "type": "string" 7 | } 8 | }, 9 | "required": [ 10 | "entry_id" 11 | ], 12 | "title": "SourceRef", 13 | "type": "object" 14 | } 15 | -------------------------------------------------------------------------------- /schemas/core/aijournal.api.chat.ChatCitationRef.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Reference emitted by the LLM; resolved against retrieved chunks.", 3 | "properties": { 4 | "code": { 5 | "minLength": 1, 6 | "title": "Code", 7 | "type": "string" 8 | } 9 | }, 10 | "required": [ 11 | "code" 12 | ], 13 | "title": "ChatCitationRef", 14 | "type": "object" 15 | } 16 | -------------------------------------------------------------------------------- /prompts/examples/summarize.json: -------------------------------------------------------------------------------- 1 | { 2 | "day": "2025-10-26", 3 | "bullets": [ 4 | "Confirmed strict schema prompts with JSON fixtures.", 5 | "Documented fallback instructions for summarize output." 6 | ], 7 | "highlights": [ 8 | "LLM outputs now align with DailySummary schema." 9 | ], 10 | "todo_candidates": [ 11 | "Verify persona regeneration after strict schema rollout." 12 | ] 13 | } 14 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.packs.TrimmedFile.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Record of a file trimmed due to token budget limits.", 3 | "properties": { 4 | "path": { 5 | "title": "Path", 6 | "type": "string" 7 | }, 8 | "role": { 9 | "title": "Role", 10 | "type": "string" 11 | } 12 | }, 13 | "required": [ 14 | "role", 15 | "path" 16 | ], 17 | "title": "TrimmedFile", 18 | "type": "object" 19 | } 20 | -------------------------------------------------------------------------------- /tests/fixtures/miniwk/data/normalized/2025-02-03/2025-02-03-focus.yaml: -------------------------------------------------------------------------------- 1 | id: 2025-02-03-focus 2 | created_at: "2025-02-03T09:00:00Z" 3 | source_path: data/journal/2025/02/03/2025-02-03-focus.md 4 | title: Focus 5 | tags: 6 | - focus 7 | - planning 8 | summary: Protected two focus blocks for roadmap prep. 9 | sections: 10 | - heading: Highlights 11 | summary: Protected two focus blocks for roadmap prep. 
12 | source_hash: hash-2025-02-03-focus 13 | source_type: journal 14 | -------------------------------------------------------------------------------- /prompts/examples/interview.json: -------------------------------------------------------------------------------- 1 | { 2 | "questions": [ 3 | { 4 | "id": "morning-reflection-frequency", 5 | "text": "How often do post-focus reflections get skipped?", 6 | "target_facet": "habits.focus_block", 7 | "priority": "high" 8 | }, 9 | { 10 | "id": "travel-adjustments", 11 | "text": "What changes during travel weeks disrupt focus blocks?", 12 | "target_facet": "claim:goal.focus_hours_per_week", 13 | "priority": "medium" 14 | } 15 | ] 16 | } 17 | -------------------------------------------------------------------------------- /tests/fixtures/miniwk/data/normalized/2025-02-04/2025-02-04-reflection.yaml: -------------------------------------------------------------------------------- 1 | id: 2025-02-04-reflection 2 | created_at: "2025-02-04T08:30:00Z" 3 | source_path: data/journal/2025/02/04/2025-02-04-reflection.md 4 | title: Reflection 5 | tags: 6 | - reflection 7 | - focus 8 | summary: Reflected on how consistent focus blocks supported calm execution. 9 | sections: 10 | - heading: Reflection 11 | summary: Focus rituals kept delivery predictable. 12 | source_hash: hash-2025-02-04-reflection 13 | source_type: journal 14 | -------------------------------------------------------------------------------- /tests/fixtures/miniwk/data/normalized/2025-02-05/2025-02-05-planning.yaml: -------------------------------------------------------------------------------- 1 | id: 2025-02-05-planning 2 | created_at: "2025-02-05T07:45:00Z" 3 | source_path: data/journal/2025/02/05/2025-02-05-planning.md 4 | title: Planning 5 | tags: 6 | - planning 7 | - sprint 8 | summary: Planned sprint tasks and scheduled deep work blocks for writing. 9 | sections: 10 | - heading: Planning 11 | summary: Prioritized sprint stories and held time for writing. 
12 | source_hash: hash-2025-02-05-planning 13 | source_type: journal 14 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.advice.AdviceReference.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "References included to ground why advice fits.", 3 | "properties": { 4 | "claims": { 5 | "items": { 6 | "type": "string" 7 | }, 8 | "title": "Claims", 9 | "type": "array" 10 | }, 11 | "facets": { 12 | "items": { 13 | "type": "string" 14 | }, 15 | "title": "Facets", 16 | "type": "array" 17 | } 18 | }, 19 | "title": "AdviceReference", 20 | "type": "object" 21 | } 22 | -------------------------------------------------------------------------------- /src/aijournal/common/base.py: -------------------------------------------------------------------------------- 1 | """Strict Pydantic base classes used across the project.""" 2 | 3 | from __future__ import annotations 4 | 5 | from pydantic import BaseModel, ConfigDict 6 | 7 | 8 | class StrictModel(BaseModel): 9 | """Pydantic model with strict settings and ignored extras.""" 10 | 11 | model_config = ConfigDict( 12 | extra="ignore", 13 | validate_assignment=True, 14 | arbitrary_types_allowed=False, 15 | populate_by_name=True, 16 | protected_namespaces=(), 17 | ) 18 | -------------------------------------------------------------------------------- /src/aijournal/simulator/__init__.py: -------------------------------------------------------------------------------- 1 | """Human-style simulator utilities for exercising the capture pipeline.""" 2 | 3 | from .fixtures import FixtureEntry, FixtureWorkspace, build_fixture_workspace 4 | from .orchestrator import HumanSimulator, SimulationReport 5 | from .validators import ValidationFailure, ValidationReport 6 | 7 | __all__ = [ 8 | "FixtureEntry", 9 | "FixtureWorkspace", 10 | "HumanSimulator", 11 | "SimulationReport", 12 | "ValidationFailure", 13 | "ValidationReport", 14 | "build_fixture_workspace", 15 | ] 16 | -------------------------------------------------------------------------------- /tests/test_coercion.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from aijournal.utils.coercion import coerce_float, coerce_int 4 | 5 | 6 | def test_coerce_float_handles_invalid_values() -> None: 7 | assert coerce_float("1.5") == 1.5 8 | assert coerce_float(None) is None 9 | assert coerce_float("not-a-number") is None 10 | 11 | 12 | def test_coerce_int_handles_invalid_values() -> None: 13 | assert coerce_int("7") == 7 14 | assert coerce_int(None) is None 15 | assert coerce_int(3.9) == 3 16 | assert coerce_int("oops") is None 17 | -------------------------------------------------------------------------------- /.github/workflows/schema.yml: -------------------------------------------------------------------------------- 1 | name: Schema Check 2 | 3 | on: 4 | push: 5 | pull_request: 6 | 7 | jobs: 8 | schema: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v4 12 | 13 | - name: Set up Python 14 | uses: actions/setup-python@v6 15 | with: 16 | python-version: "3.13" 17 | 18 | - name: Install uv 19 | uses: astral-sh/setup-uv@v7 20 | 21 | - name: Sync dependencies 22 | run: uv sync 23 | 24 | - name: Verify schemas 25 | run: uv run python scripts/check_schemas.py 26 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.journal.NormalizedEntity.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "description": "Structured entity extracted during normalization.", 3 | "properties": { 4 | "extra": { 5 | "additionalProperties": true, 6 | "title": "Extra", 7 | "type": "object" 8 | }, 9 | "type": { 10 | "title": "Type", 11 | "type": "string" 12 | }, 13 | "value": { 14 | "title": "Value", 15 | "type": "string" 16 | } 17 | }, 18 | "required": [ 19 | "type", 20 | "value" 21 | ], 22 | "title": "NormalizedEntity", 23 | "type": "object" 24 | } 25 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.claims.Scope.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Contextual qualifiers for a claim atom.", 3 | "properties": { 4 | "context": { 5 | "items": { 6 | "type": "string" 7 | }, 8 | "title": "Context", 9 | "type": "array" 10 | }, 11 | "domain": { 12 | "anyOf": [ 13 | { 14 | "type": "string" 15 | }, 16 | { 17 | "type": "null" 18 | } 19 | ], 20 | "default": null, 21 | "title": "Domain" 22 | } 23 | }, 24 | "title": "Scope", 25 | "type": "object" 26 | } 27 | -------------------------------------------------------------------------------- /tests/test_schema_validation.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import pytest 4 | 5 | from aijournal.schema import SchemaValidationError, validate_schema 6 | 7 | 8 | def test_validate_schema_raises_with_aggregate_errors() -> None: 9 | payload = {"unexpected": True} 10 | 11 | with pytest.raises(SchemaValidationError) as excinfo: 12 | validate_schema("summary", payload) 13 | 14 | err = excinfo.value 15 | assert err.schema == "summary" 16 | assert err.errors 17 | assert "Field required" in err.errors[0] 18 | assert "Schema 'summary' validation failed" in str(err) 19 | -------------------------------------------------------------------------------- /src/aijournal/services/microfacts/__init__.py: -------------------------------------------------------------------------------- 1 | """Microfact service utilities (indexing, consolidation, snapshots).""" 2 | 3 | from .index import ( 4 | MicrofactConsolidationStats, 5 | MicrofactIndex, 6 | MicrofactMatch, 7 | MicrofactRebuildResult, 8 | MicrofactRecord, 9 | ) 10 | from .snapshot import load_consolidated_microfacts, select_recurring_facts 11 | 12 | __all__ = [ 13 | "MicrofactConsolidationStats", 14 | "MicrofactIndex", 15 | "MicrofactMatch", 16 | "MicrofactRebuildResult", 17 | "MicrofactRecord", 18 | "load_consolidated_microfacts", 19 | "select_recurring_facts", 20 | ] 21 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | default_stages: [pre-commit] 2 | exclude: "^(data/|derived/|.venv/|.code/)" 3 | repos: 4 | - repo: local 5 | hooks: 6 | - id: ruff 7 | name: ruff 8 | entry: uvx ruff check --fix 9 | language: system 10 | pass_filenames: true 11 | types: [python] 12 | - id: ruff-format 13 | name: ruff-format 14 | entry: uvx ruff format 15 | language: system 16 | pass_filenames: true 17 | types: [python] 18 | - id: mypy 19 | name: mypy 20 | entry: uv run mypy src 21 | language: system 22 | pass_filenames: false 23 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: 
--------------------------------------------------------------------------------
 1 | name: CI
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: ["main"]
 6 |   pull_request:
 7 | 
 8 | jobs:
 9 |   test:
10 |     runs-on: ubuntu-latest
11 |     steps:
12 |       - uses: actions/checkout@v4
13 | 
14 |       - name: Set up Python
15 |         uses: actions/setup-python@v5
16 |         with:
17 |           python-version: "3.13"
18 | 
19 |       - name: Install uv
20 |         uses: astral-sh/setup-uv@v7
21 | 
22 |       - name: Sync dependencies
23 |         run: uv sync
24 | 
25 |       - name: Run tests
26 |         run: uv run pytest -q
27 | 
28 |       - name: Run pre-commit
29 |         run: uvx pre-commit run --all-files
30 | 
--------------------------------------------------------------------------------
/schemas/core/aijournal.domain.packs.PackEntry.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "description": "Single file included in an export pack.",
 3 |   "properties": {
 4 |     "content": {
 5 |       "title": "Content",
 6 |       "type": "string"
 7 |     },
 8 |     "path": {
 9 |       "title": "Path",
10 |       "type": "string"
11 |     },
12 |     "role": {
13 |       "title": "Role",
14 |       "type": "string"
15 |     },
16 |     "tokens": {
17 |       "title": "Tokens",
18 |       "type": "integer"
19 |     }
20 |   },
21 |   "required": [
22 |     "role",
23 |     "path",
24 |     "tokens",
25 |     "content"
26 |   ],
27 |   "title": "PackEntry",
28 |   "type": "object"
29 | }
30 | 
--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
 1 | name: Upload Python Package
 2 | 
 3 | on:
 4 |   release:
 5 |     types: [published]
 6 | 
 7 | jobs:
 8 |   deploy:
 9 |     runs-on: ubuntu-latest
10 |     environment:
11 |       name: pypi
12 |       url: https://pypi.org/p/${{ github.repository }}
13 |     permissions:
14 |       id-token: write
15 |     steps:
16 |       - uses: actions/checkout@v4
17 |         with:
18 |           fetch-depth: 0
19 |       - name: Install uv
20 |         uses: astral-sh/setup-uv@v6
21 |       - name: Build
22 |         run: uv build
23 |       - name: Publish package distributions to PyPI
24 |         uses: pypa/gh-action-pypi-publish@release/v1
25 | 
--------------------------------------------------------------------------------
/schemas/core/aijournal.services.retriever.RetrievalMeta.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "description": "Metadata describing a retrieval invocation.",
 3 |   "properties": {
 4 |     "fake_mode": {
 5 |       "title": "Fake Mode",
 6 |       "type": "boolean"
 7 |     },
 8 |     "k": {
 9 |       "title": "K",
10 |       "type": "integer"
11 |     },
12 |     "mode": {
13 |       "title": "Mode",
14 |       "type": "string"
15 |     },
16 |     "source": {
17 |       "title": "Source",
18 |       "type": "string"
19 |     }
20 |   },
21 |   "required": [
22 |     "mode",
23 |     "source",
24 |     "k",
25 |     "fake_mode"
26 |   ],
27 |   "title": "RetrievalMeta",
28 |   "type": "object"
29 | }
30 | 
--------------------------------------------------------------------------------
/tests/test_cli_version.py:
--------------------------------------------------------------------------------
 1 | """Smoke test for the `aij version` command."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | import pytest
 6 | from typer.testing import CliRunner
 7 | 
 8 | from aijournal import _version
 9 | from aijournal.cli import app
10 | 
11 | 
12 | def test_version_command(monkeypatch: pytest.MonkeyPatch) -> None:
13 |     # Force a deterministic version so we can assert the output
14 |     monkeypatch.setattr(_version, "__version__", "9.3.1-test")
15 |     runner = CliRunner()
16 | 
17 |     result = runner.invoke(app, ["version"])
18 | 
19 |     assert result.exit_code == 0
20 |     assert "aijournal
version: 9.3.1-test" in result.stdout 20 | assert "source root:" in result.stdout 21 | -------------------------------------------------------------------------------- /justfile: -------------------------------------------------------------------------------- 1 | set shell := ["/bin/sh", "-c"] 2 | 3 | default: 4 | @just --help 5 | 6 | test: 7 | uv run pytest -q 8 | 9 | test_cov: 10 | uv run pytest --cov=src -q 11 | 12 | mypy: 13 | uv run mypy src 14 | 15 | lint: 16 | uv run ruff check src tests 17 | 18 | fmt: 19 | uv run ruff format src tests 20 | 21 | health: 22 | uv run aijournal ollama health 23 | 24 | fake_on: 25 | echo "export AIJOURNAL_FAKE_OLLAMA=1" 26 | 27 | ci: 28 | uv run pytest -q && uv run mypy src && uv run python scripts/check_schemas.py 29 | 30 | precommit_dry: 31 | uvx pre-commit run --all-files --show-diff-on-failure 32 | 33 | precommit: 34 | uvx pre-commit run --all-files 35 | -------------------------------------------------------------------------------- /tests/fixtures/miniwk/config.yaml: -------------------------------------------------------------------------------- 1 | model: "gpt-oss:20b" 2 | temperature: 0.2 3 | seed: 42 4 | paths: 5 | data: "data" 6 | profile: "profile" 7 | derived: "derived" 8 | prompts: "prompts" 9 | impact_weights: 10 | values_goals: 1.5 11 | decision_style: 1.3 12 | affect_energy: 1.2 13 | traits: 1.0 14 | social: 0.9 15 | claims: 1.0 16 | claim_types: 17 | value: 1.4 18 | goal: 1.4 19 | boundary: 1.3 20 | trait: 1.2 21 | preference: 1.0 22 | habit: 0.9 23 | aversion: 1.1 24 | skill: 1.0 25 | advisor: 26 | max_recos: 3 27 | include_risks: true 28 | token_estimator: 29 | char_per_token: 4.2 30 | persona: 31 | token_budget: 1200 32 | max_claims: 24 33 | min_claims: 8 34 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.chat.ChatTelemetry.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Telemetry captured during a chat turn.", 3 | "properties": { 4 | "chunk_count": { 5 | "title": "Chunk Count", 6 | "type": "integer" 7 | }, 8 | "model": { 9 | "title": "Model", 10 | "type": "string" 11 | }, 12 | "retrieval_ms": { 13 | "title": "Retrieval Ms", 14 | "type": "number" 15 | }, 16 | "retriever_source": { 17 | "title": "Retriever Source", 18 | "type": "string" 19 | } 20 | }, 21 | "required": [ 22 | "retrieval_ms", 23 | "chunk_count", 24 | "retriever_source", 25 | "model" 26 | ], 27 | "title": "ChatTelemetry", 28 | "type": "object" 29 | } 30 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.facts.MicrofactConsolidationSummary.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Per-day summary emitted during rebuild operations.", 3 | "properties": { 4 | "day": { 5 | "title": "Day", 6 | "type": "string" 7 | }, 8 | "merged_records": { 9 | "title": "Merged Records", 10 | "type": "integer" 11 | }, 12 | "new_records": { 13 | "title": "New Records", 14 | "type": "integer" 15 | }, 16 | "processed": { 17 | "title": "Processed", 18 | "type": "integer" 19 | } 20 | }, 21 | "required": [ 22 | "day", 23 | "processed", 24 | "new_records", 25 | "merged_records" 26 | ], 27 | "title": "MicrofactConsolidationSummary", 28 | "type": "object" 29 | } 30 | -------------------------------------------------------------------------------- /src/aijournal/_version.py: 
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from pathlib import Path
 4 | 
 5 | __version__ = ""
 6 | 
 7 | if not __version__:
 8 |     try:
 9 |         import versioningit
10 |         from versioningit.errors import Error as VersioningitError
11 |     except ImportError:  # pragma: no cover
12 |         import importlib.metadata
13 | 
14 |         __version__ = importlib.metadata.version("aijournal")
15 |     else:
16 |         PROJECT_DIR = Path(__file__).resolve().parents[2]
17 |         try:
18 |             __version__ = versioningit.get_version(project_dir=PROJECT_DIR)
19 |         except VersioningitError:
20 |             import importlib.metadata
21 | 
22 |             __version__ = importlib.metadata.version("aijournal")
23 | 
--------------------------------------------------------------------------------
/src/aijournal/utils/coercion.py:
--------------------------------------------------------------------------------
 1 | """Lightweight helpers for coercing loosely typed config values."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | from typing import Any
 6 | 
 7 | 
 8 | def coerce_float(value: Any) -> float | None:
 9 |     """Best-effort float conversion; returns None when coercion fails."""
10 |     if value is None:
11 |         return None
12 |     try:
13 |         return float(value)
14 |     except (TypeError, ValueError):
15 |         return None
16 | 
17 | 
18 | def coerce_int(value: Any) -> int | None:
19 |     """Best-effort int conversion; returns None when coercion fails."""
20 |     if value is None:
21 |         return None
22 |     try:
23 |         return int(value)
24 |     except (TypeError, ValueError):
25 |         return None
26 | 
--------------------------------------------------------------------------------
/tests/test_yaml_io.py:
--------------------------------------------------------------------------------
 1 | """Behavioral tests for YAML serialization helpers."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | from aijournal.io.yaml_io import dump_yaml
 6 | 
 7 | 
 8 | def test_dump_yaml_preserves_unicode_characters() -> None:
 9 |     payload = {"title": "Karakter – F. Bordewijk 📖"}
10 | 
11 |     serialized = dump_yaml(payload)
12 | 
13 |     assert "Karakter – F.
Bordewijk 📖" in serialized 14 | assert "\\u" not in serialized 15 | 16 | 17 | def test_dump_yaml_uses_literal_block_for_multiline_strings() -> None: 18 | payload = {"summary": "*1938*\n\n> Op dat ogenblik"} 19 | 20 | serialized = dump_yaml(payload) 21 | 22 | assert "summary: |" in serialized 23 | assert "*1938*" in serialized.splitlines()[1] 24 | assert " > Op dat ogenblik" in serialized 25 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.facts.DailySummary.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Derived day summary (PLAN §4.1).", 3 | "properties": { 4 | "bullets": { 5 | "items": { 6 | "type": "string" 7 | }, 8 | "title": "Bullets", 9 | "type": "array" 10 | }, 11 | "day": { 12 | "title": "Day", 13 | "type": "string" 14 | }, 15 | "highlights": { 16 | "items": { 17 | "type": "string" 18 | }, 19 | "title": "Highlights", 20 | "type": "array" 21 | }, 22 | "todo_candidates": { 23 | "items": { 24 | "type": "string" 25 | }, 26 | "title": "Todo Candidates", 27 | "type": "array" 28 | } 29 | }, 30 | "required": [ 31 | "day" 32 | ], 33 | "title": "DailySummary", 34 | "type": "object" 35 | } 36 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.events.FeedbackAdjustmentEvent.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Record of a claim strength adjustment triggered by chat feedback.", 3 | "properties": { 4 | "claim_id": { 5 | "title": "Claim Id", 6 | "type": "string" 7 | }, 8 | "delta": { 9 | "title": "Delta", 10 | "type": "number" 11 | }, 12 | "kind": { 13 | "default": "feedback", 14 | "title": "Kind", 15 | "type": "string" 16 | }, 17 | "new_strength": { 18 | "title": "New Strength", 19 | "type": "number" 20 | }, 21 | "old_strength": { 22 | "title": "Old Strength", 23 | "type": "number" 24 | } 25 | }, 26 | "required": [ 27 | "claim_id", 28 | "old_strength", 29 | "new_strength", 30 | "delta" 31 | ], 32 | "title": "FeedbackAdjustmentEvent", 33 | "type": "object" 34 | } 35 | -------------------------------------------------------------------------------- /tests/services/capture/test_summary_policy.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from aijournal.services.capture.stages import stage0_persist as stage0 4 | 5 | 6 | def test_missing_summary_uses_first_paragraph() -> None: 7 | body = ( 8 | "First line with extra spaces.\nStill first paragraph.\n\nSecond paragraph ignores this." 9 | ) 10 | summary = stage0._derive_summary_text(None, body) 11 | assert summary == "First line with extra spaces. Still first paragraph." 
12 | 13 | 14 | def test_existing_summary_remains_unchanged() -> None: 15 | summary = stage0._derive_summary_text("Custom summary", "Body text") 16 | assert summary == "Custom summary" 17 | 18 | 19 | def test_long_summary_truncates_with_ellipsis() -> None: 20 | body = "Lorem ipsum " * 50 # >400 chars 21 | summary = stage0._derive_summary_text(None, body, max_chars=100) 22 | assert summary.endswith("...") 23 | assert len(summary) <= 103 24 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.journal.Section.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Normalized representation of a markdown heading or section.", 3 | "properties": { 4 | "heading": { 5 | "title": "Heading", 6 | "type": "string" 7 | }, 8 | "level": { 9 | "default": 1, 10 | "title": "Level", 11 | "type": "integer" 12 | }, 13 | "para_index": { 14 | "anyOf": [ 15 | { 16 | "type": "integer" 17 | }, 18 | { 19 | "type": "null" 20 | } 21 | ], 22 | "default": null, 23 | "title": "Para Index" 24 | }, 25 | "summary": { 26 | "anyOf": [ 27 | { 28 | "type": "string" 29 | }, 30 | { 31 | "type": "null" 32 | } 33 | ], 34 | "default": null, 35 | "title": "Summary" 36 | } 37 | }, 38 | "required": [ 39 | "heading" 40 | ], 41 | "title": "Section", 42 | "type": "object" 43 | } 44 | -------------------------------------------------------------------------------- /src/aijournal/domain/enums.py: -------------------------------------------------------------------------------- 1 | """Shared enum types used across aijournal domain models.""" 2 | 3 | from __future__ import annotations 4 | 5 | from enum import StrEnum 6 | 7 | 8 | class ClaimType(StrEnum): 9 | PREFERENCE = "preference" 10 | VALUE = "value" 11 | GOAL = "goal" 12 | BOUNDARY = "boundary" 13 | TRAIT = "trait" 14 | HABIT = "habit" 15 | AVERSION = "aversion" 16 | SKILL = "skill" 17 | 18 | 19 | class ClaimStatus(StrEnum): 20 | ACCEPTED = "accepted" 21 | TENTATIVE = "tentative" 22 | REJECTED = "rejected" 23 | 24 | 25 | class FacetOperation(StrEnum): 26 | SET = "set" 27 | REMOVE = "remove" 28 | MERGE = "merge" 29 | 30 | 31 | class ClaimEventAction(StrEnum): 32 | UPSERT = "upsert" 33 | UPDATE = "update" 34 | DELETE = "delete" 35 | CONFLICT = "conflict" 36 | STRENGTH_DELTA = "strength_delta" 37 | 38 | 39 | class FeedbackDirection(StrEnum): 40 | UP = "up" 41 | DOWN = "down" 42 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.persona.InterviewQuestion.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Structured interview question proposed by the characterization pipeline.", 3 | "properties": { 4 | "id": { 5 | "title": "Id", 6 | "type": "string" 7 | }, 8 | "priority": { 9 | "anyOf": [ 10 | { 11 | "type": "string" 12 | }, 13 | { 14 | "type": "null" 15 | } 16 | ], 17 | "default": null, 18 | "title": "Priority" 19 | }, 20 | "target_facet": { 21 | "anyOf": [ 22 | { 23 | "type": "string" 24 | }, 25 | { 26 | "type": "null" 27 | } 28 | ], 29 | "default": null, 30 | "title": "Target Facet" 31 | }, 32 | "text": { 33 | "title": "Text", 34 | "type": "string" 35 | } 36 | }, 37 | "required": [ 38 | "id", 39 | "text" 40 | ], 41 | "title": "InterviewQuestion", 42 | "type": "object" 43 | } 44 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.events.ClaimSignaturePayload.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "description": "Serialized signature describing the target slot for a claim.", 3 | "properties": { 4 | "claim_type": { 5 | "title": "Claim Type", 6 | "type": "string" 7 | }, 8 | "context": { 9 | "items": { 10 | "type": "string" 11 | }, 12 | "title": "Context", 13 | "type": "array" 14 | }, 15 | "domain": { 16 | "anyOf": [ 17 | { 18 | "type": "string" 19 | }, 20 | { 21 | "type": "null" 22 | } 23 | ], 24 | "default": null, 25 | "title": "Domain" 26 | }, 27 | "predicate": { 28 | "title": "Predicate", 29 | "type": "string" 30 | }, 31 | "subject": { 32 | "title": "Subject", 33 | "type": "string" 34 | } 35 | }, 36 | "required": [ 37 | "claim_type", 38 | "subject", 39 | "predicate" 40 | ], 41 | "title": "ClaimSignaturePayload", 42 | "type": "object" 43 | } 44 | -------------------------------------------------------------------------------- /src/aijournal/domain/persona.py: -------------------------------------------------------------------------------- 1 | """Persona and interview domain models for strict schema alignment.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import Any 6 | 7 | from pydantic import Field 8 | 9 | from aijournal.common.base import StrictModel 10 | from aijournal.domain.claims import ClaimAtom # noqa: TC001 11 | 12 | 13 | class PersonaCore(StrictModel): 14 | """Primary persona payload used by chat/advise pipelines.""" 15 | 16 | profile: dict[str, Any] = Field(default_factory=dict) 17 | claims: list[ClaimAtom] = Field(default_factory=list) 18 | 19 | 20 | class InterviewQuestion(StrictModel): 21 | """Structured interview question proposed by the characterization pipeline.""" 22 | 23 | id: str 24 | text: str 25 | target_facet: str | None = None 26 | priority: str | None = None 27 | 28 | 29 | class InterviewSet(StrictModel): 30 | """Collection of interview questions to review with the operator.""" 31 | 32 | questions: list[InterviewQuestion] = Field(default_factory=list) 33 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.prompts.PromptMicroFact.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Lightweight micro-fact emitted by the LLM.", 3 | "properties": { 4 | "confidence": { 5 | "anyOf": [ 6 | { 7 | "maximum": 1.0, 8 | "minimum": 0.0, 9 | "type": "number" 10 | }, 11 | { 12 | "type": "null" 13 | } 14 | ], 15 | "default": null, 16 | "title": "Confidence" 17 | }, 18 | "evidence_entry": { 19 | "anyOf": [ 20 | { 21 | "type": "string" 22 | }, 23 | { 24 | "type": "null" 25 | } 26 | ], 27 | "default": null, 28 | "title": "Evidence Entry" 29 | }, 30 | "id": { 31 | "title": "Id", 32 | "type": "string" 33 | }, 34 | "statement": { 35 | "maxLength": 500, 36 | "title": "Statement", 37 | "type": "string" 38 | } 39 | }, 40 | "required": [ 41 | "id", 42 | "statement" 43 | ], 44 | "title": "PromptMicroFact", 45 | "type": "object" 46 | } 47 | -------------------------------------------------------------------------------- /schemas/core/aijournal.services.consolidator.ClaimSignature.json: -------------------------------------------------------------------------------- 1 | { 2 | "properties": { 3 | "claim_type": { 4 | "title": "Claim Type", 5 | "type": "string" 6 | }, 7 | "predicate": { 8 | "title": "Predicate", 9 | "type": "string" 10 | }, 11 | "scope": { 12 | "maxItems": 2, 13 | "minItems": 2, 14 | "prefixItems": [ 15 | { 16 | "anyOf": [ 17 | { 18 | "type": 
"string" 19 | }, 20 | { 21 | "type": "null" 22 | } 23 | ] 24 | }, 25 | { 26 | "items": { 27 | "type": "string" 28 | }, 29 | "type": "array" 30 | } 31 | ], 32 | "title": "Scope", 33 | "type": "array" 34 | }, 35 | "subject": { 36 | "title": "Subject", 37 | "type": "string" 38 | } 39 | }, 40 | "required": [ 41 | "claim_type", 42 | "subject", 43 | "predicate", 44 | "scope" 45 | ], 46 | "title": "ClaimSignature", 47 | "type": "object" 48 | } 49 | -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- 1 | # Tests 2 | 3 | `uv run pytest -q` exercises the full CLI surface plus the shared Pydantic helpers. Most test 4 | modules export a `_has_command` guard so partially implemented commands can be skipped without 5 | breaking the suite. 6 | 7 | Key suites: 8 | 9 | - `tests/test_models_io.py` — round-trip coverage for every Pydantic model. Ensures the runtime 10 | validation remains aligned with the Python definitions. 11 | - `tests/test_cli_*.py` — functional coverage for init/new/ingest/normalize/summarize/facts/profile 12 | flows, all running with `AIJOURNAL_FAKE_OLLAMA=1` so CI never needs a model. 13 | - `tests/test_cli_pack.py` — validates packing logic, trim ordering, and token budgeting. 14 | 15 | When developing locally, set `AIJOURNAL_FAKE_OLLAMA=1` before running tests to avoid hitting a live 16 | model: 17 | 18 | ```sh 19 | export AIJOURNAL_FAKE_OLLAMA=1 20 | uv run pytest -q 21 | ``` 22 | 23 | The CLI automatically falls back to fake fixtures if an Ollama call fails, but exporting the env var 24 | keeps results deterministic for golden snapshots. 25 | -------------------------------------------------------------------------------- /prompts/examples/profile_update.json: -------------------------------------------------------------------------------- 1 | { 2 | "claims": [ 3 | { 4 | "type": "habit", 5 | "statement": "Blocks 8–10am for focus sprints three mornings a week.", 6 | "subject": "focus blocks", 7 | "predicate": "maintains", 8 | "strength": 0.72, 9 | "status": "tentative", 10 | "scope_domain": "work", 11 | "scope_context": [ 12 | "weekday", 13 | "solo" 14 | ], 15 | "reason": "Entries + summary cite recurring 8–10am deep work blocks.", 16 | "evidence_entry": "2025-10-28-focus-reset", 17 | "evidence_para": 0 18 | } 19 | ], 20 | "facets": [ 21 | { 22 | "path": "planning.focus_blocks.morning", 23 | "operation": "set", 24 | "value": "Protects 8–10am Tue–Thu for uninterrupted build work.", 25 | "reason": "Microfact + highlights stress morning protection ritual.", 26 | "evidence_entry": "2025-10-28-focus-reset", 27 | "evidence_para": 1 28 | } 29 | ], 30 | "interview_prompts": [ 31 | "What exceptions force skipping the 8–10am block?" 
32 | ] 33 | } 34 | -------------------------------------------------------------------------------- /schemas/core/aijournal.services.retriever.RetrievalFilters.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Optional filters applied during retrieval.", 3 | "properties": { 4 | "date_from": { 5 | "anyOf": [ 6 | { 7 | "type": "string" 8 | }, 9 | { 10 | "type": "null" 11 | } 12 | ], 13 | "default": null, 14 | "title": "Date From" 15 | }, 16 | "date_to": { 17 | "anyOf": [ 18 | { 19 | "type": "string" 20 | }, 21 | { 22 | "type": "null" 23 | } 24 | ], 25 | "default": null, 26 | "title": "Date To" 27 | }, 28 | "source_types": { 29 | "items": { 30 | "type": "string" 31 | }, 32 | "title": "Source Types", 33 | "type": "array", 34 | "uniqueItems": true 35 | }, 36 | "tags": { 37 | "items": { 38 | "type": "string" 39 | }, 40 | "title": "Tags", 41 | "type": "array", 42 | "uniqueItems": true 43 | } 44 | }, 45 | "title": "RetrievalFilters", 46 | "type": "object" 47 | } 48 | -------------------------------------------------------------------------------- /src/aijournal/domain/packs.py: -------------------------------------------------------------------------------- 1 | """Strict models representing export packs.""" 2 | 3 | from __future__ import annotations 4 | 5 | from pydantic import Field 6 | 7 | from aijournal.common.base import StrictModel 8 | from aijournal.common.types import TimestampStr # noqa: TC001 9 | 10 | 11 | class PackEntry(StrictModel): 12 | """Single file included in an export pack.""" 13 | 14 | role: str 15 | path: str 16 | tokens: int 17 | content: str 18 | 19 | 20 | class TrimmedFile(StrictModel): 21 | """Record of a file trimmed due to token budget limits.""" 22 | 23 | role: str 24 | path: str 25 | 26 | 27 | class PackMeta(StrictModel): 28 | """Metadata describing the assembled pack.""" 29 | 30 | total_tokens: int 31 | max_tokens: int 32 | trimmed: list[TrimmedFile] = Field(default_factory=list) 33 | generated_at: TimestampStr 34 | 35 | 36 | class PackBundle(StrictModel): 37 | """Structured representation of a pack export.""" 38 | 39 | level: str 40 | date: str 41 | files: list[PackEntry] = Field(default_factory=list) 42 | meta: PackMeta 43 | -------------------------------------------------------------------------------- /src/aijournal/models/claim_atoms.py: -------------------------------------------------------------------------------- 1 | """Compatibility shim re-exporting strict claim models from `aijournal.domain.claims`.""" 2 | 3 | from __future__ import annotations 4 | 5 | import warnings 6 | 7 | from aijournal.domain.claims import ( 8 | ClaimAtom as _ClaimAtom, 9 | ) 10 | from aijournal.domain.claims import ( 11 | ClaimAtomsFile as _ClaimAtomsFile, 12 | ) 13 | from aijournal.domain.claims import ClaimSource 14 | from aijournal.domain.claims import ( 15 | Provenance as _Provenance, 16 | ) 17 | from aijournal.domain.claims import ( 18 | Scope as _Scope, 19 | ) 20 | from aijournal.domain.enums import ClaimStatus, ClaimType 21 | 22 | warnings.warn( 23 | "Import claim models from `aijournal.domain.claims` instead of `aijournal.models.claim_atoms`.", 24 | DeprecationWarning, 25 | stacklevel=2, 26 | ) 27 | 28 | Scope = _Scope 29 | Provenance = _Provenance 30 | ClaimAtom = _ClaimAtom 31 | ClaimAtomsFile = _ClaimAtomsFile 32 | 33 | __all__ = [ 34 | "ClaimAtom", 35 | "ClaimAtomsFile", 36 | "ClaimSource", 37 | "ClaimStatus", 38 | "ClaimType", 39 | "Provenance", 40 | "Scope", 41 | ] 42 | 
-------------------------------------------------------------------------------- /schemas/core/aijournal.models.derived.ProfileUpdateInput.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Normalized entry metadata captured in a characterization batch.", 3 | "properties": { 4 | "id": { 5 | "title": "Id", 6 | "type": "string" 7 | }, 8 | "manifest_hash": { 9 | "anyOf": [ 10 | { 11 | "type": "string" 12 | }, 13 | { 14 | "type": "null" 15 | } 16 | ], 17 | "default": null, 18 | "title": "Manifest Hash" 19 | }, 20 | "normalized_path": { 21 | "title": "Normalized Path", 22 | "type": "string" 23 | }, 24 | "source_hash": { 25 | "anyOf": [ 26 | { 27 | "type": "string" 28 | }, 29 | { 30 | "type": "null" 31 | } 32 | ], 33 | "default": null, 34 | "title": "Source Hash" 35 | }, 36 | "tags": { 37 | "items": { 38 | "type": "string" 39 | }, 40 | "title": "Tags", 41 | "type": "array" 42 | } 43 | }, 44 | "required": [ 45 | "id", 46 | "normalized_path" 47 | ], 48 | "title": "ProfileUpdateInput", 49 | "type": "object" 50 | } 51 | -------------------------------------------------------------------------------- /tests/test_api_capture.py: -------------------------------------------------------------------------------- 1 | """Tests for the public capture API models.""" 2 | 3 | from __future__ import annotations 4 | 5 | from aijournal.api.capture import CaptureInput, CaptureRequest 6 | 7 | 8 | def test_capture_request_has_no_stage_fields() -> None: 9 | """The public request schema must not expose internal stage controls.""" 10 | fields = CaptureRequest.model_fields 11 | assert "min_stage" not in fields 12 | assert "max_stage" not in fields 13 | 14 | 15 | def test_capture_request_to_input_conversion() -> None: 16 | """CaptureInput should faithfully extend CaptureRequest data.""" 17 | request = CaptureRequest(source="stdin", text="Hello", tags=["focus"]) 18 | capture_input = CaptureInput.from_request(request, min_stage=2, max_stage=4) 19 | 20 | for key, value in request.model_dump(mode="python").items(): 21 | assert getattr(capture_input, key) == value 22 | assert capture_input.min_stage == 2 23 | assert capture_input.max_stage == 4 24 | 25 | 26 | def test_capture_request_retries_defaults_to_none() -> None: 27 | request = CaptureRequest(source="stdin", text="Body") 28 | assert request.retries is None 29 | -------------------------------------------------------------------------------- /tests/test_cli_simulator.py: -------------------------------------------------------------------------------- 1 | """CLI coverage for the human simulator command.""" 2 | 3 | from __future__ import annotations 4 | 5 | import shutil 6 | from typing import TYPE_CHECKING 7 | 8 | from typer.testing import CliRunner 9 | 10 | from aijournal.cli import app 11 | 12 | if TYPE_CHECKING: 13 | from pathlib import Path 14 | 15 | import pytest 16 | 17 | 18 | def test_cli_ops_dev_human_simulator_runs_full_pipeline( 19 | tmp_path: Path, 20 | monkeypatch: pytest.MonkeyPatch, 21 | ) -> None: 22 | runner = CliRunner() 23 | monkeypatch.setenv("AIJOURNAL_FAKE_OLLAMA", "1") 24 | output = tmp_path / "human-sim" 25 | result = runner.invoke( 26 | app, 27 | [ 28 | "ops", 29 | "dev", 30 | "human-sim", 31 | "--output", 32 | str(output), 33 | "--keep-workspace", 34 | "--max-stage", 35 | "8", 36 | "--pack-level", 37 | "L1", 38 | ], 39 | ) 40 | 41 | assert result.exit_code == 0, result.stdout 42 | assert "Result: PASS" in result.stdout 43 | assert output.exists() 44 | 45 | shutil.rmtree(output, 
ignore_errors=True) 46 | -------------------------------------------------------------------------------- /src/aijournal/domain/chat.py: -------------------------------------------------------------------------------- 1 | """Domain models for chat turns and telemetry.""" 2 | 3 | from __future__ import annotations 4 | 5 | from pydantic import Field 6 | 7 | from aijournal.api.chat import ChatCitation, ChatResponse # noqa: TC001 8 | from aijournal.common.base import StrictModel 9 | from aijournal.common.types import TimestampStr # noqa: TC001 10 | from aijournal.domain.index import RetrievedChunk # noqa: TC001 11 | from aijournal.domain.persona import PersonaCore # noqa: TC001 12 | 13 | 14 | class ChatTelemetry(StrictModel): 15 | """Telemetry captured during a chat turn.""" 16 | 17 | retrieval_ms: float 18 | chunk_count: int 19 | retriever_source: str 20 | model: str 21 | 22 | 23 | class ChatTurn(StrictModel): 24 | """Structured representation of a chat turn.""" 25 | 26 | question: str 27 | answer: str 28 | response: ChatResponse 29 | persona: PersonaCore 30 | citations: list[ChatCitation] = Field(default_factory=list) 31 | retrieved_chunks: list[RetrievedChunk] = Field(default_factory=list) 32 | fake_mode: bool 33 | intent: str 34 | clarifying_question: str | None = None 35 | telemetry: ChatTelemetry 36 | timestamp: TimestampStr 37 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | """Shared pytest fixtures for CLI integration tests.""" 2 | 3 | from __future__ import annotations 4 | 5 | from datetime import UTC, datetime 6 | from typing import TYPE_CHECKING 7 | 8 | import pytest 9 | from typer.testing import CliRunner 10 | 11 | from aijournal.cli import app 12 | 13 | if TYPE_CHECKING: 14 | from pathlib import Path 15 | 16 | _FIXED_NOW = datetime(2025, 2, 3, 12, 0, tzinfo=UTC) 17 | 18 | 19 | @pytest.fixture 20 | def cli_runner() -> CliRunner: 21 | """Return a Typer CliRunner for invoking the CLI.""" 22 | return CliRunner() 23 | 24 | 25 | @pytest.fixture 26 | def cli_workspace( 27 | tmp_path: Path, 28 | monkeypatch: pytest.MonkeyPatch, 29 | cli_runner: CliRunner, 30 | ) -> Path: 31 | """Initialize a deterministic CLI workspace inside a temporary directory.""" 32 | monkeypatch.chdir(tmp_path) 33 | monkeypatch.setenv("AIJOURNAL_FAKE_OLLAMA", "1") 34 | monkeypatch.setattr("aijournal.utils.time.now", lambda: _FIXED_NOW) 35 | 36 | result = cli_runner.invoke(app, ["init"]) 37 | if result.exit_code != 0: 38 | msg = f"Failed to initialize CLI workspace: {result.stdout}" 39 | raise RuntimeError(msg) 40 | 41 | return tmp_path 42 | -------------------------------------------------------------------------------- /src/aijournal/utils/time.py: -------------------------------------------------------------------------------- 1 | """Time and formatting helpers shared across aijournal modules.""" 2 | 3 | from __future__ import annotations 4 | 5 | import re 6 | from datetime import UTC, datetime 7 | from typing import TYPE_CHECKING 8 | 9 | if TYPE_CHECKING: 10 | from collections.abc import Callable 11 | 12 | 13 | def now() -> datetime: 14 | """Return the current UTC timestamp.""" 15 | return datetime.now(tz=UTC) 16 | 17 | 18 | def format_timestamp(dt: datetime) -> str: 19 | """Format a datetime into ISO-8601 (UTC) without offset suffix.""" 20 | return dt.strftime("%Y-%m-%dT%H:%M:%SZ") 21 | 22 | 23 | def slugify_title(title: str) -> str: 24 | """Produce a filesystem-friendly 
slug from free-form text.""" 25 | slug = re.sub(r"[^a-z0-9]+", "-", title.lower()).strip("-") 26 | return slug or "entry" 27 | 28 | 29 | def generate_session_id(clock: Callable[[], datetime] = now) -> str: 30 | """Generate a session identifier using the provided clock.""" 31 | return f"chat-{clock().strftime('%Y%m%d-%H%M%S')}" 32 | 33 | 34 | def created_date(created_at: str) -> str: 35 | """Strip the time component from an ISO-like timestamp string.""" 36 | if "T" in created_at: 37 | return created_at.split("T", 1)[0] 38 | return created_at 39 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.packs.PackMeta.json: -------------------------------------------------------------------------------- 1 | { 2 | "$defs": { 3 | "TrimmedFile": { 4 | "description": "Record of a file trimmed due to token budget limits.", 5 | "properties": { 6 | "path": { 7 | "title": "Path", 8 | "type": "string" 9 | }, 10 | "role": { 11 | "title": "Role", 12 | "type": "string" 13 | } 14 | }, 15 | "required": [ 16 | "role", 17 | "path" 18 | ], 19 | "title": "TrimmedFile", 20 | "type": "object" 21 | } 22 | }, 23 | "description": "Metadata describing the assembled pack.", 24 | "properties": { 25 | "generated_at": { 26 | "title": "Generated At", 27 | "type": "string" 28 | }, 29 | "max_tokens": { 30 | "title": "Max Tokens", 31 | "type": "integer" 32 | }, 33 | "total_tokens": { 34 | "title": "Total Tokens", 35 | "type": "integer" 36 | }, 37 | "trimmed": { 38 | "items": { 39 | "$ref": "#/$defs/TrimmedFile" 40 | }, 41 | "title": "Trimmed", 42 | "type": "array" 43 | } 44 | }, 45 | "required": [ 46 | "total_tokens", 47 | "max_tokens", 48 | "generated_at" 49 | ], 50 | "title": "PackMeta", 51 | "type": "object" 52 | } 53 | -------------------------------------------------------------------------------- /schemas/core/aijournal.api.chat.ChatCitation.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Reference to a retrieved chunk included in a chat response.", 3 | "properties": { 4 | "chunk_id": { 5 | "title": "Chunk Id", 6 | "type": "string" 7 | }, 8 | "chunk_index": { 9 | "title": "Chunk Index", 10 | "type": "integer" 11 | }, 12 | "chunk_type": { 13 | "title": "Chunk Type", 14 | "type": "string" 15 | }, 16 | "code": { 17 | "title": "Code", 18 | "type": "string" 19 | }, 20 | "date": { 21 | "title": "Date", 22 | "type": "string" 23 | }, 24 | "normalized_id": { 25 | "title": "Normalized Id", 26 | "type": "string" 27 | }, 28 | "score": { 29 | "title": "Score", 30 | "type": "number" 31 | }, 32 | "source_path": { 33 | "title": "Source Path", 34 | "type": "string" 35 | }, 36 | "tags": { 37 | "items": { 38 | "type": "string" 39 | }, 40 | "title": "Tags", 41 | "type": "array" 42 | } 43 | }, 44 | "required": [ 45 | "chunk_id", 46 | "code", 47 | "normalized_id", 48 | "chunk_index", 49 | "source_path", 50 | "date", 51 | "score", 52 | "chunk_type" 53 | ], 54 | "title": "ChatCitation", 55 | "type": "object" 56 | } 57 | -------------------------------------------------------------------------------- /src/aijournal/domain/journal.py: -------------------------------------------------------------------------------- 1 | """Journal domain models for normalized entries and sections.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import Any 6 | 7 | from pydantic import Field 8 | 9 | from aijournal.common.base import StrictModel 10 | from aijournal.common.types import TimestampStr # noqa: TC001 11 | 12 | 13 | 
class Section(StrictModel): 14 | """Normalized representation of a markdown heading or section.""" 15 | 16 | heading: str 17 | level: int = 1 18 | summary: str | None = None 19 | para_index: int | None = None 20 | 21 | 22 | class NormalizedEntity(StrictModel): 23 | """Structured entity extracted during normalization.""" 24 | 25 | type: str 26 | value: str 27 | extra: dict[str, Any] = Field(default_factory=dict) 28 | 29 | 30 | class NormalizedEntry(StrictModel): 31 | """Machine-readable journal entry used throughout pipelines.""" 32 | 33 | id: str 34 | created_at: TimestampStr 35 | source_path: str 36 | title: str 37 | tags: list[str] = Field(default_factory=list) 38 | sections: list[Section] = Field(default_factory=list) 39 | entities: list[NormalizedEntity] = Field(default_factory=list) 40 | summary: str | None = None 41 | content: str | None = None 42 | source_hash: str | None = None 43 | source_type: str | None = None 44 | -------------------------------------------------------------------------------- /schemas/core/aijournal.models.authoritative.JournalEntry.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Human-authored Markdown entry metadata.", 3 | "properties": { 4 | "created_at": { 5 | "title": "Created At", 6 | "type": "string" 7 | }, 8 | "id": { 9 | "title": "Id", 10 | "type": "string" 11 | }, 12 | "mood": { 13 | "anyOf": [ 14 | { 15 | "type": "string" 16 | }, 17 | { 18 | "type": "null" 19 | } 20 | ], 21 | "default": null, 22 | "title": "Mood" 23 | }, 24 | "projects": { 25 | "items": { 26 | "type": "string" 27 | }, 28 | "title": "Projects", 29 | "type": "array" 30 | }, 31 | "summary": { 32 | "anyOf": [ 33 | { 34 | "type": "string" 35 | }, 36 | { 37 | "type": "null" 38 | } 39 | ], 40 | "default": null, 41 | "title": "Summary" 42 | }, 43 | "tags": { 44 | "items": { 45 | "type": "string" 46 | }, 47 | "title": "Tags", 48 | "type": "array" 49 | }, 50 | "title": { 51 | "title": "Title", 52 | "type": "string" 53 | } 54 | }, 55 | "required": [ 56 | "id", 57 | "created_at", 58 | "title" 59 | ], 60 | "title": "JournalEntry", 61 | "type": "object" 62 | } 63 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.claims.Provenance.json: -------------------------------------------------------------------------------- 1 | { 2 | "$defs": { 3 | "SourceRef": { 4 | "description": "Reference to a normalized entry that supports a claim or fact.", 5 | "properties": { 6 | "entry_id": { 7 | "title": "Entry Id", 8 | "type": "string" 9 | } 10 | }, 11 | "required": [ 12 | "entry_id" 13 | ], 14 | "title": "SourceRef", 15 | "type": "object" 16 | } 17 | }, 18 | "description": "Provenance metadata recorded for a claim atom.", 19 | "properties": { 20 | "first_seen": { 21 | "anyOf": [ 22 | { 23 | "type": "string" 24 | }, 25 | { 26 | "type": "null" 27 | } 28 | ], 29 | "default": null, 30 | "title": "First Seen" 31 | }, 32 | "last_updated": { 33 | "title": "Last Updated", 34 | "type": "string" 35 | }, 36 | "observation_count": { 37 | "default": 1, 38 | "minimum": 1, 39 | "title": "Observation Count", 40 | "type": "integer" 41 | }, 42 | "sources": { 43 | "items": { 44 | "$ref": "#/$defs/SourceRef" 45 | }, 46 | "title": "Sources", 47 | "type": "array" 48 | } 49 | }, 50 | "required": [ 51 | "last_updated" 52 | ], 53 | "title": "Provenance", 54 | "type": "object" 55 | } 56 | -------------------------------------------------------------------------------- 
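# Illustrative sketch (hypothetical values): constructing the strict journal
# models from src/aijournal/domain/journal.py and round-tripping them through
# Pydantic. Only `id`, `created_at`, `source_path`, and `title` are required;
# the remaining fields fall back to their defaults. This assumes TimestampStr
# accepts ISO-8601 strings with a trailing "Z", matching the output of
# aijournal.utils.time.format_timestamp.
from aijournal.domain.journal import NormalizedEntry, Section

entry = NormalizedEntry(
    id="2025-02-03-focus",
    created_at="2025-02-03T08:30:00Z",
    source_path="data/journal/2025-02-03-focus.md",
    title="Morning focus block",
    tags=["focus"],
    sections=[Section(heading="Highlights", level=2)],
)

# Serializing and re-validating yields an equal model instance.
assert NormalizedEntry.model_validate(entry.model_dump()) == entry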
/schemas/core/aijournal.domain.prompts.PromptFacetItem.json: -------------------------------------------------------------------------------- 1 | { 2 | "$defs": { 3 | "FacetOperation": { 4 | "enum": [ 5 | "set", 6 | "remove", 7 | "merge" 8 | ], 9 | "title": "FacetOperation", 10 | "type": "string" 11 | } 12 | }, 13 | "description": "Lightweight facet change that LLM emits (no system metadata).", 14 | "properties": { 15 | "evidence_entry": { 16 | "anyOf": [ 17 | { 18 | "type": "string" 19 | }, 20 | { 21 | "type": "null" 22 | } 23 | ], 24 | "default": null, 25 | "title": "Evidence Entry" 26 | }, 27 | "operation": { 28 | "$ref": "#/$defs/FacetOperation" 29 | }, 30 | "path": { 31 | "title": "Path", 32 | "type": "string" 33 | }, 34 | "reason": { 35 | "anyOf": [ 36 | { 37 | "type": "string" 38 | }, 39 | { 40 | "type": "null" 41 | } 42 | ], 43 | "default": null, 44 | "title": "Reason" 45 | }, 46 | "value": { 47 | "anyOf": [ 48 | {}, 49 | { 50 | "type": "null" 51 | } 52 | ], 53 | "default": null, 54 | "title": "Value" 55 | } 56 | }, 57 | "required": [ 58 | "path", 59 | "operation" 60 | ], 61 | "title": "PromptFacetItem", 62 | "type": "object" 63 | } 64 | -------------------------------------------------------------------------------- /src/aijournal/pipelines/advise.py: -------------------------------------------------------------------------------- 1 | """Pipeline helpers for generating advice cards.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import TYPE_CHECKING, Any 6 | 7 | from aijournal.fakes import fake_advise 8 | 9 | if TYPE_CHECKING: 10 | from collections.abc import Callable, Sequence 11 | 12 | from aijournal.domain.claims import ClaimAtom 13 | from aijournal.models.derived import AdviceCard 14 | 15 | 16 | def generate_advice( 17 | question: str, 18 | profile: dict[str, Any], 19 | claims: Sequence[ClaimAtom], 20 | *, 21 | use_fake_llm: bool, 22 | advice_identifier: Callable[[str], str], 23 | llm_advice: AdviceCard | None, 24 | rankings: Sequence[object], 25 | pending_prompts: Sequence[str], 26 | ) -> AdviceCard: 27 | """Produce an `AdviceCard` for the given question.""" 28 | if use_fake_llm: 29 | return fake_advise( 30 | question, 31 | profile, 32 | claims, 33 | advice_identifier=advice_identifier, 34 | rankings=rankings, 35 | pending_prompts=pending_prompts, 36 | ) 37 | 38 | if llm_advice is None: 39 | msg = "llm_advice must be provided when fake mode is disabled" 40 | raise ValueError(msg) 41 | advice = llm_advice.model_copy(deep=True) 42 | if not advice.id: 43 | advice.id = advice_identifier(question) 44 | return advice 45 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.facts.MicrofactConsolidationLog.json: -------------------------------------------------------------------------------- 1 | { 2 | "$defs": { 3 | "MicrofactConsolidationSummary": { 4 | "description": "Per-day summary emitted during rebuild operations.", 5 | "properties": { 6 | "day": { 7 | "title": "Day", 8 | "type": "string" 9 | }, 10 | "merged_records": { 11 | "title": "Merged Records", 12 | "type": "integer" 13 | }, 14 | "new_records": { 15 | "title": "New Records", 16 | "type": "integer" 17 | }, 18 | "processed": { 19 | "title": "Processed", 20 | "type": "integer" 21 | } 22 | }, 23 | "required": [ 24 | "day", 25 | "processed", 26 | "new_records", 27 | "merged_records" 28 | ], 29 | "title": "MicrofactConsolidationSummary", 30 | "type": "object" 31 | } 32 | }, 33 | "description": "Artifact capturing the rebuild run summaries.", 34 | 
"properties": { 35 | "entries": { 36 | "items": { 37 | "$ref": "#/$defs/MicrofactConsolidationSummary" 38 | }, 39 | "title": "Entries", 40 | "type": "array" 41 | }, 42 | "generated_at": { 43 | "title": "Generated At", 44 | "type": "string" 45 | } 46 | }, 47 | "required": [ 48 | "generated_at" 49 | ], 50 | "title": "MicrofactConsolidationLog", 51 | "type": "object" 52 | } 53 | -------------------------------------------------------------------------------- /src/aijournal/domain/advice.py: -------------------------------------------------------------------------------- 1 | """Strict advice card models shared by CLI and services.""" 2 | 3 | from __future__ import annotations 4 | 5 | from pydantic import Field 6 | 7 | from aijournal.common.base import StrictModel 8 | 9 | 10 | class AdviceReference(StrictModel): 11 | """References included to ground why advice fits.""" 12 | 13 | facets: list[str] = Field(default_factory=list) 14 | claims: list[str] = Field(default_factory=list) 15 | 16 | 17 | class AdviceRecommendation(StrictModel): 18 | """Single recommendation within an advice card.""" 19 | 20 | title: str 21 | why_this_fits_you: AdviceReference = Field(default_factory=AdviceReference) 22 | steps: list[str] = Field(default_factory=list) 23 | risks: list[str] = Field(default_factory=list) 24 | mitigations: list[str] = Field(default_factory=list) 25 | 26 | 27 | class AdviceCard(StrictModel): 28 | """Structured advice payload produced by LLM pipelines.""" 29 | 30 | id: str | None = None 31 | query: str 32 | assumptions: list[str] = Field(default_factory=list) 33 | recommendations: list[AdviceRecommendation] = Field(default_factory=list) 34 | tradeoffs: list[str] = Field(default_factory=list) 35 | next_actions: list[str] = Field(default_factory=list) 36 | confidence: float | None = None 37 | alignment: AdviceReference = Field(default_factory=AdviceReference) 38 | style: dict[str, object] = Field(default_factory=dict) 39 | -------------------------------------------------------------------------------- /prompts/examples/advise.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": null, 3 | "query": "How can I lock in my morning focus routine?", 4 | "assumptions": [ 5 | "You can reserve time before noon without conflicts." 6 | ], 7 | "recommendations": [ 8 | { 9 | "title": "Book consistent focus blocks", 10 | "why_this_fits_you": { 11 | "facets": [ 12 | "habits.focus_block.length_minutes" 13 | ], 14 | "claims": [ 15 | "goal.focus_hours_per_week" 16 | ] 17 | }, 18 | "steps": [ 19 | "Reserve two 45-minute focus sessions before noon.", 20 | "Protect the blocks in your calendar and announce them to teammates." 21 | ], 22 | "risks": [ 23 | "Teammates may schedule over the reserved time." 24 | ], 25 | "mitigations": [ 26 | "Share the focus plan during the weekly sync." 27 | ] 28 | } 29 | ], 30 | "tradeoffs": [ 31 | "Less flexibility for early collaboration." 32 | ], 33 | "next_actions": [ 34 | "Send a note to the team about your focus blocks today." 
35 | ], 36 | "confidence": 0.72, 37 | "alignment": { 38 | "facets": [ 39 | "values_motivations.recurring_theme" 40 | ], 41 | "claims": [ 42 | "goal.focus_hours_per_week" 43 | ] 44 | }, 45 | "style": { 46 | "tone": "direct", 47 | "reading_level": "intermediate", 48 | "include_risks": true, 49 | "coaching_prompts": false 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.facts.MicroFact.json: -------------------------------------------------------------------------------- 1 | { 2 | "$defs": { 3 | "SourceRef": { 4 | "description": "Reference to a normalized entry that supports a claim or fact.", 5 | "properties": { 6 | "entry_id": { 7 | "title": "Entry Id", 8 | "type": "string" 9 | } 10 | }, 11 | "required": [ 12 | "entry_id" 13 | ], 14 | "title": "SourceRef", 15 | "type": "object" 16 | } 17 | }, 18 | "properties": { 19 | "confidence": { 20 | "title": "Confidence", 21 | "type": "number" 22 | }, 23 | "evidence": { 24 | "$ref": "#/$defs/SourceRef" 25 | }, 26 | "first_seen": { 27 | "anyOf": [ 28 | { 29 | "type": "string" 30 | }, 31 | { 32 | "type": "null" 33 | } 34 | ], 35 | "default": null, 36 | "title": "First Seen" 37 | }, 38 | "id": { 39 | "title": "Id", 40 | "type": "string" 41 | }, 42 | "last_seen": { 43 | "anyOf": [ 44 | { 45 | "type": "string" 46 | }, 47 | { 48 | "type": "null" 49 | } 50 | ], 51 | "default": null, 52 | "title": "Last Seen" 53 | }, 54 | "statement": { 55 | "title": "Statement", 56 | "type": "string" 57 | } 58 | }, 59 | "required": [ 60 | "id", 61 | "statement", 62 | "confidence", 63 | "evidence" 64 | ], 65 | "title": "MicroFact", 66 | "type": "object" 67 | } 68 | -------------------------------------------------------------------------------- /tests/common/test_meta.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import pytest 4 | from pydantic import ValidationError 5 | 6 | from aijournal.common.base import StrictModel 7 | from aijournal.common.meta import Artifact, ArtifactKind, ArtifactMeta, LLMResult 8 | 9 | 10 | class _Payload(StrictModel): 11 | value: int 12 | 13 | 14 | def test_artifact_meta_requires_timestamp() -> None: 15 | with pytest.raises(ValidationError, match="created_at"): 16 | ArtifactMeta.model_validate({}) 17 | 18 | 19 | def test_artifact_defaults_and_strictness() -> None: 20 | meta = ArtifactMeta(created_at="2025-10-29T00:00:00Z") 21 | artifact = Artifact[_Payload]( 22 | kind=ArtifactKind.SUMMARY_DAILY, 23 | meta=meta, 24 | data=_Payload(value=1), 25 | ) 26 | assert artifact.kind is ArtifactKind.SUMMARY_DAILY 27 | assert artifact.model_dump().keys() == {"kind", "meta", "data"} 28 | 29 | artifact = Artifact[_Payload]( 30 | kind=ArtifactKind.SUMMARY_DAILY, 31 | meta=meta, 32 | data=_Payload(value=1), 33 | extra_field="nope", # type: ignore[arg-type] 34 | ) 35 | assert "extra_field" not in artifact.model_dump() 36 | 37 | 38 | def test_llm_result_structure() -> None: 39 | result = LLMResult[_Payload]( 40 | model="gpt-oss:20b", 41 | prompt_path="prompts/example.md", 42 | created_at="2025-10-29T00:00:00Z", 43 | payload=_Payload(value=9), 44 | ) 45 | 46 | assert result.payload.value == 9 47 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.advice.AdviceRecommendation.json: -------------------------------------------------------------------------------- 1 | { 2 | "$defs": { 3 | "AdviceReference": { 4 | "description": "References 
included to ground why advice fits.", 5 | "properties": { 6 | "claims": { 7 | "items": { 8 | "type": "string" 9 | }, 10 | "title": "Claims", 11 | "type": "array" 12 | }, 13 | "facets": { 14 | "items": { 15 | "type": "string" 16 | }, 17 | "title": "Facets", 18 | "type": "array" 19 | } 20 | }, 21 | "title": "AdviceReference", 22 | "type": "object" 23 | } 24 | }, 25 | "description": "Single recommendation within an advice card.", 26 | "properties": { 27 | "mitigations": { 28 | "items": { 29 | "type": "string" 30 | }, 31 | "title": "Mitigations", 32 | "type": "array" 33 | }, 34 | "risks": { 35 | "items": { 36 | "type": "string" 37 | }, 38 | "title": "Risks", 39 | "type": "array" 40 | }, 41 | "steps": { 42 | "items": { 43 | "type": "string" 44 | }, 45 | "title": "Steps", 46 | "type": "array" 47 | }, 48 | "title": { 49 | "title": "Title", 50 | "type": "string" 51 | }, 52 | "why_this_fits_you": { 53 | "$ref": "#/$defs/AdviceReference" 54 | } 55 | }, 56 | "required": [ 57 | "title" 58 | ], 59 | "title": "AdviceRecommendation", 60 | "type": "object" 61 | } 62 | -------------------------------------------------------------------------------- /src/aijournal/utils/text.py: -------------------------------------------------------------------------------- 1 | """String utilities shared across capture/ingest flows.""" 2 | 3 | from __future__ import annotations 4 | 5 | import unicodedata 6 | 7 | INVISIBLE_PREFIX_CHARACTERS = { 8 | "\ufeff", # UTF-8 BOM / zero-width no-break space 9 | "\u200b", # zero-width space 10 | "\u200c", # zero-width non-joiner 11 | "\u200d", # zero-width joiner 12 | "\u2060", # word joiner 13 | "\u2061", # function application 14 | "\u2062", # invisible times 15 | "\u2063", # invisible separator 16 | "\u2064", # invisible plus 17 | "\u202a", # left-to-right embedding 18 | "\u202b", # right-to-left embedding 19 | "\u202c", # pop directional formatting 20 | "\u202d", # left-to-right override 21 | "\u202e", # right-to-left override 22 | } 23 | 24 | 25 | def strip_invisible_prefix(text: str) -> str: 26 | """Remove invisible control characters that precede visible content.""" 27 | index = 0 28 | length = len(text) 29 | while index < length: 30 | char = text[index] 31 | if char == "\x00": # stray NULL bytes from some exports 32 | index += 1 33 | continue 34 | if char in INVISIBLE_PREFIX_CHARACTERS: 35 | index += 1 36 | continue 37 | if unicodedata.category(char) == "Cf": 38 | index += 1 39 | continue 40 | break 41 | if index: 42 | return text[index:] 43 | return text 44 | 45 | 46 | __all__ = ["strip_invisible_prefix"] 47 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.persona.InterviewSet.json: -------------------------------------------------------------------------------- 1 | { 2 | "$defs": { 3 | "InterviewQuestion": { 4 | "description": "Structured interview question proposed by the characterization pipeline.", 5 | "properties": { 6 | "id": { 7 | "title": "Id", 8 | "type": "string" 9 | }, 10 | "priority": { 11 | "anyOf": [ 12 | { 13 | "type": "string" 14 | }, 15 | { 16 | "type": "null" 17 | } 18 | ], 19 | "default": null, 20 | "title": "Priority" 21 | }, 22 | "target_facet": { 23 | "anyOf": [ 24 | { 25 | "type": "string" 26 | }, 27 | { 28 | "type": "null" 29 | } 30 | ], 31 | "default": null, 32 | "title": "Target Facet" 33 | }, 34 | "text": { 35 | "title": "Text", 36 | "type": "string" 37 | } 38 | }, 39 | "required": [ 40 | "id", 41 | "text" 42 | ], 43 | "title": "InterviewQuestion", 44 | "type": "object" 45 | } 46 | }, 
47 | "description": "Collection of interview questions to review with the operator.", 48 | "properties": { 49 | "questions": { 50 | "items": { 51 | "$ref": "#/$defs/InterviewQuestion" 52 | }, 53 | "title": "Questions", 54 | "type": "array" 55 | } 56 | }, 57 | "title": "InterviewSet", 58 | "type": "object" 59 | } 60 | -------------------------------------------------------------------------------- /src/aijournal/domain/claims.py: -------------------------------------------------------------------------------- 1 | """Strict claim/domain models shared across persona and profile pipelines.""" 2 | 3 | from __future__ import annotations 4 | 5 | from pydantic import Field 6 | 7 | from aijournal.common.base import StrictModel 8 | from aijournal.domain.enums import ClaimStatus, ClaimType 9 | from aijournal.domain.evidence import SourceRef 10 | 11 | # Type alias for claim evidence sources. 12 | ClaimSource = SourceRef 13 | 14 | 15 | class Scope(StrictModel): 16 | """Contextual qualifiers for a claim atom.""" 17 | 18 | domain: str | None = None 19 | context: list[str] = Field(default_factory=list) 20 | 21 | 22 | class Provenance(StrictModel): 23 | """Provenance metadata recorded for a claim atom.""" 24 | 25 | sources: list[ClaimSource] = Field(default_factory=list) 26 | first_seen: str | None = None 27 | last_updated: str 28 | observation_count: int = Field(default=1, ge=1) 29 | 30 | 31 | class ClaimAtom(StrictModel): 32 | """Typed, scoped claim describing part of the persona.""" 33 | 34 | id: str 35 | type: ClaimType 36 | subject: str 37 | predicate: str 38 | statement: str 39 | scope: Scope = Field(default_factory=Scope) 40 | strength: float = Field(default=0.5, ge=0.0, le=1.0) 41 | status: ClaimStatus = ClaimStatus.TENTATIVE 42 | review_after_days: int = 120 43 | provenance: Provenance 44 | 45 | 46 | class ClaimAtomsFile(StrictModel): 47 | """Container persisted on disk for multiple claim atoms.""" 48 | 49 | claims: list[ClaimAtom] = Field(default_factory=list) 50 | -------------------------------------------------------------------------------- /src/aijournal/domain/index.py: -------------------------------------------------------------------------------- 1 | """Domain models for retrieval chunks and index metadata.""" 2 | 3 | from __future__ import annotations 4 | 5 | from pydantic import Field 6 | 7 | from aijournal.common.base import StrictModel 8 | 9 | 10 | class Chunk(StrictModel): 11 | """Normalized chunk persisted in the retrieval index.""" 12 | 13 | chunk_id: str 14 | normalized_id: str 15 | chunk_index: int 16 | text: str 17 | chunk_type: str = "entry" 18 | date: str 19 | tags: list[str] = Field(default_factory=list) 20 | source_type: str | None = None 21 | source_path: str 22 | tokens: int 23 | source_hash: str | None = None 24 | manifest_hash: str | None = None 25 | 26 | 27 | class RetrievedChunk(Chunk): 28 | """Chunk returned from retrieval with a similarity score.""" 29 | 30 | score: float 31 | 32 | 33 | class IndexMeta(StrictModel): 34 | """Metadata describing the current retrieval index state.""" 35 | 36 | embedding_model: str | None = None 37 | vector_dimension: int | None = None 38 | chunk_count: int | None = None 39 | entry_count: int | None = None 40 | mode: str | None = None 41 | fake_mode: bool | None = None 42 | search_k_factor: float | None = None 43 | char_per_token: float | None = None 44 | since: str | None = None 45 | limit: int | None = None 46 | touched_dates: list[str] = Field(default_factory=list) 47 | updated_at: str | None = None 48 | 49 | 50 | class 
ChunkBatch(StrictModel): 51 | """Exported chunk set for a given journal day.""" 52 | 53 | day: str 54 | chunks: list[Chunk] = Field(default_factory=list) 55 | -------------------------------------------------------------------------------- /schemas/core/aijournal.api.chat.ChatResponse.json: -------------------------------------------------------------------------------- 1 | { 2 | "$defs": { 3 | "ChatCitationRef": { 4 | "description": "Reference emitted by the LLM; resolved against retrieved chunks.", 5 | "properties": { 6 | "code": { 7 | "minLength": 1, 8 | "title": "Code", 9 | "type": "string" 10 | } 11 | }, 12 | "required": [ 13 | "code" 14 | ], 15 | "title": "ChatCitationRef", 16 | "type": "object" 17 | } 18 | }, 19 | "description": "Structured response returned by the chat LLM.", 20 | "properties": { 21 | "answer": { 22 | "maxLength": 4000, 23 | "title": "Answer", 24 | "type": "string" 25 | }, 26 | "citations": { 27 | "items": { 28 | "$ref": "#/$defs/ChatCitationRef" 29 | }, 30 | "title": "Citations", 31 | "type": "array" 32 | }, 33 | "clarifying_question": { 34 | "anyOf": [ 35 | { 36 | "type": "string" 37 | }, 38 | { 39 | "type": "null" 40 | } 41 | ], 42 | "default": null, 43 | "title": "Clarifying Question" 44 | }, 45 | "telemetry": { 46 | "additionalProperties": true, 47 | "title": "Telemetry", 48 | "type": "object" 49 | }, 50 | "timestamp": { 51 | "anyOf": [ 52 | { 53 | "type": "string" 54 | }, 55 | { 56 | "type": "null" 57 | } 58 | ], 59 | "default": null, 60 | "title": "Timestamp" 61 | } 62 | }, 63 | "required": [ 64 | "answer" 65 | ], 66 | "title": "ChatResponse", 67 | "type": "object" 68 | } 69 | -------------------------------------------------------------------------------- /schemas/core/aijournal.models.authoritative.SelfProfile.json: -------------------------------------------------------------------------------- 1 | { 2 | "properties": { 3 | "affect_energy": { 4 | "additionalProperties": true, 5 | "title": "Affect Energy", 6 | "type": "object" 7 | }, 8 | "boundaries_ethics": { 9 | "additionalProperties": true, 10 | "title": "Boundaries Ethics", 11 | "type": "object" 12 | }, 13 | "coaching_prefs": { 14 | "additionalProperties": true, 15 | "title": "Coaching Prefs", 16 | "type": "object" 17 | }, 18 | "dashboard": { 19 | "additionalProperties": true, 20 | "title": "Dashboard", 21 | "type": "object" 22 | }, 23 | "decision_style": { 24 | "additionalProperties": true, 25 | "title": "Decision Style", 26 | "type": "object" 27 | }, 28 | "goals": { 29 | "additionalProperties": true, 30 | "title": "Goals", 31 | "type": "object" 32 | }, 33 | "habits": { 34 | "additionalProperties": true, 35 | "title": "Habits", 36 | "type": "object" 37 | }, 38 | "planning": { 39 | "additionalProperties": true, 40 | "title": "Planning", 41 | "type": "object" 42 | }, 43 | "social": { 44 | "additionalProperties": true, 45 | "title": "Social", 46 | "type": "object" 47 | }, 48 | "traits": { 49 | "additionalProperties": true, 50 | "title": "Traits", 51 | "type": "object" 52 | }, 53 | "values_motivations": { 54 | "additionalProperties": true, 55 | "title": "Values Motivations", 56 | "type": "object" 57 | } 58 | }, 59 | "title": "SelfProfile", 60 | "type": "object" 61 | } 62 | -------------------------------------------------------------------------------- /docs/prompt_improvement_request.md: -------------------------------------------------------------------------------- 1 | # Prompt Improvement Request 2 | 3 | You are an expert prompt engineer tasked with improving the `aijournal` CLI prompts. 
You have access to the following materials: 4 | 5 | 1. `docs/prompt_evaluation_report.md` – detailed findings from the latest capture run, including successes, failure modes, and per-prompt examples. 6 | 2. `ARCHITECTURE.md` – system design, persona/memory layers, pipelines, and schema guarantees. 7 | 3. `docs/workflow.md` – operator workflow, command order, and pipeline expectations. 8 | 4. `TLDR.md` – capture pipeline quick reference (stages, inputs, outputs). 9 | 5. `README.md` – product overview, goals, and runtime prerequisites. 10 | 11 | ## Your Tasks 12 | 1. For each prompt under `prompts/` (`summarize_day.md`, `extract_facts.md`, `profile_update.md`, `interview.md`, `advise.md`), propose concrete improvements that address the failure modes documented in the report. Include: 13 | - Specific instruction changes (extra constraints, better examples, reminders of schema contracts). 14 | - Validation guardrails (evidence span requirements, duplicate suppression, allowed enums/paths, etc.). 15 | - Any supporting tooling/pipeline adjustments needed for the prompt to operate reliably. 16 | 2. Prioritize fixes that unblock downstream stages (profile updates, persona, advice) and explain inter-prompt dependencies where relevant. 17 | 3. List open questions or follow-up tests required after revising the prompts. 18 | 19 | Deliver your response as a structured plan with headings per prompt plus cross-cutting recommendations. Cite relevant sections/lines in the supplied docs whenever the rationale depends on architectural or workflow decisions. 20 | -------------------------------------------------------------------------------- /src/aijournal/common/constants.py: -------------------------------------------------------------------------------- 1 | """Shared configuration constants. 2 | 3 | This module contains constants used across multiple modules: 4 | - Infrastructure/environment configuration (Ollama, embeddings) 5 | - Shared formats and protocols (timeouts, file suffixes) 6 | - Cross-cutting paths and settings 7 | 8 | Module-specific constants should remain in their respective modules. 
9 | """ 10 | 11 | # ============================================================================ 12 | # LLM & Model Configuration 13 | # ============================================================================ 14 | DEFAULT_OLLAMA_HOST = "http://127.0.0.1:11434" 15 | DEFAULT_MODEL_NAME = "gpt-oss:20b" 16 | DEFAULT_LLM_RETRIES = 4 17 | 18 | # ============================================================================ 19 | # Embedding Configuration 20 | # ============================================================================ 21 | DEFAULT_EMBEDDING_MODEL = "embeddinggemma:300m" 22 | DEFAULT_EMBED_DIM = 384 23 | EMBED_TIMEOUT = 60.0 24 | 25 | # ============================================================================ 26 | # Timeouts 27 | # ============================================================================ 28 | DEFAULT_TIMEOUT_SECONDS = 120.0 29 | 30 | # ============================================================================ 31 | # File Formats 32 | # ============================================================================ 33 | MARKDOWN_SUFFIXES = {".md", ".markdown"} 34 | 35 | # ============================================================================ 36 | # Shared Paths 37 | # ============================================================================ 38 | PENDING_UPDATES_SUBDIR = "derived/pending/profile_updates" 39 | -------------------------------------------------------------------------------- /tests/test_claim_atoms.py: -------------------------------------------------------------------------------- 1 | """Unit tests for typed claim atom models.""" 2 | 3 | from __future__ import annotations 4 | 5 | from aijournal.domain.claims import ClaimAtom, ClaimAtomsFile 6 | 7 | 8 | def _sample_atom_dict() -> dict: 9 | return { 10 | "id": "pref.deep_work.window", 11 | "type": "preference", 12 | "subject": "deep_work", 13 | "predicate": "best_window", 14 | "statement": "Best deep work between 09:00–12:00 on weekdays.", 15 | "scope": { 16 | "domain": "work", 17 | "context": ["weekday"], 18 | }, 19 | "strength": 0.78, 20 | "status": "accepted", 21 | "review_after_days": 120, 22 | "provenance": { 23 | "sources": [ 24 | { 25 | "entry_id": "2025-10-25_x9t3", 26 | }, 27 | ], 28 | "first_seen": "2024-11-02", 29 | "last_updated": "2025-10-25T10:10:00Z", 30 | }, 31 | } 32 | 33 | 34 | def test_claim_atom_model_round_trip() -> None: 35 | atom = ClaimAtom.model_validate(_sample_atom_dict()) 36 | assert atom.type == "preference" 37 | assert atom.status == "accepted" 38 | assert atom.scope.domain == "work" 39 | assert atom.provenance.sources[0].entry_id == "2025-10-25_x9t3" 40 | 41 | dumped = atom.model_dump() 42 | assert dumped["scope"]["context"] == ["weekday"] 43 | assert dumped["provenance"]["sources"][0]["entry_id"] == "2025-10-25_x9t3" 44 | 45 | 46 | def test_claim_atoms_file_container() -> None: 47 | atoms_file = ClaimAtomsFile.model_validate({"claims": [_sample_atom_dict()]}) 48 | assert len(atoms_file.claims) == 1 49 | assert atoms_file.claims[0].statement.startswith("Best deep work") 50 | -------------------------------------------------------------------------------- /src/aijournal/services/capture/results.py: -------------------------------------------------------------------------------- 1 | """Lightweight result models shared by orchestration code.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import TYPE_CHECKING, Any 6 | 7 | from pydantic import BaseModel, Field 8 | 9 | if TYPE_CHECKING: 10 | from collections.abc import Iterable 11 | 12 | 13 
| class OperationResult(BaseModel): 14 | """Outcome of a single operation/stage.""" 15 | 16 | ok: bool = True 17 | changed: bool = False 18 | message: str = "" 19 | artifacts: list[str] = Field(default_factory=list) 20 | warnings: list[str] = Field(default_factory=list) 21 | details: dict[str, Any] = Field(default_factory=dict) 22 | 23 | model_config = {"arbitrary_types_allowed": True} 24 | 25 | @classmethod 26 | def noop(cls, message: str = "nothing to do", **kwargs: Any) -> OperationResult: 27 | return cls(ok=True, changed=False, message=message, **kwargs) 28 | 29 | @classmethod 30 | def wrote( 31 | cls, 32 | artifacts: Iterable[str], 33 | message: str = "written", 34 | **kwargs: Any, 35 | ) -> OperationResult: 36 | artifacts_list = list(artifacts) 37 | return cls( 38 | ok=True, 39 | changed=bool(artifacts_list), 40 | message=message, 41 | artifacts=artifacts_list, 42 | **kwargs, 43 | ) 44 | 45 | @classmethod 46 | def fail(cls, message: str, **kwargs: Any) -> OperationResult: 47 | return cls(ok=False, changed=False, message=message, **kwargs) 48 | 49 | 50 | class StageResult(BaseModel): 51 | """Execution metadata for a single capture stage.""" 52 | 53 | stage: str 54 | result: OperationResult 55 | duration_ms: float 56 | 57 | model_config = {"arbitrary_types_allowed": True} 58 | -------------------------------------------------------------------------------- /schemas/core/aijournal.common.meta.LLMResult.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Captured LLM invocation details paired with the structured payload.", 3 | "properties": { 4 | "attempts": { 5 | "default": 1, 6 | "title": "Attempts", 7 | "type": "integer" 8 | }, 9 | "coercions_applied": { 10 | "items": { 11 | "additionalProperties": { 12 | "type": "string" 13 | }, 14 | "type": "object" 15 | }, 16 | "title": "Coercions Applied", 17 | "type": "array" 18 | }, 19 | "created_at": { 20 | "title": "Created At", 21 | "type": "string" 22 | }, 23 | "model": { 24 | "title": "Model", 25 | "type": "string" 26 | }, 27 | "payload": { 28 | "title": "Payload" 29 | }, 30 | "prompt_hash": { 31 | "anyOf": [ 32 | { 33 | "type": "string" 34 | }, 35 | { 36 | "type": "null" 37 | } 38 | ], 39 | "default": null, 40 | "title": "Prompt Hash" 41 | }, 42 | "prompt_kind": { 43 | "anyOf": [ 44 | { 45 | "type": "string" 46 | }, 47 | { 48 | "type": "null" 49 | } 50 | ], 51 | "default": null, 52 | "title": "Prompt Kind" 53 | }, 54 | "prompt_path": { 55 | "title": "Prompt Path", 56 | "type": "string" 57 | }, 58 | "prompt_set": { 59 | "anyOf": [ 60 | { 61 | "type": "string" 62 | }, 63 | { 64 | "type": "null" 65 | } 66 | ], 67 | "default": null, 68 | "title": "Prompt Set" 69 | } 70 | }, 71 | "required": [ 72 | "model", 73 | "prompt_path", 74 | "created_at", 75 | "payload" 76 | ], 77 | "title": "LLMResult", 78 | "type": "object" 79 | } 80 | -------------------------------------------------------------------------------- /tests/test_cli_ollama_health.py: -------------------------------------------------------------------------------- 1 | """Tests for `aijournal ollama health`.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import TYPE_CHECKING 6 | 7 | import pytest 8 | 9 | from aijournal.cli import app 10 | 11 | if TYPE_CHECKING: 12 | from typer.testing import CliRunner 13 | 14 | 15 | def _has_ollama_health_command() -> bool: 16 | return any(cmd.name == "ollama" for cmd in app.registered_commands) 17 | 18 | 19 | @pytest.fixture(autouse=True) 20 | def skip_if_ollama_missing() -> 
None: 21 | if not _has_ollama_health_command(): 22 | pytest.skip("ollama health command not available yet") 23 | 24 | 25 | @pytest.fixture(autouse=True) 26 | def fake_ollama(monkeypatch: pytest.MonkeyPatch) -> None: 27 | monkeypatch.setenv("AIJOURNAL_FAKE_OLLAMA", "1") 28 | monkeypatch.delenv("HTTP_PROXY", raising=False) 29 | monkeypatch.delenv("HTTPS_PROXY", raising=False) 30 | 31 | 32 | def test_ollama_health_reports_models_and_default(cli_runner: CliRunner) -> None: 33 | result = cli_runner.invoke(app, ["ops", "system", "ollama", "health"]) 34 | assert result.exit_code == 0, result.output 35 | normalized = result.output.lower() 36 | assert "models" in normalized 37 | assert "default" in normalized 38 | 39 | 40 | def test_ollama_health_is_idempotent(cli_runner: CliRunner) -> None: 41 | first = cli_runner.invoke(app, ["ops", "system", "ollama", "health"]) 42 | assert first.exit_code == 0, first.output 43 | 44 | second = cli_runner.invoke(app, ["ops", "system", "ollama", "health"]) 45 | assert second.exit_code == 0, second.output 46 | normalized_first = first.output.lower() 47 | normalized_second = second.output.lower() 48 | for token in ("models", "default"): 49 | assert token in normalized_first 50 | assert token in normalized_second 51 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.facts.ConsolidatedMicroFact.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Global microfact entry that survives consolidation runs.", 3 | "properties": { 4 | "canonical_statement": { 5 | "title": "Canonical Statement", 6 | "type": "string" 7 | }, 8 | "confidence": { 9 | "title": "Confidence", 10 | "type": "number" 11 | }, 12 | "contexts": { 13 | "items": { 14 | "type": "string" 15 | }, 16 | "title": "Contexts", 17 | "type": "array" 18 | }, 19 | "domain": { 20 | "anyOf": [ 21 | { 22 | "type": "string" 23 | }, 24 | { 25 | "type": "null" 26 | } 27 | ], 28 | "default": null, 29 | "title": "Domain" 30 | }, 31 | "evidence_entries": { 32 | "items": { 33 | "type": "string" 34 | }, 35 | "title": "Evidence Entries", 36 | "type": "array" 37 | }, 38 | "first_seen": { 39 | "title": "First Seen", 40 | "type": "string" 41 | }, 42 | "id": { 43 | "title": "Id", 44 | "type": "string" 45 | }, 46 | "last_seen": { 47 | "title": "Last Seen", 48 | "type": "string" 49 | }, 50 | "observation_count": { 51 | "title": "Observation Count", 52 | "type": "integer" 53 | }, 54 | "source_fact_ids": { 55 | "items": { 56 | "type": "string" 57 | }, 58 | "title": "Source Fact Ids", 59 | "type": "array" 60 | }, 61 | "statement": { 62 | "title": "Statement", 63 | "type": "string" 64 | } 65 | }, 66 | "required": [ 67 | "id", 68 | "statement", 69 | "canonical_statement", 70 | "confidence", 71 | "first_seen", 72 | "last_seen", 73 | "observation_count" 74 | ], 75 | "title": "ConsolidatedMicroFact", 76 | "type": "object" 77 | } 78 | -------------------------------------------------------------------------------- /src/aijournal/api/capture.py: -------------------------------------------------------------------------------- 1 | """Public capture API models.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import Literal 6 | 7 | from pydantic import Field 8 | 9 | from aijournal.common.base import StrictModel 10 | 11 | 12 | class CaptureRequest(StrictModel): 13 | """User-facing capture options supplied by CLI or HTTP.""" 14 | 15 | source: Literal["stdin", "editor", "file", "dir"] 16 | text: str | None = None 
17 | paths: list[str] = Field(default_factory=list) 18 | source_type: Literal["journal", "notes", "blog"] = "journal" 19 | date: str | None = None 20 | title: str | None = None 21 | slug: str | None = None 22 | tags: list[str] = Field(default_factory=list) 23 | projects: list[str] = Field(default_factory=list) 24 | mood: str | None = None 25 | apply_profile: Literal["auto", "review"] = "auto" 26 | rebuild: Literal["auto", "always", "skip"] = "auto" 27 | pack: Literal["L1", "L3", "L4"] | None = None 28 | retries: int | None = Field( 29 | default=None, 30 | ge=0, 31 | description=( 32 | "Optional override for LLM retries; defaults to workspace configuration when unset." 33 | ), 34 | ) 35 | progress: bool = True 36 | dry_run: bool = False 37 | snapshot: bool = True 38 | 39 | 40 | class CaptureInput(CaptureRequest): 41 | """Internal capture payload enriched with stage bounds.""" 42 | 43 | min_stage: int = Field(0, ge=0) 44 | max_stage: int = Field(7, ge=0) 45 | 46 | @classmethod 47 | def from_request( 48 | cls, 49 | request: CaptureRequest, 50 | *, 51 | min_stage: int, 52 | max_stage: int, 53 | ) -> CaptureInput: 54 | payload = request.model_dump(mode="python") 55 | payload.update({"min_stage": min_stage, "max_stage": max_stage}) 56 | return cls.model_validate(payload) 57 | -------------------------------------------------------------------------------- /src/aijournal/services/capture/stages/stage1_normalize.py: -------------------------------------------------------------------------------- 1 | """Stage 1: normalize captured Markdown into structured entries.""" 2 | 3 | from __future__ import annotations 4 | 5 | from time import perf_counter 6 | from typing import TYPE_CHECKING 7 | 8 | if TYPE_CHECKING: 9 | from pathlib import Path 10 | 11 | from aijournal.common.app_config import AppConfig 12 | from aijournal.services.capture import NormalizeStageOutputs 13 | 14 | from .stage0_persist import EntryResult 15 | 16 | 17 | def run_normalize_stage_1( 18 | entry_results: list[EntryResult], 19 | root: Path, 20 | config: AppConfig, 21 | ) -> NormalizeStageOutputs: 22 | from aijournal.services.capture import NormalizeStageOutputs, normalize_entries 23 | from aijournal.services.capture.results import OperationResult 24 | 25 | normalize_start = perf_counter() 26 | artifact_counts = normalize_entries(entry_results, root, config) if entry_results else {} 27 | duration_ms = (perf_counter() - normalize_start) * 1000.0 28 | normalized_count = int(artifact_counts.get("normalized", 0)) 29 | normalized_paths = artifact_counts.get("paths", []) 30 | normalize_details: dict[str, object] = {"normalized": normalized_count} 31 | if normalized_count: 32 | message = f"{normalized_count} normalized entries updated" 33 | op_result = OperationResult.wrote( 34 | normalized_paths, 35 | message=message, 36 | details=normalize_details, 37 | ) 38 | else: 39 | op_result = OperationResult.noop( 40 | "normalized entries already up to date", 41 | details=normalize_details, 42 | ) 43 | changed_dates = sorted( 44 | {entry.date for entry in entry_results if entry.changed and not entry.deduped}, 45 | ) 46 | return NormalizeStageOutputs(artifact_counts, op_result, duration_ms, changed_dates) 47 | -------------------------------------------------------------------------------- /tests/test_cli_microfacts.py: -------------------------------------------------------------------------------- 1 | """Tests for microfacts ops commands.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import TYPE_CHECKING 6 | 7 | import yaml 8 | 
9 | from aijournal.cli import app 10 | from tests.test_cli_facts import DATE, _write_normalized, _write_summary 11 | 12 | if TYPE_CHECKING: 13 | from pathlib import Path 14 | 15 | from typer.testing import CliRunner 16 | 17 | 18 | def _load_yaml(path: Path) -> dict[str, object]: 19 | return yaml.safe_load(path.read_text(encoding="utf-8")) 20 | 21 | 22 | def test_microfacts_rebuild_command_writes_artifacts( 23 | cli_workspace: Path, 24 | cli_runner: CliRunner, 25 | ) -> None: 26 | _write_normalized(cli_workspace) 27 | _write_summary(cli_workspace) 28 | 29 | # Generate daily microfacts first. 30 | first = cli_runner.invoke( 31 | app, 32 | ["ops", "pipeline", "extract-facts", "--date", DATE], 33 | ) 34 | assert first.exit_code == 0, first.stdout 35 | 36 | result = cli_runner.invoke(app, ["ops", "microfacts", "rebuild"]) 37 | 38 | assert result.exit_code == 0, result.stdout 39 | derived = cli_workspace / "derived" / "microfacts" 40 | consolidated = derived / "consolidated.yaml" 41 | assert consolidated.exists() 42 | consolidated_artifact = _load_yaml(consolidated) 43 | assert consolidated_artifact.get("kind") == "microfacts.consolidated" 44 | data = consolidated_artifact.get("data", {}) 45 | assert data.get("facts") or [], "Expected consolidated facts" 46 | 47 | logs_dir = derived / "logs" 48 | log_files = sorted(logs_dir.glob("rebuild-*.yaml")) 49 | assert log_files, "Expected a consolidation log file" 50 | log_payload = _load_yaml(log_files[-1]) 51 | assert log_payload.get("kind") == "microfacts.log" 52 | log_entries = log_payload.get("data", {}).get("entries") or [] 53 | assert log_entries, "Expected log entries in consolidation log" 54 | -------------------------------------------------------------------------------- /tests/services/capture/test_graceful_profile_update.py: -------------------------------------------------------------------------------- 1 | """Tests for the graceful profile update wrapper.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import TYPE_CHECKING, Never 6 | 7 | import typer 8 | 9 | from aijournal.common.app_config import AppConfig 10 | from aijournal.services.capture.graceful import graceful_profile_update 11 | 12 | if TYPE_CHECKING: 13 | from pathlib import Path 14 | 15 | 16 | def test_graceful_profile_update_success(tmp_path: Path, monkeypatch) -> None: 17 | batch_path = tmp_path / "derived" / "pending" / "profile_updates" / "test.yaml" 18 | batch_path.parent.mkdir(parents=True, exist_ok=True) 19 | 20 | def fake_run( 21 | date: str, 22 | *, 23 | progress: bool, 24 | generate_preview: bool, 25 | workspace: Path | None = None, 26 | config: AppConfig | None = None, 27 | ) -> Path: 28 | del date, progress, generate_preview, workspace, config 29 | batch_path.write_text("batch", encoding="utf-8") 30 | return batch_path 31 | 32 | monkeypatch.setattr("aijournal.commands.profile_update.run_profile_update", fake_run) 33 | 34 | path, error = graceful_profile_update( 35 | "2025-10-27", 36 | progress=False, 37 | generate_preview=False, 38 | workspace=tmp_path, 39 | config=AppConfig(), 40 | ) 41 | 42 | assert error is None 43 | assert path == batch_path 44 | 45 | 46 | def test_graceful_profile_update_failure(tmp_path: Path, monkeypatch) -> None: 47 | def failing_run(*_args, **_kwargs) -> Never: 48 | raise typer.Exit(1) 49 | 50 | monkeypatch.setattr("aijournal.commands.profile_update.run_profile_update", failing_run) 51 | 52 | path, error = graceful_profile_update( 53 | "2025-10-27", 54 | progress=False, 55 | generate_preview=False, 56 | workspace=tmp_path, 
57 | config=AppConfig(), 58 | ) 59 | 60 | assert path is None 61 | assert error is not None 62 | -------------------------------------------------------------------------------- /tests/pipelines/test_advise.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from aijournal.domain.claims import ClaimAtom, Provenance, Scope 4 | from aijournal.models.derived import AdviceCard 5 | from aijournal.pipelines import advise 6 | 7 | 8 | def _claim(claim_id: str) -> ClaimAtom: 9 | return ClaimAtom( 10 | id=claim_id, 11 | type="preference", 12 | subject="self", 13 | predicate="insight", 14 | statement="Statement", 15 | scope=Scope(), 16 | strength=0.6, 17 | status="tentative", 18 | review_after_days=120, 19 | provenance=Provenance( 20 | sources=[], 21 | first_seen="2024-01-01", 22 | last_updated="2024-01-02T00:00:00Z", 23 | observation_count=1, 24 | ), 25 | ) 26 | 27 | 28 | def test_generate_advice_fake_mode() -> None: 29 | card = advise.generate_advice( 30 | "How should I focus?", 31 | profile={"values": {"top": ["Focus"]}}, 32 | claims=[_claim("claim-1")], 33 | use_fake_llm=True, 34 | advice_identifier=lambda q: "adv-test", 35 | llm_advice=None, 36 | rankings=[], 37 | pending_prompts=["Follow up"], 38 | ) 39 | 40 | assert isinstance(card, AdviceCard) 41 | assert card.id.startswith("adv-test") or card.id # ensure fake path returns AdviceCard 42 | 43 | 44 | def test_generate_advice_llm_path() -> None: 45 | response = AdviceCard( 46 | id="adv-1234", 47 | query="How should I focus?", 48 | assumptions=["Assumption"], 49 | recommendations=[], 50 | tradeoffs=[], 51 | next_actions=[], 52 | confidence=0.5, 53 | ) 54 | 55 | card = advise.generate_advice( 56 | "How should I focus?", 57 | profile={}, 58 | claims=[], 59 | use_fake_llm=False, 60 | advice_identifier=lambda q: "adv-test", 61 | llm_advice=response, 62 | rankings=[], 63 | pending_prompts=[], 64 | ) 65 | 66 | assert card.id == "adv-1234" 67 | -------------------------------------------------------------------------------- /src/aijournal/models/derived.py: -------------------------------------------------------------------------------- 1 | """Derived data models for aijournal.""" 2 | 3 | from __future__ import annotations 4 | 5 | from pydantic import Field 6 | 7 | from aijournal.domain.advice import AdviceCard as _AdviceCard 8 | from aijournal.domain.advice import AdviceRecommendation as _AdviceRecommendation 9 | from aijournal.domain.advice import AdviceReference as _AdviceReference 10 | from aijournal.domain.changes import ProfileUpdateProposals 11 | from aijournal.domain.claims import ClaimAtom 12 | from aijournal.domain.events import ClaimPreviewEvent # noqa: TC001 13 | from aijournal.domain.persona import InterviewQuestion, InterviewSet, PersonaCore 14 | 15 | from .base import AijournalModel 16 | 17 | PersonaCore.model_rebuild(_types_namespace={"ClaimAtom": ClaimAtom}) 18 | InterviewSet.model_rebuild( 19 | _types_namespace={ 20 | "InterviewQuestion": InterviewQuestion, 21 | }, 22 | ) 23 | 24 | 25 | AdviceReference = _AdviceReference 26 | AdviceRecommendation = _AdviceRecommendation 27 | AdviceCard = _AdviceCard 28 | 29 | 30 | class ProfileUpdatePreview(AijournalModel): 31 | """Preview metadata bundled with a profile update batch.""" 32 | 33 | claim_events: list[ClaimPreviewEvent] = Field(default_factory=list) 34 | interview_prompts: list[str] = Field(default_factory=list) 35 | 36 | 37 | class ProfileUpdateInput(AijournalModel): 38 | """Normalized entry metadata 
captured in a characterization batch.""" 39 | 40 | id: str 41 | normalized_path: str 42 | source_hash: str | None = None 43 | manifest_hash: str | None = None 44 | tags: list[str] = Field(default_factory=list) 45 | 46 | 47 | class ProfileUpdateBatch(AijournalModel): 48 | """Pending profile update batch emitted by the unified profile update stage/CLI.""" 49 | 50 | batch_id: str 51 | created_at: str 52 | date: str 53 | inputs: list[ProfileUpdateInput] = Field(default_factory=list) 54 | proposals: ProfileUpdateProposals = Field(default_factory=ProfileUpdateProposals) 55 | preview: ProfileUpdatePreview | None = None 56 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.index.Chunk.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Normalized chunk persisted in the retrieval index.", 3 | "properties": { 4 | "chunk_id": { 5 | "title": "Chunk Id", 6 | "type": "string" 7 | }, 8 | "chunk_index": { 9 | "title": "Chunk Index", 10 | "type": "integer" 11 | }, 12 | "chunk_type": { 13 | "default": "entry", 14 | "title": "Chunk Type", 15 | "type": "string" 16 | }, 17 | "date": { 18 | "title": "Date", 19 | "type": "string" 20 | }, 21 | "manifest_hash": { 22 | "anyOf": [ 23 | { 24 | "type": "string" 25 | }, 26 | { 27 | "type": "null" 28 | } 29 | ], 30 | "default": null, 31 | "title": "Manifest Hash" 32 | }, 33 | "normalized_id": { 34 | "title": "Normalized Id", 35 | "type": "string" 36 | }, 37 | "source_hash": { 38 | "anyOf": [ 39 | { 40 | "type": "string" 41 | }, 42 | { 43 | "type": "null" 44 | } 45 | ], 46 | "default": null, 47 | "title": "Source Hash" 48 | }, 49 | "source_path": { 50 | "title": "Source Path", 51 | "type": "string" 52 | }, 53 | "source_type": { 54 | "anyOf": [ 55 | { 56 | "type": "string" 57 | }, 58 | { 59 | "type": "null" 60 | } 61 | ], 62 | "default": null, 63 | "title": "Source Type" 64 | }, 65 | "tags": { 66 | "items": { 67 | "type": "string" 68 | }, 69 | "title": "Tags", 70 | "type": "array" 71 | }, 72 | "text": { 73 | "title": "Text", 74 | "type": "string" 75 | }, 76 | "tokens": { 77 | "title": "Tokens", 78 | "type": "integer" 79 | } 80 | }, 81 | "required": [ 82 | "chunk_id", 83 | "normalized_id", 84 | "chunk_index", 85 | "text", 86 | "date", 87 | "source_path", 88 | "tokens" 89 | ], 90 | "title": "Chunk", 91 | "type": "object" 92 | } 93 | -------------------------------------------------------------------------------- /tests/pipelines/test_summarize.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from aijournal.domain.facts import DailySummary 4 | from aijournal.domain.journal import NormalizedEntry 5 | from aijournal.models.authoritative import JournalSection 6 | from aijournal.pipelines import summarize 7 | 8 | 9 | def _normalized_entry(entry_id: str, title: str) -> NormalizedEntry: 10 | return NormalizedEntry( 11 | id=entry_id, 12 | created_at="2024-01-02T09:00:00Z", 13 | source_path=f"data/journal/{entry_id}.md", 14 | title=title, 15 | tags=["focus"], 16 | sections=[JournalSection(heading="Highlights", level=2)], 17 | ) 18 | 19 | 20 | def test_generate_summary_uses_fake_path_when_requested() -> None: 21 | entries = [_normalized_entry("entry-1", "Deep Work")] 22 | 23 | def request_factory() -> DailySummary: # pragma: no cover - should not run 24 | msg = "request_factory should not be invoked for fake flows" 25 | raise AssertionError(msg) 26 | 27 | summary_result = 
summarize.generate_summary( 28 | entries, 29 | "2024-01-02", 30 | use_fake_llm=True, 31 | llm_summary=None, 32 | ) 33 | 34 | assert summary_result.day == "2024-01-02" 35 | assert summary_result.bullets[0].startswith("Deep Work") 36 | assert summary_result.todo_candidates 37 | 38 | 39 | def test_generate_summary_merges_llm_results_with_fallback() -> None: 40 | entries = [_normalized_entry("entry-1", "Deep Work")] 41 | response = DailySummary( 42 | day="", 43 | bullets=["Refined insight", ""], 44 | highlights=[], 45 | todo_candidates=["", "Review notes"], 46 | ) 47 | 48 | summary_result = summarize.generate_summary( 49 | entries, 50 | "2024-01-02", 51 | use_fake_llm=False, 52 | llm_summary=response, 53 | ) 54 | assert summary_result.day == "2024-01-02" 55 | assert summary_result.bullets == ["Refined insight"] 56 | assert summary_result.highlights == ["Refined insight"] 57 | assert summary_result.todo_candidates == ["Review notes"] 58 | -------------------------------------------------------------------------------- /docs/archive/2025-10-29_CLI_MIGRATION.md: -------------------------------------------------------------------------------- 1 | # CLI Migration Guide 2 | 3 | The refactor consolidates everyday commands at the top level and moves specialist tools under 4 | `aijournal ops ...`. Use this table to map legacy verbs to their new homes. 5 | 6 | | Legacy Command | Replacement | 7 | | -------------- | ----------- | 8 | | `aijournal ingest` | `aijournal capture --from ...` (everyday) or `aijournal ops pipeline ingest` (advanced) | 9 | | `aijournal new` | `aijournal capture --text/--edit ...` | 10 | | `aijournal facts` | `aijournal ops pipeline extract-facts` | 11 | | `aijournal summarize` | `aijournal ops pipeline summarize` | 12 | | `aijournal review-updates` | `aijournal ops pipeline review` | 13 | | `aijournal characterize` | `aijournal ops pipeline characterize` | 14 | | `aijournal profile suggest` | (unchanged) `aijournal ops profile suggest` | 15 | | `aijournal profile apply` | (unchanged) `aijournal ops profile apply` — usually run automatically by `capture` | 16 | | `aijournal profile status` | `aijournal status` (summary) or `aijournal ops profile status` (detailed) | 17 | | `aijournal tail` | `aijournal ops index update` | 18 | | `aijournal pack` | `aijournal export pack` | 19 | | `aijournal chatd` | `aijournal serve chat` | 20 | 21 | ## Everyday Flow 22 | 23 | ```sh 24 | uv run aijournal init --path ~/journal 25 | cd ~/journal 26 | uv run aijournal capture --text "What I learned today" --tag reflection 27 | uv run aijournal status 28 | uv run aijournal chat "What progress did I make?" 29 | uv run aijournal export pack --level L1 --format yaml 30 | ``` 31 | 32 | ## Advanced Pipelines 33 | 34 | Manual reruns remain available under `aijournal ops pipeline ...`. For example: 35 | 36 | ```sh 37 | # Re-run extraction on a specific day 38 | uv run aijournal ops pipeline extract-facts --date 2025-02-05 --retries 2 --progress 39 | 40 | # Ingest a directory in CI without refreshing downstream artifacts 41 | uv run aijournal ops pipeline ingest docs/notes --source-type notes --no-snapshot 42 | ``` 43 | 44 | All `ops` commands accept the same options they did previously; the refactor only reorganizes where 45 | you invoke them. 
46 | -------------------------------------------------------------------------------- /tests/pipelines/test_persona.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from datetime import UTC, datetime 4 | 5 | import pytest 6 | 7 | from aijournal.domain.claims import ClaimAtom, Provenance, Scope 8 | from aijournal.pipelines import persona as persona_pipeline 9 | 10 | 11 | def _test_claim(claim_id: str, *, status: str = "accepted") -> ClaimAtom: 12 | return ClaimAtom( 13 | id=claim_id, 14 | type="preference", 15 | subject="Self", 16 | predicate="insight", 17 | statement=f"{claim_id} statement", 18 | scope=Scope(), 19 | strength=0.8, 20 | status=status, 21 | review_after_days=120, 22 | provenance=Provenance( 23 | sources=[], 24 | first_seen="2024-01-01", 25 | last_updated="2024-01-02T00:00:00Z", 26 | observation_count=1, 27 | ), 28 | ) 29 | 30 | 31 | def test_build_persona_core_requires_content() -> None: 32 | with pytest.raises(ValueError, match="Nothing to include in persona core"): 33 | persona_pipeline.build_persona_core( 34 | {}, 35 | [], 36 | token_budget=100, 37 | max_claims=5, 38 | min_claims=1, 39 | char_per_token=4.0, 40 | impact_weights={}, 41 | now=datetime(2024, 1, 2, tzinfo=UTC), 42 | ) 43 | 44 | 45 | def test_build_persona_core_trims_to_budget() -> None: 46 | profile = {"traits": {"strengths": ["Focused work"]}} 47 | claims = [_test_claim("claim-1", status="accepted"), _test_claim("claim-2", status="tentative")] 48 | 49 | result = persona_pipeline.build_persona_core( 50 | profile, 51 | claims, 52 | token_budget=1, 53 | max_claims=2, 54 | min_claims=1, 55 | char_per_token=1.0, 56 | impact_weights={}, 57 | now=datetime(2024, 1, 2, tzinfo=UTC), 58 | ) 59 | 60 | assert len(result.ranked_claims) == 2 61 | assert result.selection.trimmed_ids, "Expected trimming when budget is tight" 62 | assert len(result.persona.claims) == 1 63 | assert result.persona.profile == profile 64 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.events.FeedbackBatch.json: -------------------------------------------------------------------------------- 1 | { 2 | "$defs": { 3 | "FeedbackAdjustmentEvent": { 4 | "description": "Record of a claim strength adjustment triggered by chat feedback.", 5 | "properties": { 6 | "claim_id": { 7 | "title": "Claim Id", 8 | "type": "string" 9 | }, 10 | "delta": { 11 | "title": "Delta", 12 | "type": "number" 13 | }, 14 | "kind": { 15 | "default": "feedback", 16 | "title": "Kind", 17 | "type": "string" 18 | }, 19 | "new_strength": { 20 | "title": "New Strength", 21 | "type": "number" 22 | }, 23 | "old_strength": { 24 | "title": "Old Strength", 25 | "type": "number" 26 | } 27 | }, 28 | "required": [ 29 | "claim_id", 30 | "old_strength", 31 | "new_strength", 32 | "delta" 33 | ], 34 | "title": "FeedbackAdjustmentEvent", 35 | "type": "object" 36 | }, 37 | "FeedbackDirection": { 38 | "enum": [ 39 | "up", 40 | "down" 41 | ], 42 | "title": "FeedbackDirection", 43 | "type": "string" 44 | } 45 | }, 46 | "description": "Batch of feedback adjustments queued for claim strength updates.", 47 | "properties": { 48 | "batch_id": { 49 | "title": "Batch Id", 50 | "type": "string" 51 | }, 52 | "created_at": { 53 | "title": "Created At", 54 | "type": "string" 55 | }, 56 | "events": { 57 | "items": { 58 | "$ref": "#/$defs/FeedbackAdjustmentEvent" 59 | }, 60 | "title": "Events", 61 | "type": "array" 62 | }, 63 | "feedback": { 64 | "$ref": 
"#/$defs/FeedbackDirection" 65 | }, 66 | "question": { 67 | "title": "Question", 68 | "type": "string" 69 | }, 70 | "session_id": { 71 | "title": "Session Id", 72 | "type": "string" 73 | } 74 | }, 75 | "required": [ 76 | "batch_id", 77 | "created_at", 78 | "session_id", 79 | "question", 80 | "feedback" 81 | ], 82 | "title": "FeedbackBatch", 83 | "type": "object" 84 | } 85 | -------------------------------------------------------------------------------- /tests/services/capture/test_stage_summarize.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING, Never 4 | 5 | import typer 6 | 7 | from aijournal.common.app_config import AppConfig 8 | from aijournal.services.capture import CaptureInput 9 | from aijournal.services.capture.stages import stage2_summarize 10 | 11 | if TYPE_CHECKING: 12 | from pathlib import Path 13 | 14 | 15 | def _make_inputs() -> CaptureInput: 16 | return CaptureInput(source="stdin", text="Sample entry") 17 | 18 | 19 | def test_stage2_summarize_success(tmp_path: Path, monkeypatch) -> None: 20 | summary_path = tmp_path / "derived" / "summaries" / "2025-10-27.yaml" 21 | summary_path.parent.mkdir(parents=True, exist_ok=True) 22 | 23 | called: list[str] = [] 24 | 25 | def fake_run( 26 | date: str, 27 | *, 28 | progress: bool, 29 | workspace: Path | None = None, 30 | config: AppConfig | None = None, 31 | ) -> Path: 32 | called.append(date) 33 | summary_path.write_text("summary", encoding="utf-8") 34 | return summary_path 35 | 36 | monkeypatch.setattr("aijournal.commands.summarize.run_summarize", fake_run) 37 | 38 | outputs = stage2_summarize.run_summarize_stage_2( 39 | ["2025-10-27"], 40 | _make_inputs(), 41 | tmp_path, 42 | AppConfig(), 43 | ) 44 | 45 | assert called == ["2025-10-27"] 46 | assert outputs.result.ok is True 47 | assert outputs.result.changed is True 48 | assert outputs.paths == ["derived/summaries/2025-10-27.yaml"] 49 | 50 | 51 | def test_stage2_summarize_handles_failure(tmp_path: Path, monkeypatch) -> None: 52 | def failing_run(*args, **kwargs) -> Never: 53 | raise typer.Exit(1) 54 | 55 | monkeypatch.setattr("aijournal.commands.summarize.run_summarize", failing_run) 56 | 57 | outputs = stage2_summarize.run_summarize_stage_2( 58 | ["2025-10-27"], 59 | _make_inputs(), 60 | tmp_path, 61 | AppConfig(), 62 | ) 63 | 64 | assert outputs.result.ok is False 65 | assert outputs.result.changed is False 66 | assert outputs.result.warnings 67 | assert outputs.paths == [] 68 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.index.RetrievedChunk.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Chunk returned from retrieval with a similarity score.", 3 | "properties": { 4 | "chunk_id": { 5 | "title": "Chunk Id", 6 | "type": "string" 7 | }, 8 | "chunk_index": { 9 | "title": "Chunk Index", 10 | "type": "integer" 11 | }, 12 | "chunk_type": { 13 | "default": "entry", 14 | "title": "Chunk Type", 15 | "type": "string" 16 | }, 17 | "date": { 18 | "title": "Date", 19 | "type": "string" 20 | }, 21 | "manifest_hash": { 22 | "anyOf": [ 23 | { 24 | "type": "string" 25 | }, 26 | { 27 | "type": "null" 28 | } 29 | ], 30 | "default": null, 31 | "title": "Manifest Hash" 32 | }, 33 | "normalized_id": { 34 | "title": "Normalized Id", 35 | "type": "string" 36 | }, 37 | "score": { 38 | "title": "Score", 39 | "type": "number" 40 | }, 41 | "source_hash": { 42 | 
"anyOf": [ 43 | { 44 | "type": "string" 45 | }, 46 | { 47 | "type": "null" 48 | } 49 | ], 50 | "default": null, 51 | "title": "Source Hash" 52 | }, 53 | "source_path": { 54 | "title": "Source Path", 55 | "type": "string" 56 | }, 57 | "source_type": { 58 | "anyOf": [ 59 | { 60 | "type": "string" 61 | }, 62 | { 63 | "type": "null" 64 | } 65 | ], 66 | "default": null, 67 | "title": "Source Type" 68 | }, 69 | "tags": { 70 | "items": { 71 | "type": "string" 72 | }, 73 | "title": "Tags", 74 | "type": "array" 75 | }, 76 | "text": { 77 | "title": "Text", 78 | "type": "string" 79 | }, 80 | "tokens": { 81 | "title": "Tokens", 82 | "type": "integer" 83 | } 84 | }, 85 | "required": [ 86 | "chunk_id", 87 | "normalized_id", 88 | "chunk_index", 89 | "text", 90 | "date", 91 | "source_path", 92 | "tokens", 93 | "score" 94 | ], 95 | "title": "RetrievedChunk", 96 | "type": "object" 97 | } 98 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.chat_sessions.ChatLearningEntry.json: -------------------------------------------------------------------------------- 1 | { 2 | "$defs": { 3 | "ChatTelemetry": { 4 | "description": "Telemetry captured during a chat turn.", 5 | "properties": { 6 | "chunk_count": { 7 | "title": "Chunk Count", 8 | "type": "integer" 9 | }, 10 | "model": { 11 | "title": "Model", 12 | "type": "string" 13 | }, 14 | "retrieval_ms": { 15 | "title": "Retrieval Ms", 16 | "type": "number" 17 | }, 18 | "retriever_source": { 19 | "title": "Retriever Source", 20 | "type": "string" 21 | } 22 | }, 23 | "required": [ 24 | "retrieval_ms", 25 | "chunk_count", 26 | "retriever_source", 27 | "model" 28 | ], 29 | "title": "ChatTelemetry", 30 | "type": "object" 31 | } 32 | }, 33 | "description": "Entry capturing a single learning from a chat turn.", 34 | "properties": { 35 | "citations": { 36 | "items": { 37 | "type": "string" 38 | }, 39 | "title": "Citations", 40 | "type": "array" 41 | }, 42 | "clarifying_question": { 43 | "anyOf": [ 44 | { 45 | "type": "string" 46 | }, 47 | { 48 | "type": "null" 49 | } 50 | ], 51 | "default": null, 52 | "title": "Clarifying Question" 53 | }, 54 | "feedback": { 55 | "anyOf": [ 56 | { 57 | "type": "string" 58 | }, 59 | { 60 | "type": "null" 61 | } 62 | ], 63 | "default": null, 64 | "title": "Feedback" 65 | }, 66 | "intent": { 67 | "title": "Intent", 68 | "type": "string" 69 | }, 70 | "question": { 71 | "title": "Question", 72 | "type": "string" 73 | }, 74 | "telemetry": { 75 | "$ref": "#/$defs/ChatTelemetry" 76 | }, 77 | "turn_index": { 78 | "title": "Turn Index", 79 | "type": "integer" 80 | } 81 | }, 82 | "required": [ 83 | "turn_index", 84 | "question", 85 | "intent", 86 | "telemetry" 87 | ], 88 | "title": "ChatLearningEntry", 89 | "type": "object" 90 | } 91 | -------------------------------------------------------------------------------- /src/aijournal/services/microfacts/snapshot.py: -------------------------------------------------------------------------------- 1 | """Helpers for loading and filtering consolidated microfacts snapshots.""" 2 | 3 | from __future__ import annotations 4 | 5 | from pathlib import Path 6 | from typing import TYPE_CHECKING, Any 7 | 8 | from pydantic import ValidationError 9 | 10 | from aijournal.domain.facts import ConsolidatedMicroFact, ConsolidatedMicrofactsFile 11 | from aijournal.io.artifacts import load_artifact_data 12 | 13 | if TYPE_CHECKING: 14 | from aijournal.common.app_config import AppConfig 15 | 16 | 17 | def _consolidated_path(workspace: Path, config: AppConfig) -> 
Path: 18 | derived = Path(config.paths.derived) 19 | if not derived.is_absolute(): 20 | derived = workspace / derived 21 | return derived / "microfacts" / "consolidated.yaml" 22 | 23 | 24 | def load_consolidated_microfacts( 25 | workspace: Path, 26 | config: AppConfig, 27 | ) -> ConsolidatedMicrofactsFile | None: 28 | """Return the consolidated snapshot if it exists and validates.""" 29 | path = _consolidated_path(workspace, config) 30 | if not path.exists(): 31 | return None 32 | try: 33 | return load_artifact_data(path, ConsolidatedMicrofactsFile) 34 | except ValidationError: 35 | return None 36 | 37 | 38 | def select_recurring_facts( 39 | snapshot: ConsolidatedMicrofactsFile, 40 | *, 41 | min_observations: int = 2, 42 | limit: int = 20, 43 | ) -> list[dict[str, Any]]: 44 | """Return the strongest recurring facts for prompt context.""" 45 | candidates: list[ConsolidatedMicroFact] = [ 46 | fact for fact in snapshot.facts if fact.observation_count >= min_observations 47 | ] 48 | sorted_facts = sorted( 49 | candidates, 50 | key=lambda fact: (-fact.observation_count, fact.last_seen, fact.id), 51 | )[:limit] 52 | return [ 53 | { 54 | "statement": fact.statement, 55 | "observation_count": fact.observation_count, 56 | "first_seen": fact.first_seen, 57 | "last_seen": fact.last_seen, 58 | "contexts": fact.contexts, 59 | "evidence_entries": fact.evidence_entries, 60 | } 61 | for fact in sorted_facts 62 | ] 63 | -------------------------------------------------------------------------------- /tests/scripts/test_check_structured_metrics.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import json 4 | import subprocess 5 | import sys 6 | from pathlib import Path 7 | 8 | SCRIPT = Path(__file__).resolve().parents[2] / "scripts" / "check_structured_metrics.py" 9 | 10 | 11 | def _write_metrics(path: Path, entries: list[dict[str, object]]) -> None: 12 | path.parent.mkdir(parents=True, exist_ok=True) 13 | with path.open("w", encoding="utf-8") as handle: 14 | for entry in entries: 15 | handle.write(json.dumps(entry) + "\n") 16 | 17 | 18 | def test_check_structured_metrics_passes(tmp_path: Path) -> None: 19 | metrics_path = tmp_path / "metrics.jsonl" 20 | _write_metrics( 21 | metrics_path, 22 | [ 23 | {"repair_attempts": 0, "coercion_count": 1}, 24 | {"repair_attempts": 1, "coercion_count": 2}, 25 | ], 26 | ) 27 | 28 | result = subprocess.run( 29 | [ 30 | sys.executable, 31 | str(SCRIPT), 32 | "--path", 33 | str(metrics_path), 34 | "--max-repair-rate", 35 | "0.6", 36 | "--max-avg-coercions", 37 | "3.0", 38 | ], 39 | cwd=tmp_path, 40 | check=False, 41 | capture_output=True, 42 | text=True, 43 | ) 44 | 45 | assert result.returncode == 0, result.stderr 46 | 47 | 48 | def test_check_structured_metrics_fails_when_exceeding_threshold(tmp_path: Path) -> None: 49 | metrics_path = tmp_path / "metrics.jsonl" 50 | _write_metrics( 51 | metrics_path, 52 | [ 53 | {"repair_attempts": 5, "coercion_count": 20}, 54 | ], 55 | ) 56 | 57 | result = subprocess.run( 58 | [ 59 | sys.executable, 60 | str(SCRIPT), 61 | "--path", 62 | str(metrics_path), 63 | "--max-repair-rate", 64 | "0.1", 65 | "--max-avg-coercions", 66 | "3.0", 67 | ], 68 | cwd=tmp_path, 69 | check=False, 70 | capture_output=True, 71 | text=True, 72 | ) 73 | 74 | assert result.returncode == 1 75 | assert "Repair rate" in result.stdout 76 | -------------------------------------------------------------------------------- /src/aijournal/common/meta.py: 
-------------------------------------------------------------------------------- 1 | """Artifact envelope primitives shared across aijournal.""" 2 | 3 | from __future__ import annotations 4 | 5 | from enum import StrEnum 6 | from typing import Generic, TypeVar 7 | 8 | from pydantic import Field 9 | 10 | from .base import StrictModel 11 | from .types import TimestampStr # noqa: TC001 12 | 13 | T = TypeVar("T") 14 | 15 | 16 | class ArtifactMeta(StrictModel): 17 | """Metadata describing how an artifact was produced.""" 18 | 19 | created_at: TimestampStr 20 | model: str | None = None 21 | prompt_path: str | None = None 22 | prompt_hash: str | None = None 23 | prompt_kind: str | None = None 24 | prompt_set: str | None = None 25 | char_per_token: float | None = None 26 | notes: dict[str, str] | None = None 27 | 28 | 29 | class ArtifactKind(StrEnum): 30 | """Enumeration of persisted artifact categories.""" 31 | 32 | PERSONA_CORE = "persona.core" 33 | SUMMARY_DAILY = "summaries.daily" 34 | MICROFACTS_DAILY = "microfacts.daily" 35 | MICROFACTS_CONSOLIDATED = "microfacts.consolidated" 36 | MICROFACTS_LOG = "microfacts.log" 37 | PROFILE_PROPOSALS = "profile.proposals" 38 | PROFILE_UPDATES = "profile.updates" 39 | FEEDBACK_BATCH = "feedback.batch" 40 | INDEX_META = "index.meta" 41 | INDEX_CHUNKS = "index.chunks" 42 | PACK_L1 = "pack.L1" 43 | PACK_L2 = "pack.L2" 44 | PACK_L3 = "pack.L3" 45 | PACK_L4 = "pack.L4" 46 | CHAT_TRANSCRIPT = "chat.transcript" 47 | CHAT_SUMMARY = "chat.summary" 48 | CHAT_LEARNINGS = "chat.learnings" 49 | ADVICE_CARD = "advice.card" 50 | 51 | 52 | class Artifact(StrictModel, Generic[T]): 53 | """Artifact envelope wrapping a payload of type ``T``.""" 54 | 55 | kind: ArtifactKind 56 | meta: ArtifactMeta 57 | data: T 58 | 59 | 60 | class LLMResult(StrictModel, Generic[T]): 61 | """Captured LLM invocation details paired with the structured payload.""" 62 | 63 | model: str 64 | prompt_path: str 65 | prompt_hash: str | None = None 66 | prompt_kind: str | None = None 67 | prompt_set: str | None = None 68 | created_at: TimestampStr 69 | payload: T 70 | attempts: int = 1 71 | coercions_applied: list[dict[str, str]] = Field(default_factory=list) 72 | -------------------------------------------------------------------------------- /src/aijournal/pipelines/summarize.py: -------------------------------------------------------------------------------- 1 | """Pipeline orchestration for daily summary generation.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import TYPE_CHECKING 6 | 7 | from aijournal.domain.facts import DailySummary 8 | from aijournal.fakes import fake_summarize 9 | 10 | if TYPE_CHECKING: 11 | from collections.abc import Sequence 12 | 13 | from aijournal.domain.journal import NormalizedEntry 14 | 15 | 16 | def _todo_from_entries(entries: Sequence[NormalizedEntry]) -> list[str]: 17 | todos: list[str] = [] 18 | for entry in entries[:3]: 19 | title = entry.title or entry.id or "entry" 20 | todos.append(f"Review follow-ups from {title}") 21 | return todos or ["Capture explicit next actions in tomorrow's entry."] 22 | 23 | 24 | def generate_summary( 25 | entries: Sequence[NormalizedEntry], 26 | date: str, 27 | *, 28 | use_fake_llm: bool, 29 | llm_summary: DailySummary | None, 30 | ) -> DailySummary: 31 | """Produce a `DailySummary` for the given date.""" 32 | 33 | def fallback_model() -> DailySummary: 34 | return fake_summarize(entries, date, todo_builder=_todo_from_entries) 35 | 36 | if use_fake_llm: 37 | return fallback_model() 38 | 39 | if llm_summary is None: 
40 | msg = "llm_summary must be provided when fake mode is disabled" 41 | raise ValueError(msg) 42 | 43 | bullets = [item for item in llm_summary.bullets if item] 44 | highlights = [item for item in llm_summary.highlights if item] 45 | todo_candidates = [item for item in llm_summary.todo_candidates if item] 46 | 47 | if not bullets: 48 | fallback = fallback_model() 49 | bullets = fallback.bullets 50 | if not highlights: 51 | highlights = fallback.highlights 52 | if not todo_candidates: 53 | todo_candidates = fallback.todo_candidates 54 | 55 | if not highlights: 56 | highlights = bullets[:3] 57 | if not todo_candidates: 58 | todo_candidates = _todo_from_entries(entries) 59 | 60 | day = llm_summary.day or date 61 | 62 | return DailySummary( 63 | day=day, 64 | bullets=bullets, 65 | highlights=highlights, 66 | todo_candidates=todo_candidates, 67 | ) 68 | -------------------------------------------------------------------------------- /schemas/core/aijournal.common.meta.ArtifactMeta.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Metadata describing how an artifact was produced.", 3 | "properties": { 4 | "char_per_token": { 5 | "anyOf": [ 6 | { 7 | "type": "number" 8 | }, 9 | { 10 | "type": "null" 11 | } 12 | ], 13 | "default": null, 14 | "title": "Char Per Token" 15 | }, 16 | "created_at": { 17 | "title": "Created At", 18 | "type": "string" 19 | }, 20 | "model": { 21 | "anyOf": [ 22 | { 23 | "type": "string" 24 | }, 25 | { 26 | "type": "null" 27 | } 28 | ], 29 | "default": null, 30 | "title": "Model" 31 | }, 32 | "notes": { 33 | "anyOf": [ 34 | { 35 | "additionalProperties": { 36 | "type": "string" 37 | }, 38 | "type": "object" 39 | }, 40 | { 41 | "type": "null" 42 | } 43 | ], 44 | "default": null, 45 | "title": "Notes" 46 | }, 47 | "prompt_hash": { 48 | "anyOf": [ 49 | { 50 | "type": "string" 51 | }, 52 | { 53 | "type": "null" 54 | } 55 | ], 56 | "default": null, 57 | "title": "Prompt Hash" 58 | }, 59 | "prompt_kind": { 60 | "anyOf": [ 61 | { 62 | "type": "string" 63 | }, 64 | { 65 | "type": "null" 66 | } 67 | ], 68 | "default": null, 69 | "title": "Prompt Kind" 70 | }, 71 | "prompt_path": { 72 | "anyOf": [ 73 | { 74 | "type": "string" 75 | }, 76 | { 77 | "type": "null" 78 | } 79 | ], 80 | "default": null, 81 | "title": "Prompt Path" 82 | }, 83 | "prompt_set": { 84 | "anyOf": [ 85 | { 86 | "type": "string" 87 | }, 88 | { 89 | "type": "null" 90 | } 91 | ], 92 | "default": null, 93 | "title": "Prompt Set" 94 | } 95 | }, 96 | "required": [ 97 | "created_at" 98 | ], 99 | "title": "ArtifactMeta", 100 | "type": "object" 101 | } 102 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.changes.ClaimAtomInput.json: -------------------------------------------------------------------------------- 1 | { 2 | "$defs": { 3 | "ClaimStatus": { 4 | "enum": [ 5 | "accepted", 6 | "tentative", 7 | "rejected" 8 | ], 9 | "title": "ClaimStatus", 10 | "type": "string" 11 | }, 12 | "ClaimType": { 13 | "enum": [ 14 | "preference", 15 | "value", 16 | "goal", 17 | "boundary", 18 | "trait", 19 | "habit", 20 | "aversion", 21 | "skill" 22 | ], 23 | "title": "ClaimType", 24 | "type": "string" 25 | }, 26 | "Scope": { 27 | "description": "Contextual qualifiers for a claim atom.", 28 | "properties": { 29 | "context": { 30 | "items": { 31 | "type": "string" 32 | }, 33 | "title": "Context", 34 | "type": "array" 35 | }, 36 | "domain": { 37 | "anyOf": [ 38 | { 39 | "type": "string" 40 | }, 41 | { 42 | 
"type": "null" 43 | } 44 | ], 45 | "default": null, 46 | "title": "Domain" 47 | } 48 | }, 49 | "title": "Scope", 50 | "type": "object" 51 | } 52 | }, 53 | "description": "Normalized claim payload without identifiers or provenance.", 54 | "properties": { 55 | "predicate": { 56 | "title": "Predicate", 57 | "type": "string" 58 | }, 59 | "review_after_days": { 60 | "title": "Review After Days", 61 | "type": "integer" 62 | }, 63 | "scope": { 64 | "$ref": "#/$defs/Scope" 65 | }, 66 | "statement": { 67 | "title": "Statement", 68 | "type": "string" 69 | }, 70 | "status": { 71 | "$ref": "#/$defs/ClaimStatus" 72 | }, 73 | "strength": { 74 | "title": "Strength", 75 | "type": "number" 76 | }, 77 | "subject": { 78 | "title": "Subject", 79 | "type": "string" 80 | }, 81 | "type": { 82 | "$ref": "#/$defs/ClaimType" 83 | } 84 | }, 85 | "required": [ 86 | "type", 87 | "subject", 88 | "predicate", 89 | "statement", 90 | "scope", 91 | "strength", 92 | "status", 93 | "review_after_days" 94 | ], 95 | "title": "ClaimAtomInput", 96 | "type": "object" 97 | } 98 | -------------------------------------------------------------------------------- /tests/io_tests/test_artifacts.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING 4 | 5 | import yaml 6 | 7 | from aijournal.common.base import StrictModel 8 | from aijournal.common.meta import Artifact, ArtifactKind, ArtifactMeta 9 | from aijournal.io.artifacts import load_artifact, load_artifact_data, save_artifact 10 | 11 | if TYPE_CHECKING: 12 | from pathlib import Path 13 | 14 | 15 | class _Payload(StrictModel): 16 | value: int 17 | 18 | 19 | def _make_artifact(value: int = 1) -> Artifact[_Payload]: 20 | return Artifact[_Payload]( 21 | kind=ArtifactKind.SUMMARY_DAILY, 22 | meta=ArtifactMeta(created_at="2025-10-29T00:00:00Z"), 23 | data=_Payload(value=value), 24 | ) 25 | 26 | 27 | def test_save_artifact_writes_deterministic_yaml(tmp_path: Path) -> None: 28 | artifact = _make_artifact() 29 | path = tmp_path / "artifact.yaml" 30 | 31 | save_artifact(path, artifact) 32 | 33 | text = path.read_text(encoding="utf-8") 34 | assert text.endswith("\n") 35 | assert text.splitlines()[0] == "data:" 36 | 37 | loaded_yaml = yaml.safe_load(text) 38 | assert "schema" not in loaded_yaml 39 | assert loaded_yaml["kind"] == ArtifactKind.SUMMARY_DAILY.value 40 | 41 | 42 | def test_save_artifact_json(tmp_path: Path) -> None: 43 | artifact = _make_artifact(2) 44 | path = tmp_path / "artifact.json" 45 | 46 | save_artifact(path, artifact) 47 | 48 | text = path.read_text(encoding="utf-8") 49 | assert text.endswith("\n") 50 | assert text.strip().startswith("{") 51 | 52 | loaded = load_artifact(path, _Payload) 53 | assert loaded.data.value == 2 54 | 55 | 56 | def test_load_artifact_roundtrip(tmp_path: Path) -> None: 57 | artifact = _make_artifact(3) 58 | path = tmp_path / "artifact.yaml" 59 | save_artifact(path, artifact) 60 | 61 | loaded = load_artifact(path, _Payload) 62 | assert isinstance(loaded.data, _Payload) 63 | assert loaded.data.value == 3 64 | 65 | 66 | def test_load_artifact_data_returns_payload(tmp_path: Path) -> None: 67 | artifact = _make_artifact(5) 68 | path = tmp_path / "example.yaml" 69 | save_artifact(path, artifact) 70 | 71 | payload = load_artifact_data(path, _Payload) 72 | assert isinstance(payload, _Payload) 73 | assert payload.value == 5 74 | -------------------------------------------------------------------------------- /src/aijournal/domain/chat_sessions.py: 
-------------------------------------------------------------------------------- 1 | """Structured models for persisted chat session artifacts.""" 2 | 3 | from __future__ import annotations 4 | 5 | from pydantic import Field 6 | 7 | from aijournal.common.base import StrictModel 8 | from aijournal.common.types import TimestampStr # noqa: TC001 9 | from aijournal.domain.chat import ChatTelemetry # noqa: TC001 10 | 11 | 12 | class ChatTranscriptTurn(StrictModel): 13 | """Captured question/answer pair within a chat transcript.""" 14 | 15 | turn_index: int 16 | timestamp: TimestampStr 17 | question: str 18 | answer: str 19 | intent: str 20 | citations: list[str] = Field(default_factory=list) 21 | clarifying_question: str | None = None 22 | telemetry: ChatTelemetry 23 | feedback: str | None = None 24 | fake_mode: bool 25 | 26 | 27 | class ChatTranscript(StrictModel): 28 | """Artifact describing a full chat session transcript.""" 29 | 30 | session_id: str 31 | created_at: TimestampStr 32 | updated_at: TimestampStr 33 | turns: list[ChatTranscriptTurn] = Field(default_factory=list) 34 | 35 | 36 | class ChatSessionSummary(StrictModel): 37 | """Aggregated summary metadata for a chat session.""" 38 | 39 | session_id: str 40 | created_at: TimestampStr 41 | updated_at: TimestampStr 42 | turn_count: int = 0 43 | intent_counts: dict[str, int] = Field(default_factory=dict) 44 | last_question: str | None = None 45 | last_answer_preview: str | None = None 46 | last_citations: list[str] = Field(default_factory=list) 47 | last_clarifying_question: str | None = None 48 | last_retrieval_ms: float | None = None 49 | last_feedback: str | None = None 50 | 51 | 52 | class ChatLearningEntry(StrictModel): 53 | """Entry capturing a single learning from a chat turn.""" 54 | 55 | turn_index: int 56 | question: str 57 | intent: str 58 | citations: list[str] = Field(default_factory=list) 59 | clarifying_question: str | None = None 60 | telemetry: ChatTelemetry 61 | feedback: str | None = None 62 | 63 | 64 | class ChatSessionLearnings(StrictModel): 65 | """Rollup of learnings captured across a chat session.""" 66 | 67 | session_id: str 68 | created_at: TimestampStr 69 | updated_at: TimestampStr 70 | learnings: list[ChatLearningEntry] = Field(default_factory=list) 71 | -------------------------------------------------------------------------------- /src/aijournal/domain/facts.py: -------------------------------------------------------------------------------- 1 | """Domain models for extracted facts and daily summaries.""" 2 | 3 | from __future__ import annotations 4 | 5 | from pydantic import Field 6 | 7 | from aijournal.common.base import StrictModel 8 | from aijournal.domain.changes import ClaimProposal # noqa: TC001 9 | from aijournal.domain.evidence import SourceRef 10 | 11 | 12 | class DailySummary(StrictModel): 13 | """Derived day summary (PLAN §4.1).""" 14 | 15 | day: str 16 | bullets: list[str] = Field(default_factory=list) 17 | highlights: list[str] = Field(default_factory=list) 18 | todo_candidates: list[str] = Field(default_factory=list) 19 | 20 | 21 | FactEvidence = SourceRef 22 | 23 | 24 | class MicroFact(StrictModel): 25 | id: str 26 | statement: str 27 | confidence: float 28 | evidence: FactEvidence 29 | first_seen: str | None = None 30 | last_seen: str | None = None 31 | 32 | 33 | class MicroFactsFile(StrictModel): 34 | facts: list[MicroFact] = Field(default_factory=list) 35 | claim_proposals: list[ClaimProposal] = Field(default_factory=list) 36 | 37 | 38 | class ConsolidatedMicroFact(StrictModel): 39 | 
"""Global microfact entry that survives consolidation runs.""" 40 | 41 | id: str 42 | statement: str 43 | canonical_statement: str 44 | confidence: float 45 | first_seen: str 46 | last_seen: str 47 | observation_count: int 48 | domain: str | None = None 49 | contexts: list[str] = Field(default_factory=list) 50 | evidence_entries: list[str] = Field(default_factory=list) 51 | source_fact_ids: list[str] = Field(default_factory=list) 52 | 53 | 54 | class ConsolidatedMicrofactsFile(StrictModel): 55 | """Artifact capturing the global consolidated microfact snapshot.""" 56 | 57 | generated_at: str 58 | embedding_model: str | None = None 59 | facts: list[ConsolidatedMicroFact] = Field(default_factory=list) 60 | 61 | 62 | class MicrofactConsolidationSummary(StrictModel): 63 | """Per-day summary emitted during rebuild operations.""" 64 | 65 | day: str 66 | processed: int 67 | new_records: int 68 | merged_records: int 69 | 70 | 71 | class MicrofactConsolidationLog(StrictModel): 72 | """Artifact capturing the rebuild run summaries.""" 73 | 74 | generated_at: str 75 | entries: list[MicrofactConsolidationSummary] = Field(default_factory=list) 76 | -------------------------------------------------------------------------------- /schemas/core/aijournal.models.authoritative.ManifestEntry.json: -------------------------------------------------------------------------------- 1 | { 2 | "additionalProperties": true, 3 | "description": "Manifest row describing an ingested Markdown source.", 4 | "properties": { 5 | "aliases": { 6 | "items": { 7 | "type": "string" 8 | }, 9 | "title": "Aliases", 10 | "type": "array" 11 | }, 12 | "canonical_journal_path": { 13 | "anyOf": [ 14 | { 15 | "type": "string" 16 | }, 17 | { 18 | "type": "null" 19 | } 20 | ], 21 | "default": null, 22 | "title": "Canonical Journal Path" 23 | }, 24 | "created_at": { 25 | "title": "Created At", 26 | "type": "string" 27 | }, 28 | "hash": { 29 | "title": "Hash", 30 | "type": "string" 31 | }, 32 | "id": { 33 | "title": "Id", 34 | "type": "string" 35 | }, 36 | "ingested_at": { 37 | "title": "Ingested At", 38 | "type": "string" 39 | }, 40 | "model": { 41 | "anyOf": [ 42 | { 43 | "type": "string" 44 | }, 45 | { 46 | "type": "null" 47 | } 48 | ], 49 | "default": null, 50 | "title": "Model" 51 | }, 52 | "normalized": { 53 | "title": "Normalized", 54 | "type": "string" 55 | }, 56 | "path": { 57 | "title": "Path", 58 | "type": "string" 59 | }, 60 | "snapshot_path": { 61 | "anyOf": [ 62 | { 63 | "type": "string" 64 | }, 65 | { 66 | "type": "null" 67 | } 68 | ], 69 | "default": null, 70 | "title": "Snapshot Path" 71 | }, 72 | "source_type": { 73 | "anyOf": [ 74 | { 75 | "type": "string" 76 | }, 77 | { 78 | "type": "null" 79 | } 80 | ], 81 | "default": null, 82 | "title": "Source Type" 83 | }, 84 | "tags": { 85 | "items": { 86 | "type": "string" 87 | }, 88 | "title": "Tags", 89 | "type": "array" 90 | } 91 | }, 92 | "required": [ 93 | "hash", 94 | "path", 95 | "normalized", 96 | "ingested_at", 97 | "created_at", 98 | "id" 99 | ], 100 | "title": "ManifestEntry", 101 | "type": "object" 102 | } 103 | -------------------------------------------------------------------------------- /src/aijournal/models/authoritative.py: -------------------------------------------------------------------------------- 1 | """Authoritative data models for aijournal.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import Any 6 | 7 | from pydantic import ConfigDict, Field 8 | 9 | from aijournal.domain.claims import ClaimAtom # noqa: TC001 10 | from 
aijournal.domain.journal import Section 11 | 12 | from .base import AijournalModel 13 | 14 | JsonScalar = str | int | float | bool | None 15 | JsonValue = JsonScalar | list[Any] | dict[str, Any] 16 | 17 | 18 | class ManifestEntry(AijournalModel): 19 | """Manifest row describing an ingested Markdown source.""" 20 | 21 | model_config = ConfigDict( 22 | extra="allow", 23 | populate_by_name=True, 24 | arbitrary_types_allowed=True, 25 | ) 26 | 27 | hash: str 28 | path: str 29 | normalized: str 30 | source_type: str | None = None 31 | ingested_at: str 32 | created_at: str 33 | id: str 34 | tags: list[str] = Field(default_factory=list) 35 | model: str | None = None 36 | canonical_journal_path: str | None = None 37 | snapshot_path: str | None = None 38 | aliases: list[str] = Field(default_factory=list) 39 | 40 | 41 | class JournalEntry(AijournalModel): 42 | """Human-authored Markdown entry metadata.""" 43 | 44 | id: str 45 | created_at: str 46 | title: str 47 | tags: list[str] = Field(default_factory=list) 48 | mood: str | None = None 49 | projects: list[str] = Field(default_factory=list) 50 | summary: str | None = None 51 | 52 | 53 | JournalSection = Section 54 | 55 | 56 | class ClaimsFile(AijournalModel): 57 | claims: list[ClaimAtom] = Field(default_factory=list) 58 | 59 | 60 | class SelfProfile(AijournalModel): 61 | traits: dict[str, Any] = Field(default_factory=dict) 62 | values_motivations: dict[str, Any] = Field(default_factory=dict) 63 | goals: dict[str, Any] = Field(default_factory=dict) 64 | decision_style: dict[str, Any] = Field(default_factory=dict) 65 | affect_energy: dict[str, Any] = Field(default_factory=dict) 66 | planning: dict[str, Any] = Field(default_factory=dict) 67 | dashboard: dict[str, Any] = Field(default_factory=dict) 68 | habits: dict[str, Any] = Field(default_factory=dict) 69 | social: dict[str, Any] = Field(default_factory=dict) 70 | boundaries_ethics: dict[str, Any] = Field(default_factory=dict) 71 | coaching_prefs: dict[str, Any] = Field(default_factory=dict) 72 | -------------------------------------------------------------------------------- /src/aijournal/domain/events.py: -------------------------------------------------------------------------------- 1 | """Domain models describing claim change events and feedback adjustments.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import Annotated 6 | 7 | from pydantic import Field 8 | 9 | from aijournal.common.base import StrictModel 10 | from aijournal.domain.claims import ClaimSource # noqa: TC001 11 | from aijournal.domain.enums import ClaimEventAction, FeedbackDirection # noqa: TC001 12 | 13 | 14 | class ClaimSignaturePayload(StrictModel): 15 | """Serialized signature describing the target slot for a claim.""" 16 | 17 | claim_type: str 18 | subject: str 19 | predicate: str 20 | domain: str | None = None 21 | context: list[str] = Field(default_factory=list) 22 | 23 | 24 | class ClaimConflictPayload(StrictModel): 25 | """Structured conflict emitted during consolidation previews.""" 26 | 27 | claim_id: str 28 | signature: ClaimSignaturePayload 29 | statement: str 30 | existing_statement: str 31 | incoming_statement: str 32 | incoming_sources: list[ClaimSource] = Field(default_factory=list) 33 | 34 | 35 | class ClaimPreviewEvent(StrictModel): 36 | """Outcome of attempting to merge a claim proposal into existing atoms.""" 37 | 38 | kind: str = "preview" 39 | action: ClaimEventAction 40 | claim_id: str 41 | delta_strength: float | None = None 42 | statement: str | None = None 43 | strength: float | None 
= None 44 | signature: ClaimSignaturePayload | None = None 45 | conflict: ClaimConflictPayload | None = None 46 | related_claim_id: str | None = None 47 | related_action: str | None = None 48 | related_signature: ClaimSignaturePayload | None = None 49 | 50 | 51 | class FeedbackAdjustmentEvent(StrictModel): 52 | """Record of a claim strength adjustment triggered by chat feedback.""" 53 | 54 | kind: str = "feedback" 55 | claim_id: str 56 | old_strength: float 57 | new_strength: float 58 | delta: float 59 | 60 | 61 | ClaimChangeEvent = Annotated[ 62 | ClaimPreviewEvent | FeedbackAdjustmentEvent, 63 | Field(discriminator="kind"), 64 | ] 65 | 66 | 67 | class FeedbackBatch(StrictModel): 68 | """Batch of feedback adjustments queued for claim strength updates.""" 69 | 70 | batch_id: str 71 | created_at: str 72 | session_id: str 73 | question: str 74 | feedback: FeedbackDirection 75 | events: list[FeedbackAdjustmentEvent] = Field(default_factory=list) 76 | -------------------------------------------------------------------------------- /src/aijournal/schema.py: -------------------------------------------------------------------------------- 1 | """Pydantic-backed validation helpers for aijournal payloads.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import TYPE_CHECKING, Any 6 | 7 | from pydantic import BaseModel, ValidationError 8 | 9 | from aijournal.domain.facts import DailySummary, MicroFactsFile 10 | from aijournal.domain.journal import NormalizedEntry 11 | from aijournal.domain.persona import InterviewSet, PersonaCore 12 | from aijournal.models.authoritative import ClaimsFile, JournalEntry, SelfProfile 13 | from aijournal.models.derived import ( 14 | AdviceCard, 15 | ProfileUpdateBatch, 16 | ) 17 | 18 | if TYPE_CHECKING: 19 | from collections.abc import Iterable 20 | 21 | 22 | class SchemaValidationError(ValueError): 23 | """Raised when a payload does not conform to a named schema.""" 24 | 25 | def __init__(self, schema: str, errors: Iterable[str]) -> None: 26 | self.schema = schema 27 | self.errors = list(errors) 28 | message = f"Schema '{schema}' validation failed: {'; '.join(self.errors)}" 29 | super().__init__(message) 30 | 31 | 32 | _MODEL_REGISTRY: dict[str, type[BaseModel]] = { 33 | "advice": AdviceCard, 34 | "claims": ClaimsFile, 35 | "interviews": InterviewSet, 36 | "journal_entry": JournalEntry, 37 | "microfacts": MicroFactsFile, 38 | "normalized_entry": NormalizedEntry, 39 | "persona_core": PersonaCore, 40 | "profile_updates": ProfileUpdateBatch, 41 | "self_profile": SelfProfile, 42 | "summary": DailySummary, 43 | } 44 | 45 | 46 | def _resolve_model(schema_name: str) -> type[BaseModel]: 47 | try: 48 | return _MODEL_REGISTRY[schema_name] 49 | except KeyError as exc: # pragma: no cover - defensive guard 50 | msg = f"Unknown schema requested: {schema_name}" 51 | raise ValueError(msg) from exc 52 | 53 | 54 | def validate_schema(schema_name: str, payload: Any) -> None: 55 | """Validate payload against the named schema or raise SchemaValidationError.""" 56 | model = _resolve_model(schema_name) 57 | errors: list[str] = [] 58 | try: 59 | model.model_validate(payload) 60 | except ValidationError as exc: 61 | for err in exc.errors(): 62 | location = ".".join(str(part) for part in err.get("loc", ())) or "" 63 | errors.append(f"{location}: {err.get('msg', 'invalid value')}") 64 | if errors: 65 | raise SchemaValidationError(schema_name, errors) 66 | -------------------------------------------------------------------------------- /tests/simulator/test_human_simulator.py: 
-------------------------------------------------------------------------------- 1 | """Tests for the human-style simulator harness.""" 2 | 3 | from __future__ import annotations 4 | 5 | import shutil 6 | from typing import TYPE_CHECKING 7 | 8 | import pytest 9 | 10 | from aijournal.simulator.orchestrator import HumanSimulator 11 | from aijournal.simulator.validators import StageValidatorRegistry, ValidatorContext 12 | 13 | if TYPE_CHECKING: 14 | from pathlib import Path 15 | 16 | 17 | @pytest.fixture 18 | def simulator_workspace(tmp_path: Path) -> Path: 19 | return tmp_path / "sim-workspace" 20 | 21 | 22 | def test_simulator_runs_end_to_end( 23 | monkeypatch: pytest.MonkeyPatch, 24 | simulator_workspace: Path, 25 | ) -> None: 26 | monkeypatch.setenv("AIJOURNAL_FAKE_OLLAMA", "1") 27 | simulator = HumanSimulator(max_stage=7) 28 | report = simulator.run(workspace=simulator_workspace, keep_workspace=True) 29 | 30 | assert report.validation.ok 31 | 32 | expected_stage_map = { 33 | 0: {"persist"}, 34 | 1: {"normalize"}, 35 | 2: {"derive.summarize"}, 36 | 3: {"derive.extract_facts"}, 37 | 4: {"derive.profile_update", "derive.review"}, 38 | 5: {"refresh.index"}, 39 | 6: {"refresh.persona"}, 40 | 7: {"derive.pack"}, 41 | } 42 | seen_stage_ids: set[int] = set() 43 | for stage_result in report.capture_result.stage_results: 44 | for stage_id, names in expected_stage_map.items(): 45 | if stage_result.stage in names: 46 | seen_stage_ids.add(stage_id) 47 | break 48 | 49 | assert seen_stage_ids == set(range(8)), "missing stage results for one or more stages" 50 | assert report.workspace.exists() 51 | 52 | changed_dates = { 53 | entry.date for entry in report.capture_result.entries if entry.changed and not entry.deduped 54 | } 55 | assert changed_dates # sanity guard 56 | 57 | # Tamper with one artifact to ensure validators catch regressions. 
58 | first_date = sorted(changed_dates)[0] 59 | microfacts_path = report.workspace / "derived" / "microfacts" / f"{first_date}.yaml" 60 | if microfacts_path.exists(): 61 | microfacts_path.unlink() 62 | tampered = StageValidatorRegistry().run( 63 | ValidatorContext(workspace=report.workspace, capture=report.capture_result), 64 | stages=[3], 65 | ) 66 | assert not tampered.ok 67 | 68 | shutil.rmtree(report.workspace, ignore_errors=True) 69 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.chat_sessions.ChatSessionSummary.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Aggregated summary metadata for a chat session.", 3 | "properties": { 4 | "created_at": { 5 | "title": "Created At", 6 | "type": "string" 7 | }, 8 | "intent_counts": { 9 | "additionalProperties": { 10 | "type": "integer" 11 | }, 12 | "title": "Intent Counts", 13 | "type": "object" 14 | }, 15 | "last_answer_preview": { 16 | "anyOf": [ 17 | { 18 | "type": "string" 19 | }, 20 | { 21 | "type": "null" 22 | } 23 | ], 24 | "default": null, 25 | "title": "Last Answer Preview" 26 | }, 27 | "last_citations": { 28 | "items": { 29 | "type": "string" 30 | }, 31 | "title": "Last Citations", 32 | "type": "array" 33 | }, 34 | "last_clarifying_question": { 35 | "anyOf": [ 36 | { 37 | "type": "string" 38 | }, 39 | { 40 | "type": "null" 41 | } 42 | ], 43 | "default": null, 44 | "title": "Last Clarifying Question" 45 | }, 46 | "last_feedback": { 47 | "anyOf": [ 48 | { 49 | "type": "string" 50 | }, 51 | { 52 | "type": "null" 53 | } 54 | ], 55 | "default": null, 56 | "title": "Last Feedback" 57 | }, 58 | "last_question": { 59 | "anyOf": [ 60 | { 61 | "type": "string" 62 | }, 63 | { 64 | "type": "null" 65 | } 66 | ], 67 | "default": null, 68 | "title": "Last Question" 69 | }, 70 | "last_retrieval_ms": { 71 | "anyOf": [ 72 | { 73 | "type": "number" 74 | }, 75 | { 76 | "type": "null" 77 | } 78 | ], 79 | "default": null, 80 | "title": "Last Retrieval Ms" 81 | }, 82 | "session_id": { 83 | "title": "Session Id", 84 | "type": "string" 85 | }, 86 | "turn_count": { 87 | "default": 0, 88 | "title": "Turn Count", 89 | "type": "integer" 90 | }, 91 | "updated_at": { 92 | "title": "Updated At", 93 | "type": "string" 94 | } 95 | }, 96 | "required": [ 97 | "session_id", 98 | "created_at", 99 | "updated_at" 100 | ], 101 | "title": "ChatSessionSummary", 102 | "type": "object" 103 | } 104 | -------------------------------------------------------------------------------- /src/aijournal/api/chat.py: -------------------------------------------------------------------------------- 1 | """Strict chat API models shared by CLI and services.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import TYPE_CHECKING, Any, Literal 6 | 7 | from pydantic import Field 8 | 9 | from aijournal.common.base import StrictModel 10 | 11 | if TYPE_CHECKING: 12 | from aijournal.domain.index import RetrievedChunk 13 | 14 | 15 | class ChatCitation(StrictModel): 16 | """Reference to a retrieved chunk included in a chat response.""" 17 | 18 | chunk_id: str 19 | code: str 20 | normalized_id: str 21 | chunk_index: int 22 | source_path: str 23 | date: str 24 | tags: list[str] = Field(default_factory=list) 25 | score: float 26 | chunk_type: str 27 | 28 | @property 29 | def marker(self) -> str: 30 | label = self.chunk_type or "entry" 31 | return f"[{label}:{self.code}]" 32 | 33 | @classmethod 34 | def from_chunk(cls, chunk: RetrievedChunk) -> ChatCitation: 35 | 
code = f"{chunk.normalized_id}#p{chunk.chunk_index}" 36 | return cls( 37 | chunk_id=chunk.chunk_id, 38 | code=code, 39 | normalized_id=chunk.normalized_id, 40 | chunk_index=chunk.chunk_index, 41 | source_path=chunk.source_path, 42 | date=chunk.date, 43 | tags=list(chunk.tags), 44 | score=chunk.score, 45 | chunk_type=chunk.chunk_type or "entry", 46 | ) 47 | 48 | 49 | class ChatCitationRef(StrictModel): 50 | """Reference emitted by the LLM; resolved against retrieved chunks.""" 51 | 52 | code: str = Field(min_length=1) 53 | 54 | 55 | class ChatResponse(StrictModel): 56 | """Structured response returned by the chat LLM.""" 57 | 58 | answer: str = Field(..., max_length=4000) 59 | citations: list[ChatCitationRef] = Field(default_factory=list) 60 | clarifying_question: str | None = None 61 | telemetry: dict[str, Any] = Field(default_factory=dict) 62 | timestamp: str | None = None 63 | 64 | 65 | class ChatRequest(StrictModel): 66 | """Incoming chat payload for both CLI and FastAPI surfaces.""" 67 | 68 | question: str = Field(min_length=1) 69 | top: int | None = Field(default=None, ge=1) 70 | tags: list[str] | None = None 71 | source: list[str] | None = None 72 | date_from: str | None = None 73 | date_to: str | None = None 74 | session_id: str | None = Field(default=None, pattern=r"^[A-Za-z0-9_.\-]+$") 75 | save: bool = True 76 | feedback: Literal["up", "down"] | None = None 77 | -------------------------------------------------------------------------------- /src/aijournal/io/yaml_io.py: -------------------------------------------------------------------------------- 1 | """Typed YAML serialization helpers for Pydantic models.""" 2 | 3 | from __future__ import annotations 4 | 5 | from enum import Enum 6 | from typing import TYPE_CHECKING, Any, TypeVar 7 | 8 | import yaml 9 | from pydantic import BaseModel 10 | 11 | if TYPE_CHECKING: 12 | from pathlib import Path 13 | 14 | T = TypeVar("T", bound=BaseModel) 15 | 16 | 17 | class _EnumSafeDumper(yaml.SafeDumper): 18 | """YAML dumper that serializes enum instances as their values.""" 19 | 20 | 21 | def _enum_representer(dumper: _EnumSafeDumper, value: Enum) -> yaml.Node: 22 | payload = value.value if hasattr(value, "value") else value 23 | return dumper.represent_scalar("tag:yaml.org,2002:str", str(payload)) 24 | 25 | 26 | _EnumSafeDumper.add_multi_representer(Enum, _enum_representer) 27 | 28 | 29 | def _str_representer(dumper: _EnumSafeDumper, value: str) -> yaml.Node: 30 | """Render unicode directly and pretty-print multiline scalars.""" 31 | style = "|" if "\n" in value else None 32 | return dumper.represent_scalar("tag:yaml.org,2002:str", value, style=style) 33 | 34 | 35 | _EnumSafeDumper.add_representer(str, _str_representer) 36 | 37 | 38 | def _read_yaml(path: Path) -> Any: 39 | data = yaml.safe_load(path.read_text(encoding="utf-8")) 40 | return data if data is not None else {} 41 | 42 | 43 | def load_yaml_model(path: Path, cls: type[T], *, default: T | None = None) -> T: 44 | """Load a YAML document into the requested Pydantic model.""" 45 | if not path.exists(): 46 | if default is not None: 47 | return default 48 | raise FileNotFoundError(path) 49 | data = _read_yaml(path) 50 | return cls.model_validate(data) 51 | 52 | 53 | def dump_yaml(data: Any, *, sort_keys: bool = False) -> str: 54 | """Serialize arbitrary data to YAML using the enum-safe dumper.""" 55 | return yaml.dump( 56 | data, 57 | Dumper=_EnumSafeDumper, 58 | sort_keys=sort_keys, 59 | allow_unicode=True, 60 | ) 61 | 62 | 63 | def write_yaml_model(path: Path, instance: T) -> None: 
64 | """Persist a Pydantic model instance to YAML on disk.""" 65 | payload = instance.model_dump(mode="python", exclude_none=False) 66 | path.parent.mkdir(parents=True, exist_ok=True) 67 | serialized = dump_yaml(payload, sort_keys=False) 68 | if path.exists(): 69 | existing = path.read_text(encoding="utf-8") 70 | if existing == serialized: 71 | return 72 | path.write_text(serialized, encoding="utf-8") 73 | -------------------------------------------------------------------------------- /src/aijournal/services/capture/stages/stage8_pack.py: -------------------------------------------------------------------------------- 1 | """Stage 8: assemble shareable persona/context packs.""" 2 | 3 | from __future__ import annotations 4 | 5 | from time import perf_counter 6 | from typing import TYPE_CHECKING 7 | 8 | import typer 9 | 10 | if TYPE_CHECKING: 11 | from pathlib import Path 12 | 13 | from aijournal.api.capture import CaptureInput 14 | from aijournal.services.capture import PackStage8Outputs 15 | 16 | 17 | def run_pack_stage_8( 18 | inputs: CaptureInput, 19 | root: Path, 20 | run_id: str, 21 | persona_changed: bool, 22 | ) -> PackStage8Outputs: 23 | from aijournal.commands.pack import run_pack 24 | from aijournal.services.capture import PackStage8Outputs 25 | from aijournal.services.capture.results import OperationResult 26 | from aijournal.services.capture.utils import relative_path 27 | 28 | if not inputs.pack: 29 | return PackStage8Outputs(OperationResult.noop("no pack requested"), 0.0) 30 | if not persona_changed: 31 | return PackStage8Outputs( 32 | OperationResult.noop("persona unchanged, pack not regenerated"), 33 | 0.0, 34 | ) 35 | 36 | stage_start = perf_counter() 37 | level = inputs.pack.upper() 38 | history_days = 1 if level == "L4" else 0 39 | pack_output = root / "derived" / "packs" / f"{level.lower()}_{run_id}.yaml" 40 | pack_error: str | None = None 41 | try: 42 | run_pack( 43 | level, 44 | None, 45 | output=pack_output, 46 | max_tokens=None, 47 | fmt="yaml", 48 | history_days=history_days, 49 | dry_run=False, 50 | ) 51 | except typer.Exit as exc: 52 | if exc.exit_code not in (0,): 53 | pack_error = str(exc) 54 | except Exception as exc: # pragma: no cover - defensive 55 | pack_error = str(exc) 56 | duration_ms = (perf_counter() - stage_start) * 1000.0 57 | pack_details: dict[str, object] = {"level": level, "history_days": history_days} 58 | if pack_error is not None: 59 | op_result = OperationResult.fail( 60 | f"pack generation failed: {pack_error}", 61 | details=pack_details, 62 | ) 63 | else: 64 | rel_output = relative_path(pack_output, root) 65 | op_result = OperationResult.wrote( 66 | [rel_output], 67 | message="pack generated", 68 | details=pack_details, 69 | ) 70 | return PackStage8Outputs(op_result, duration_ms) 71 | -------------------------------------------------------------------------------- /src/aijournal/services/capture/stages/stage2_summarize.py: -------------------------------------------------------------------------------- 1 | """Stage 2: generate and persist daily summaries.""" 2 | 3 | from __future__ import annotations 4 | 5 | from time import perf_counter 6 | from typing import TYPE_CHECKING 7 | 8 | if TYPE_CHECKING: 9 | from pathlib import Path 10 | 11 | from aijournal.api.capture import CaptureInput 12 | from aijournal.common.app_config import AppConfig 13 | from aijournal.services.capture import SummarizeStage2Outputs 14 | 15 | 16 | def run_summarize_stage_2( 17 | changed_dates: list[str], 18 | inputs: CaptureInput, 19 | root: Path, 20 | config: 
AppConfig, 21 | ) -> SummarizeStage2Outputs: 22 | from aijournal.services.capture import SummarizeStage2Outputs 23 | from aijournal.services.capture.graceful import graceful_summarize 24 | from aijournal.services.capture.results import OperationResult 25 | from aijournal.services.capture.utils import relative_path 26 | 27 | stage_start = perf_counter() 28 | summary_paths: list[str] = [] 29 | summary_errors: list[str] = [] 30 | for date in changed_dates: 31 | summary_path, error = graceful_summarize( 32 | date, 33 | progress=inputs.progress, 34 | workspace=root, 35 | config=config, 36 | ) 37 | if error: 38 | summary_errors.append(f"{date}: {error}") 39 | elif summary_path: 40 | summary_paths.append(relative_path(summary_path, root)) 41 | duration_ms = (perf_counter() - stage_start) * 1000.0 42 | summary_details: dict[str, object] = {"dates": changed_dates} 43 | if summary_errors: 44 | message = "summaries completed with errors" if summary_paths else "summaries failed" 45 | op_result = OperationResult( 46 | ok=bool(summary_paths), 47 | changed=bool(summary_paths), 48 | message=message, 49 | artifacts=summary_paths, 50 | warnings=summary_errors, 51 | details=summary_details, 52 | ) 53 | elif summary_paths: 54 | message = f"generated summaries for {len(summary_paths)} entries" 55 | op_result = OperationResult.wrote( 56 | summary_paths, 57 | message=message, 58 | details=summary_details, 59 | ) 60 | else: 61 | op_result = OperationResult.noop( 62 | "summaries already up to date", 63 | details=summary_details, 64 | ) 65 | return SummarizeStage2Outputs(op_result, duration_ms, summary_paths) 66 | -------------------------------------------------------------------------------- /schemas/core/aijournal.api.chat.ChatRequest.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Incoming chat payload for both CLI and FastAPI surfaces.", 3 | "properties": { 4 | "date_from": { 5 | "anyOf": [ 6 | { 7 | "type": "string" 8 | }, 9 | { 10 | "type": "null" 11 | } 12 | ], 13 | "default": null, 14 | "title": "Date From" 15 | }, 16 | "date_to": { 17 | "anyOf": [ 18 | { 19 | "type": "string" 20 | }, 21 | { 22 | "type": "null" 23 | } 24 | ], 25 | "default": null, 26 | "title": "Date To" 27 | }, 28 | "feedback": { 29 | "anyOf": [ 30 | { 31 | "enum": [ 32 | "up", 33 | "down" 34 | ], 35 | "type": "string" 36 | }, 37 | { 38 | "type": "null" 39 | } 40 | ], 41 | "default": null, 42 | "title": "Feedback" 43 | }, 44 | "question": { 45 | "minLength": 1, 46 | "title": "Question", 47 | "type": "string" 48 | }, 49 | "save": { 50 | "default": true, 51 | "title": "Save", 52 | "type": "boolean" 53 | }, 54 | "session_id": { 55 | "anyOf": [ 56 | { 57 | "pattern": "^[A-Za-z0-9_.\\-]+$", 58 | "type": "string" 59 | }, 60 | { 61 | "type": "null" 62 | } 63 | ], 64 | "default": null, 65 | "title": "Session Id" 66 | }, 67 | "source": { 68 | "anyOf": [ 69 | { 70 | "items": { 71 | "type": "string" 72 | }, 73 | "type": "array" 74 | }, 75 | { 76 | "type": "null" 77 | } 78 | ], 79 | "default": null, 80 | "title": "Source" 81 | }, 82 | "tags": { 83 | "anyOf": [ 84 | { 85 | "items": { 86 | "type": "string" 87 | }, 88 | "type": "array" 89 | }, 90 | { 91 | "type": "null" 92 | } 93 | ], 94 | "default": null, 95 | "title": "Tags" 96 | }, 97 | "top": { 98 | "anyOf": [ 99 | { 100 | "minimum": 1, 101 | "type": "integer" 102 | }, 103 | { 104 | "type": "null" 105 | } 106 | ], 107 | "default": null, 108 | "title": "Top" 109 | } 110 | }, 111 | "required": [ 112 | "question" 113 | ], 114 | "title": 
"ChatRequest", 115 | "type": "object" 116 | } 117 | -------------------------------------------------------------------------------- /tests/services/test_claim_id_generation.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from aijournal.commands import profile as profile_cmd 4 | from aijournal.domain.changes import ClaimAtomInput, ClaimProposal 5 | from aijournal.domain.claims import Scope 6 | from aijournal.domain.enums import ClaimStatus, ClaimType 7 | from aijournal.domain.evidence import SourceRef 8 | from aijournal.utils import time as time_utils 9 | 10 | 11 | def _make_proposal( 12 | statement: str, 13 | *, 14 | normalized_id: str = "entry-2006-12-01", 15 | predicate: str = "prefers", 16 | ) -> ClaimProposal: 17 | claim_input = ClaimAtomInput( 18 | type=ClaimType.PREFERENCE, 19 | subject="work", 20 | predicate=predicate, 21 | statement=statement, 22 | scope=Scope(), 23 | strength=0.6, 24 | status=ClaimStatus.ACCEPTED, 25 | review_after_days=120, 26 | ) 27 | return ClaimProposal( 28 | type=claim_input.type, 29 | subject=claim_input.subject, 30 | predicate=claim_input.predicate, 31 | statement=claim_input.statement, 32 | scope=claim_input.scope, 33 | strength=claim_input.strength, 34 | status=claim_input.status, 35 | review_after_days=claim_input.review_after_days, 36 | normalized_ids=[normalized_id], 37 | evidence=[SourceRef(entry_id=normalized_id, spans=[])], 38 | ) 39 | 40 | 41 | def test_claim_ids_include_hash_suffix_for_uniqueness() -> None: 42 | proposal_a = _make_proposal("Prefers morning planning sessions") 43 | proposal_b = _make_proposal("Prefers evening reflection rituals") 44 | 45 | id_a = profile_cmd._proposal_claim_id( 46 | proposal_a, 47 | proposal_a.statement, 48 | set(), 49 | ) 50 | id_b = profile_cmd._proposal_claim_id( 51 | proposal_b, 52 | proposal_b.statement, 53 | {id_a}, 54 | ) 55 | 56 | assert id_a != id_b 57 | assert id_a.startswith("entry-2006-12-01-") 58 | assert id_b.startswith("entry-2006-12-01-") 59 | 60 | 61 | def test_apply_claim_proposal_keeps_every_statement() -> None: 62 | timestamp = time_utils.format_timestamp(time_utils.now()) 63 | claims: list = [] 64 | proposals = [ 65 | _make_proposal("Blocks mornings for planning", predicate="plans_mornings"), 66 | _make_proposal("Schedules evening retros", predicate="retros_evenings"), 67 | ] 68 | 69 | for proposal in proposals: 70 | profile_cmd._apply_claim_proposal(claims, proposal, timestamp) 71 | 72 | assert len(claims) == len(proposals) 73 | assert len({claim.id for claim in claims}) == len(proposals) 74 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.chat_sessions.ChatTranscriptTurn.json: -------------------------------------------------------------------------------- 1 | { 2 | "$defs": { 3 | "ChatTelemetry": { 4 | "description": "Telemetry captured during a chat turn.", 5 | "properties": { 6 | "chunk_count": { 7 | "title": "Chunk Count", 8 | "type": "integer" 9 | }, 10 | "model": { 11 | "title": "Model", 12 | "type": "string" 13 | }, 14 | "retrieval_ms": { 15 | "title": "Retrieval Ms", 16 | "type": "number" 17 | }, 18 | "retriever_source": { 19 | "title": "Retriever Source", 20 | "type": "string" 21 | } 22 | }, 23 | "required": [ 24 | "retrieval_ms", 25 | "chunk_count", 26 | "retriever_source", 27 | "model" 28 | ], 29 | "title": "ChatTelemetry", 30 | "type": "object" 31 | } 32 | }, 33 | "description": "Captured question/answer pair within a chat 
transcript.", 34 | "properties": { 35 | "answer": { 36 | "title": "Answer", 37 | "type": "string" 38 | }, 39 | "citations": { 40 | "items": { 41 | "type": "string" 42 | }, 43 | "title": "Citations", 44 | "type": "array" 45 | }, 46 | "clarifying_question": { 47 | "anyOf": [ 48 | { 49 | "type": "string" 50 | }, 51 | { 52 | "type": "null" 53 | } 54 | ], 55 | "default": null, 56 | "title": "Clarifying Question" 57 | }, 58 | "fake_mode": { 59 | "title": "Fake Mode", 60 | "type": "boolean" 61 | }, 62 | "feedback": { 63 | "anyOf": [ 64 | { 65 | "type": "string" 66 | }, 67 | { 68 | "type": "null" 69 | } 70 | ], 71 | "default": null, 72 | "title": "Feedback" 73 | }, 74 | "intent": { 75 | "title": "Intent", 76 | "type": "string" 77 | }, 78 | "question": { 79 | "title": "Question", 80 | "type": "string" 81 | }, 82 | "telemetry": { 83 | "$ref": "#/$defs/ChatTelemetry" 84 | }, 85 | "timestamp": { 86 | "title": "Timestamp", 87 | "type": "string" 88 | }, 89 | "turn_index": { 90 | "title": "Turn Index", 91 | "type": "integer" 92 | } 93 | }, 94 | "required": [ 95 | "turn_index", 96 | "timestamp", 97 | "question", 98 | "answer", 99 | "intent", 100 | "telemetry", 101 | "fake_mode" 102 | ], 103 | "title": "ChatTranscriptTurn", 104 | "type": "object" 105 | } 106 | -------------------------------------------------------------------------------- /src/aijournal/services/capture/stages/stage3_facts.py: -------------------------------------------------------------------------------- 1 | """Stage 3: extract micro-facts and store consolidated records.""" 2 | 3 | from __future__ import annotations 4 | 5 | from time import perf_counter 6 | from typing import TYPE_CHECKING 7 | 8 | if TYPE_CHECKING: 9 | from pathlib import Path 10 | 11 | from aijournal.api.capture import CaptureInput 12 | from aijournal.common.app_config import AppConfig 13 | from aijournal.services.capture import FactsStage3Outputs 14 | 15 | 16 | def run_facts_stage_3( 17 | changed_dates: list[str], 18 | inputs: CaptureInput, 19 | root: Path, 20 | config: AppConfig, 21 | ) -> FactsStage3Outputs: 22 | from aijournal.commands.profile import load_profile_components 23 | from aijournal.services.capture import FactsStage3Outputs 24 | from aijournal.services.capture.graceful import graceful_facts 25 | from aijournal.services.capture.results import OperationResult 26 | from aijournal.services.capture.utils import relative_path 27 | 28 | stage_start = perf_counter() 29 | facts_paths: list[str] = [] 30 | facts_errors: list[str] = [] 31 | _, claim_models = load_profile_components(root, config=config) 32 | for date in changed_dates: 33 | facts_path, error = graceful_facts( 34 | date, 35 | progress=inputs.progress, 36 | claim_models=claim_models, 37 | generate_preview=False, 38 | workspace=root, 39 | config=config, 40 | ) 41 | if error: 42 | facts_errors.append(f"{date}: {error}") 43 | elif facts_path: 44 | facts_paths.append(relative_path(facts_path, root)) 45 | duration_ms = (perf_counter() - stage_start) * 1000.0 46 | facts_details: dict[str, object] = {"dates": changed_dates} 47 | if facts_errors: 48 | message = "facts completed with errors" if facts_paths else "facts failed" 49 | op_result = OperationResult( 50 | ok=bool(facts_paths), 51 | changed=bool(facts_paths), 52 | message=message, 53 | artifacts=facts_paths, 54 | warnings=facts_errors, 55 | details=facts_details, 56 | ) 57 | elif facts_paths: 58 | message = f"extracted micro-facts for {len(facts_paths)} entries" 59 | op_result = OperationResult.wrote( 60 | facts_paths, 61 | message=message, 62 | 
details=facts_details, 63 | ) 64 | else: 65 | op_result = OperationResult.noop( 66 | "micro-facts already up to date", 67 | details=facts_details, 68 | ) 69 | return FactsStage3Outputs(op_result, duration_ms, facts_paths) 70 | -------------------------------------------------------------------------------- /tests/prompts/test_metadata_filtering.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import pytest 4 | from pydantic import ValidationError 5 | 6 | from aijournal.domain.prompts import ( 7 | PromptMicroFact, 8 | PromptMicroFacts, 9 | convert_prompt_microfacts, 10 | is_metadata_only_fact, 11 | ) 12 | 13 | 14 | def _fact( 15 | *, 16 | fact_id: str = "fact-1", 17 | statement: str = "Completed deep work block", 18 | evidence_entry: str | None = "entry-1", 19 | ) -> PromptMicroFact: 20 | return PromptMicroFact( 21 | id=fact_id, 22 | statement=statement, 23 | confidence=0.9, 24 | evidence_entry=evidence_entry, 25 | ) 26 | 27 | 28 | def test_is_metadata_only_fact_detects_common_patterns() -> None: 29 | assert is_metadata_only_fact(_fact(fact_id="entry-created-foo")) 30 | assert is_metadata_only_fact(_fact(statement="Entry created on 2025-11-14")) 31 | assert is_metadata_only_fact(_fact(statement="Title is Focus Sprint")) 32 | assert is_metadata_only_fact(_fact(evidence_entry=None, statement="Any content")) 33 | 34 | 35 | def test_is_metadata_only_fact_allows_grounded_content() -> None: 36 | assert not is_metadata_only_fact( 37 | _fact(statement="Completed 2h focus block on auth plan", fact_id="focus-block"), 38 | ) 39 | 40 | 41 | def test_convert_prompt_microfacts_filters_metadata_only_entries() -> None: 42 | prompt = PromptMicroFacts( 43 | facts=[ 44 | _fact(fact_id="entry-created-foo", statement="Entry created on 2025-11-14"), 45 | _fact(fact_id="focus-block", statement="Completed 2h focus block"), 46 | ], 47 | ) 48 | 49 | result = convert_prompt_microfacts(prompt, entry_dates={"entry-1": "2025-11-14"}) 50 | 51 | statements = [fact.statement for fact in result.facts] 52 | assert statements == ["Completed 2h focus block"] 53 | 54 | 55 | def test_prompt_microfacts_validator_rejects_unknown_entry_ids() -> None: 56 | payload = { 57 | "facts": [ 58 | { 59 | "id": "focus-block", 60 | "statement": "Completed 2h focus block", 61 | "confidence": 0.8, 62 | "evidence_entry": "entry-1", 63 | }, 64 | ], 65 | "claim_proposals": [], 66 | } 67 | 68 | # Valid when context lists entry-1. 69 | instance = PromptMicroFacts.model_validate(payload, context={"entry_ids": ["entry-1"]}) 70 | assert instance.facts[0].evidence_entry == "entry-1" 71 | 72 | # Invalid when entry ID missing from context. 
73 | with pytest.raises(ValidationError): 74 | PromptMicroFacts.model_validate(payload, context={"entry_ids": ["other-entry"]}) 75 | -------------------------------------------------------------------------------- /src/aijournal/common/command_runner.py: -------------------------------------------------------------------------------- 1 | """Utility to run command pipelines with standardized logging.""" 2 | 3 | from __future__ import annotations 4 | 5 | from pathlib import Path 6 | from typing import TYPE_CHECKING, Any, TypeVar 7 | 8 | from pydantic import BaseModel 9 | from pydantic_core import PydanticSerializationError 10 | 11 | if TYPE_CHECKING: 12 | from collections.abc import Callable 13 | 14 | from aijournal.common.context import RunContext 15 | 16 | OptionsT = TypeVar("OptionsT", bound=BaseModel) 17 | PreparedT = TypeVar("PreparedT") 18 | ResultT = TypeVar("ResultT") 19 | OutputT = TypeVar("OutputT") 20 | 21 | 22 | def run_command_pipeline( 23 | ctx: RunContext, 24 | options: OptionsT, 25 | *, 26 | prepare_inputs: Callable[[RunContext, OptionsT], PreparedT], 27 | invoke_pipeline: Callable[[RunContext, PreparedT], ResultT], 28 | persist_output: Callable[[RunContext, ResultT], OutputT], 29 | ) -> OutputT: 30 | ctx.emit(event="command_start", options=_summarize(options)) 31 | with ctx.span("prepare_inputs"): 32 | prepared = prepare_inputs(ctx, options) 33 | with ctx.span("invoke_pipeline"): 34 | result = invoke_pipeline(ctx, prepared) 35 | with ctx.span("persist_output"): 36 | output = persist_output(ctx, result) 37 | ctx.emit(event="command_complete", output=_summarize(output)) 38 | return output 39 | 40 | 41 | def _summarize(value: Any) -> Any: 42 | result: Any 43 | if isinstance(value, BaseModel): 44 | try: 45 | result = value.model_dump(exclude_none=True, mode="json") 46 | except PydanticSerializationError: 47 | raw = value.model_dump(exclude_none=True, mode="python") 48 | result = _convert(raw) 49 | elif isinstance(value, (str, int, float, bool)) or value is None: 50 | result = value 51 | elif isinstance(value, Path): 52 | result = str(value) 53 | elif isinstance(value, (list, tuple, set)): 54 | result = [_summarize(item) for item in value] 55 | elif hasattr(value, "model_dump"): 56 | try: 57 | result = value.model_dump() 58 | except Exception: # pragma: no cover - defensive 59 | result = str(value) 60 | else: 61 | result = str(value) 62 | return result 63 | 64 | 65 | def _convert(obj: Any) -> Any: 66 | if isinstance(obj, dict): 67 | return {key: _convert(val) for key, val in obj.items()} 68 | if isinstance(obj, (list, tuple, set)): 69 | return [_convert(item) for item in obj] 70 | if callable(obj): 71 | return getattr(obj, "__name__", "callable") 72 | if isinstance(obj, Path): 73 | return str(obj) 74 | return obj 75 | -------------------------------------------------------------------------------- /schemas/core/aijournal.services.consolidator.ClaimConflict.json: -------------------------------------------------------------------------------- 1 | { 2 | "$defs": { 3 | "ClaimSignature": { 4 | "properties": { 5 | "claim_type": { 6 | "title": "Claim Type", 7 | "type": "string" 8 | }, 9 | "predicate": { 10 | "title": "Predicate", 11 | "type": "string" 12 | }, 13 | "scope": { 14 | "maxItems": 2, 15 | "minItems": 2, 16 | "prefixItems": [ 17 | { 18 | "anyOf": [ 19 | { 20 | "type": "string" 21 | }, 22 | { 23 | "type": "null" 24 | } 25 | ] 26 | }, 27 | { 28 | "items": { 29 | "type": "string" 30 | }, 31 | "type": "array" 32 | } 33 | ], 34 | "title": "Scope", 35 | "type": "array" 36 | }, 37 | 
"subject": { 38 | "title": "Subject", 39 | "type": "string" 40 | } 41 | }, 42 | "required": [ 43 | "claim_type", 44 | "subject", 45 | "predicate", 46 | "scope" 47 | ], 48 | "title": "ClaimSignature", 49 | "type": "object" 50 | }, 51 | "SourceRef": { 52 | "description": "Reference to a normalized entry that supports a claim or fact.", 53 | "properties": { 54 | "entry_id": { 55 | "title": "Entry Id", 56 | "type": "string" 57 | } 58 | }, 59 | "required": [ 60 | "entry_id" 61 | ], 62 | "title": "SourceRef", 63 | "type": "object" 64 | } 65 | }, 66 | "properties": { 67 | "claim_id": { 68 | "title": "Claim Id", 69 | "type": "string" 70 | }, 71 | "existing_statement": { 72 | "title": "Existing Statement", 73 | "type": "string" 74 | }, 75 | "incoming_sources": { 76 | "items": { 77 | "$ref": "#/$defs/SourceRef" 78 | }, 79 | "title": "Incoming Sources", 80 | "type": "array" 81 | }, 82 | "incoming_statement": { 83 | "title": "Incoming Statement", 84 | "type": "string" 85 | }, 86 | "signature": { 87 | "$ref": "#/$defs/ClaimSignature" 88 | }, 89 | "statement": { 90 | "title": "Statement", 91 | "type": "string" 92 | } 93 | }, 94 | "required": [ 95 | "claim_id", 96 | "signature", 97 | "statement", 98 | "existing_statement", 99 | "incoming_statement" 100 | ], 101 | "title": "ClaimConflict", 102 | "type": "object" 103 | } 104 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.events.ClaimConflictPayload.json: -------------------------------------------------------------------------------- 1 | { 2 | "$defs": { 3 | "ClaimSignaturePayload": { 4 | "description": "Serialized signature describing the target slot for a claim.", 5 | "properties": { 6 | "claim_type": { 7 | "title": "Claim Type", 8 | "type": "string" 9 | }, 10 | "context": { 11 | "items": { 12 | "type": "string" 13 | }, 14 | "title": "Context", 15 | "type": "array" 16 | }, 17 | "domain": { 18 | "anyOf": [ 19 | { 20 | "type": "string" 21 | }, 22 | { 23 | "type": "null" 24 | } 25 | ], 26 | "default": null, 27 | "title": "Domain" 28 | }, 29 | "predicate": { 30 | "title": "Predicate", 31 | "type": "string" 32 | }, 33 | "subject": { 34 | "title": "Subject", 35 | "type": "string" 36 | } 37 | }, 38 | "required": [ 39 | "claim_type", 40 | "subject", 41 | "predicate" 42 | ], 43 | "title": "ClaimSignaturePayload", 44 | "type": "object" 45 | }, 46 | "SourceRef": { 47 | "description": "Reference to a normalized entry that supports a claim or fact.", 48 | "properties": { 49 | "entry_id": { 50 | "title": "Entry Id", 51 | "type": "string" 52 | } 53 | }, 54 | "required": [ 55 | "entry_id" 56 | ], 57 | "title": "SourceRef", 58 | "type": "object" 59 | } 60 | }, 61 | "description": "Structured conflict emitted during consolidation previews.", 62 | "properties": { 63 | "claim_id": { 64 | "title": "Claim Id", 65 | "type": "string" 66 | }, 67 | "existing_statement": { 68 | "title": "Existing Statement", 69 | "type": "string" 70 | }, 71 | "incoming_sources": { 72 | "items": { 73 | "$ref": "#/$defs/SourceRef" 74 | }, 75 | "title": "Incoming Sources", 76 | "type": "array" 77 | }, 78 | "incoming_statement": { 79 | "title": "Incoming Statement", 80 | "type": "string" 81 | }, 82 | "signature": { 83 | "$ref": "#/$defs/ClaimSignaturePayload" 84 | }, 85 | "statement": { 86 | "title": "Statement", 87 | "type": "string" 88 | } 89 | }, 90 | "required": [ 91 | "claim_id", 92 | "signature", 93 | "statement", 94 | "existing_statement", 95 | "incoming_statement" 96 | ], 97 | "title": "ClaimConflictPayload", 98 | "type": 
"object" 99 | } 100 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.changes.FacetChange.json: -------------------------------------------------------------------------------- 1 | { 2 | "$defs": { 3 | "FacetOperation": { 4 | "enum": [ 5 | "set", 6 | "remove", 7 | "merge" 8 | ], 9 | "title": "FacetOperation", 10 | "type": "string" 11 | }, 12 | "SourceRef": { 13 | "description": "Reference to a normalized entry that supports a claim or fact.", 14 | "properties": { 15 | "entry_id": { 16 | "title": "Entry Id", 17 | "type": "string" 18 | } 19 | }, 20 | "required": [ 21 | "entry_id" 22 | ], 23 | "title": "SourceRef", 24 | "type": "object" 25 | } 26 | }, 27 | "description": "Facet modification proposed by characterization pipelines.", 28 | "properties": { 29 | "confidence": { 30 | "anyOf": [ 31 | { 32 | "type": "number" 33 | }, 34 | { 35 | "type": "null" 36 | } 37 | ], 38 | "default": null, 39 | "title": "Confidence" 40 | }, 41 | "evidence": { 42 | "items": { 43 | "$ref": "#/$defs/SourceRef" 44 | }, 45 | "title": "Evidence", 46 | "type": "array" 47 | }, 48 | "method": { 49 | "anyOf": [ 50 | { 51 | "type": "string" 52 | }, 53 | { 54 | "type": "null" 55 | } 56 | ], 57 | "default": null, 58 | "title": "Method" 59 | }, 60 | "operation": { 61 | "$ref": "#/$defs/FacetOperation" 62 | }, 63 | "path": { 64 | "title": "Path", 65 | "type": "string" 66 | }, 67 | "rationale": { 68 | "anyOf": [ 69 | { 70 | "type": "string" 71 | }, 72 | { 73 | "type": "null" 74 | } 75 | ], 76 | "default": null, 77 | "title": "Rationale" 78 | }, 79 | "review_after_days": { 80 | "anyOf": [ 81 | { 82 | "type": "integer" 83 | }, 84 | { 85 | "type": "null" 86 | } 87 | ], 88 | "default": null, 89 | "title": "Review After Days" 90 | }, 91 | "user_verified": { 92 | "anyOf": [ 93 | { 94 | "type": "boolean" 95 | }, 96 | { 97 | "type": "null" 98 | } 99 | ], 100 | "default": null, 101 | "title": "User Verified" 102 | }, 103 | "value": { 104 | "anyOf": [ 105 | {}, 106 | { 107 | "type": "null" 108 | } 109 | ], 110 | "default": null, 111 | "title": "Value" 112 | } 113 | }, 114 | "required": [ 115 | "path", 116 | "operation" 117 | ], 118 | "title": "FacetChange", 119 | "type": "object" 120 | } 121 | -------------------------------------------------------------------------------- /tests/services/capture/test_stage_persona.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING 4 | 5 | from aijournal.common.app_config import AppConfig 6 | from aijournal.services.capture import CaptureInput 7 | from aijournal.services.capture.stages import stage7_persona 8 | 9 | if TYPE_CHECKING: 10 | from pathlib import Path 11 | 12 | 13 | def _make_inputs() -> CaptureInput: 14 | return CaptureInput(source="stdin", text="Persona test") 15 | 16 | 17 | def test_stage7_persona_triggers_build(tmp_path: Path, monkeypatch) -> None: 18 | persona_dir = tmp_path / "derived" / "persona" 19 | persona_dir.mkdir(parents=True, exist_ok=True) 20 | 21 | states = [("stale", []), ("fresh", [])] 22 | 23 | monkeypatch.setattr( 24 | "aijournal.commands.persona.persona_state", 25 | lambda root, workspace, config: states.pop(0), 26 | ) 27 | 28 | monkeypatch.setattr( 29 | "aijournal.commands.profile.load_profile_components", 30 | lambda *_, **__: (object(), []), 31 | ) 32 | 33 | monkeypatch.setattr( 34 | "aijournal.commands.profile.profile_to_dict", 35 | lambda profile: {"name": "profile"}, 36 | ) 37 | 38 | 
monkeypatch.setattr( 39 | "aijournal.common.config_loader.load_config", 40 | lambda root: {}, 41 | ) 42 | 43 | persona_path = persona_dir / "persona_core.yaml" 44 | 45 | def fake_build(profile, claim_models, *, config, root): 46 | persona_path.write_text("persona", encoding="utf-8") 47 | return persona_path, True 48 | 49 | monkeypatch.setattr("aijournal.commands.persona.run_persona_build", fake_build) 50 | 51 | config = AppConfig() 52 | outputs = stage7_persona.run_persona_stage_7( 53 | _make_inputs(), 54 | tmp_path, 55 | config, 56 | {"profile": 1}, 57 | ) 58 | 59 | assert outputs.result.changed is True 60 | assert outputs.persona_changed is True 61 | assert outputs.persona_stale_before is True 62 | assert outputs.persona_stale_after is False 63 | 64 | 65 | def test_stage7_persona_noop_when_fresh(tmp_path: Path, monkeypatch) -> None: 66 | monkeypatch.setattr( 67 | "aijournal.commands.persona.persona_state", 68 | lambda root, workspace, config: ("fresh", []), 69 | ) 70 | monkeypatch.setattr( 71 | "aijournal.commands.profile.load_profile_components", 72 | lambda *_, **__: (None, []), 73 | ) 74 | 75 | config = AppConfig() 76 | outputs = stage7_persona.run_persona_stage_7( 77 | _make_inputs(), 78 | tmp_path, 79 | config, 80 | {}, 81 | ) 82 | 83 | assert outputs.result.changed is False 84 | assert outputs.result.ok is True 85 | assert outputs.persona_changed is False 86 | assert outputs.persona_stale_before is False 87 | -------------------------------------------------------------------------------- /tests/commands/test_microfact_prompts.py: -------------------------------------------------------------------------------- 1 | """Tests ensuring consolidated microfacts flow into profile update prompts.""" 2 | 3 | from __future__ import annotations 4 | 5 | import json 6 | from typing import TYPE_CHECKING 7 | 8 | from aijournal.commands import profile_update as profile_update_module 9 | from aijournal.common.app_config import AppConfig 10 | from aijournal.domain.facts import ConsolidatedMicroFact, ConsolidatedMicrofactsFile 11 | 12 | if TYPE_CHECKING: 13 | from pathlib import Path 14 | 15 | 16 | def _sample_consolidated() -> ConsolidatedMicrofactsFile: 17 | return ConsolidatedMicrofactsFile( 18 | generated_at="2025-01-05T00:00:00Z", 19 | embedding_model="fake-model", 20 | facts=[ 21 | ConsolidatedMicroFact( 22 | id="recurring.focus", 23 | statement="Blocks 8-10am for deep work", 24 | canonical_statement="blocks 8-10am for deep work", 25 | confidence=0.82, 26 | first_seen="2025-01-01", 27 | last_seen="2025-01-05", 28 | observation_count=3, 29 | domain="journal", 30 | contexts=["focus"], 31 | evidence_entries=["entry-1", "entry-2"], 32 | source_fact_ids=["2025-01-01:focus"], 33 | ), 34 | ], 35 | ) 36 | 37 | 38 | def test_profile_update_consolidated_payload(monkeypatch, tmp_path: Path) -> None: 39 | monkeypatch.setattr( 40 | profile_update_module, 41 | "load_consolidated_microfacts", 42 | lambda workspace, config: _sample_consolidated(), 43 | ) 44 | monkeypatch.setattr( 45 | profile_update_module, 46 | "select_recurring_facts", 47 | lambda snapshot, **_: [ 48 | { 49 | "statement": fact.statement, 50 | "observation_count": fact.observation_count, 51 | "first_seen": fact.first_seen, 52 | "last_seen": fact.last_seen, 53 | "contexts": fact.contexts, 54 | "evidence_entries": fact.evidence_entries, 55 | } 56 | for fact in snapshot.facts 57 | ], 58 | ) 59 | 60 | payload = profile_update_module._load_consolidated_facts_json(tmp_path, AppConfig()) 61 | consolidated_payload = json.loads(payload) 62 | assert 
consolidated_payload["facts"][0]["observation_count"] == 3 63 | 64 | 65 | def test_profile_update_consolidated_payload_missing_snapshot( 66 | monkeypatch, 67 | tmp_path: Path, 68 | ) -> None: 69 | monkeypatch.setattr( 70 | profile_update_module, 71 | "load_consolidated_microfacts", 72 | lambda workspace, config: None, 73 | ) 74 | 75 | payload = profile_update_module._load_consolidated_facts_json(tmp_path, AppConfig()) 76 | assert payload == "{}" 77 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.packs.PackBundle.json: -------------------------------------------------------------------------------- 1 | { 2 | "$defs": { 3 | "PackEntry": { 4 | "description": "Single file included in an export pack.", 5 | "properties": { 6 | "content": { 7 | "title": "Content", 8 | "type": "string" 9 | }, 10 | "path": { 11 | "title": "Path", 12 | "type": "string" 13 | }, 14 | "role": { 15 | "title": "Role", 16 | "type": "string" 17 | }, 18 | "tokens": { 19 | "title": "Tokens", 20 | "type": "integer" 21 | } 22 | }, 23 | "required": [ 24 | "role", 25 | "path", 26 | "tokens", 27 | "content" 28 | ], 29 | "title": "PackEntry", 30 | "type": "object" 31 | }, 32 | "PackMeta": { 33 | "description": "Metadata describing the assembled pack.", 34 | "properties": { 35 | "generated_at": { 36 | "title": "Generated At", 37 | "type": "string" 38 | }, 39 | "max_tokens": { 40 | "title": "Max Tokens", 41 | "type": "integer" 42 | }, 43 | "total_tokens": { 44 | "title": "Total Tokens", 45 | "type": "integer" 46 | }, 47 | "trimmed": { 48 | "items": { 49 | "$ref": "#/$defs/TrimmedFile" 50 | }, 51 | "title": "Trimmed", 52 | "type": "array" 53 | } 54 | }, 55 | "required": [ 56 | "total_tokens", 57 | "max_tokens", 58 | "generated_at" 59 | ], 60 | "title": "PackMeta", 61 | "type": "object" 62 | }, 63 | "TrimmedFile": { 64 | "description": "Record of a file trimmed due to token budget limits.", 65 | "properties": { 66 | "path": { 67 | "title": "Path", 68 | "type": "string" 69 | }, 70 | "role": { 71 | "title": "Role", 72 | "type": "string" 73 | } 74 | }, 75 | "required": [ 76 | "role", 77 | "path" 78 | ], 79 | "title": "TrimmedFile", 80 | "type": "object" 81 | } 82 | }, 83 | "description": "Structured representation of a pack export.", 84 | "properties": { 85 | "date": { 86 | "title": "Date", 87 | "type": "string" 88 | }, 89 | "files": { 90 | "items": { 91 | "$ref": "#/$defs/PackEntry" 92 | }, 93 | "title": "Files", 94 | "type": "array" 95 | }, 96 | "level": { 97 | "title": "Level", 98 | "type": "string" 99 | }, 100 | "meta": { 101 | "$ref": "#/$defs/PackMeta" 102 | } 103 | }, 104 | "required": [ 105 | "level", 106 | "date", 107 | "meta" 108 | ], 109 | "title": "PackBundle", 110 | "type": "object" 111 | } 112 | -------------------------------------------------------------------------------- /tests/services/capture/test_stage_facts.py: -------------------------------------------------------------------------------- 1 | """Tests for stage3_facts graceful error handling.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import TYPE_CHECKING, Never 6 | 7 | import typer 8 | 9 | from aijournal.common.app_config import AppConfig 10 | from aijournal.services.capture import CaptureInput 11 | from aijournal.services.capture.stages import stage3_facts 12 | 13 | if TYPE_CHECKING: 14 | from pathlib import Path 15 | 16 | 17 | def _make_inputs() -> CaptureInput: 18 | return CaptureInput(source="stdin", text="Sample entry") 19 | 20 | 21 | def _make_config() -> AppConfig: 22 
| return AppConfig( 23 | paths={ 24 | "data": "data", 25 | "derived": "derived", 26 | "profile": "profile", 27 | "prompts": "prompts", 28 | }, 29 | ) 30 | 31 | 32 | def test_stage3_facts_success(tmp_path: Path, monkeypatch) -> None: 33 | facts_path = tmp_path / "derived" / "microfacts" / "2025-10-27.yaml" 34 | facts_path.parent.mkdir(parents=True, exist_ok=True) 35 | 36 | called: list[str] = [] 37 | 38 | def fake_run( 39 | date: str, 40 | *, 41 | progress: bool, 42 | claim_models, 43 | generate_preview: bool, 44 | workspace: Path | None = None, 45 | config: AppConfig | None = None, 46 | ) -> tuple[None, Path]: 47 | del generate_preview 48 | called.append(date) 49 | facts_path.write_text("facts", encoding="utf-8") 50 | return None, facts_path 51 | 52 | def fake_load_profile(*args, **kwargs): 53 | return None, [] # profile, claims 54 | 55 | monkeypatch.setattr("aijournal.commands.facts.run_facts", fake_run) 56 | monkeypatch.setattr("aijournal.commands.profile.load_profile_components", fake_load_profile) 57 | 58 | outputs = stage3_facts.run_facts_stage_3( 59 | ["2025-10-27"], 60 | _make_inputs(), 61 | tmp_path, 62 | _make_config(), 63 | ) 64 | 65 | assert called == ["2025-10-27"] 66 | assert outputs.result.ok is True 67 | assert outputs.result.changed is True 68 | assert outputs.paths == ["derived/microfacts/2025-10-27.yaml"] 69 | 70 | 71 | def test_stage3_facts_handles_failure(tmp_path: Path, monkeypatch) -> None: 72 | def failing_run(*args, **kwargs) -> Never: 73 | raise typer.Exit(1) 74 | 75 | def fake_load_profile(*args, **kwargs): 76 | return None, [] 77 | 78 | monkeypatch.setattr("aijournal.commands.facts.run_facts", failing_run) 79 | monkeypatch.setattr("aijournal.commands.profile.load_profile_components", fake_load_profile) 80 | 81 | outputs = stage3_facts.run_facts_stage_3( 82 | ["2025-10-27"], 83 | _make_inputs(), 84 | tmp_path, 85 | _make_config(), 86 | ) 87 | 88 | assert outputs.result.ok is False 89 | assert outputs.result.changed is False 90 | assert outputs.result.warnings 91 | assert outputs.paths == [] 92 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.index.ChunkBatch.json: -------------------------------------------------------------------------------- 1 | { 2 | "$defs": { 3 | "Chunk": { 4 | "description": "Normalized chunk persisted in the retrieval index.", 5 | "properties": { 6 | "chunk_id": { 7 | "title": "Chunk Id", 8 | "type": "string" 9 | }, 10 | "chunk_index": { 11 | "title": "Chunk Index", 12 | "type": "integer" 13 | }, 14 | "chunk_type": { 15 | "default": "entry", 16 | "title": "Chunk Type", 17 | "type": "string" 18 | }, 19 | "date": { 20 | "title": "Date", 21 | "type": "string" 22 | }, 23 | "manifest_hash": { 24 | "anyOf": [ 25 | { 26 | "type": "string" 27 | }, 28 | { 29 | "type": "null" 30 | } 31 | ], 32 | "default": null, 33 | "title": "Manifest Hash" 34 | }, 35 | "normalized_id": { 36 | "title": "Normalized Id", 37 | "type": "string" 38 | }, 39 | "source_hash": { 40 | "anyOf": [ 41 | { 42 | "type": "string" 43 | }, 44 | { 45 | "type": "null" 46 | } 47 | ], 48 | "default": null, 49 | "title": "Source Hash" 50 | }, 51 | "source_path": { 52 | "title": "Source Path", 53 | "type": "string" 54 | }, 55 | "source_type": { 56 | "anyOf": [ 57 | { 58 | "type": "string" 59 | }, 60 | { 61 | "type": "null" 62 | } 63 | ], 64 | "default": null, 65 | "title": "Source Type" 66 | }, 67 | "tags": { 68 | "items": { 69 | "type": "string" 70 | }, 71 | "title": "Tags", 72 | "type": "array" 73 | }, 74 | "text": { 75 | 
"title": "Text", 76 | "type": "string" 77 | }, 78 | "tokens": { 79 | "title": "Tokens", 80 | "type": "integer" 81 | } 82 | }, 83 | "required": [ 84 | "chunk_id", 85 | "normalized_id", 86 | "chunk_index", 87 | "text", 88 | "date", 89 | "source_path", 90 | "tokens" 91 | ], 92 | "title": "Chunk", 93 | "type": "object" 94 | } 95 | }, 96 | "description": "Exported chunk set for a given journal day.", 97 | "properties": { 98 | "chunks": { 99 | "items": { 100 | "$ref": "#/$defs/Chunk" 101 | }, 102 | "title": "Chunks", 103 | "type": "array" 104 | }, 105 | "day": { 106 | "title": "Day", 107 | "type": "string" 108 | } 109 | }, 110 | "required": [ 111 | "day" 112 | ], 113 | "title": "ChunkBatch", 114 | "type": "object" 115 | } 116 | -------------------------------------------------------------------------------- /src/aijournal/services/embedding.py: -------------------------------------------------------------------------------- 1 | """Embedding helpers shared across indexing and retrieval.""" 2 | 3 | from __future__ import annotations 4 | 5 | import random 6 | from dataclasses import dataclass, field 7 | from hashlib import sha256 8 | from typing import TYPE_CHECKING 9 | 10 | import httpx 11 | 12 | from aijournal.common.constants import DEFAULT_EMBED_DIM, EMBED_TIMEOUT 13 | from aijournal.services.ollama import resolve_ollama_host 14 | 15 | if TYPE_CHECKING: 16 | from collections.abc import Sequence 17 | 18 | 19 | @dataclass 20 | class EmbeddingBackend: 21 | """Thin wrapper that returns deterministic vectors in fake mode.""" 22 | 23 | model: str 24 | host: str | None = None 25 | fake_mode: bool = False 26 | dimension: int | None = None 27 | _base_host: str = field(init=False) 28 | 29 | def __post_init__(self) -> None: 30 | """Normalize and cache the base Ollama host for repeated calls.""" 31 | self._base_host = resolve_ollama_host(self.host) 32 | 33 | def embed(self, texts: Sequence[str]) -> list[list[float]]: 34 | vectors: list[list[float]] = [] 35 | if not texts: 36 | return vectors 37 | if self.fake_mode: 38 | return [self._fake_embed(text) for text in texts] 39 | 40 | endpoint = f"{self._base_host}/api/embeddings" 41 | try: 42 | with httpx.Client(timeout=EMBED_TIMEOUT) as session: 43 | for text in texts: 44 | response = session.post( 45 | endpoint, 46 | json={ 47 | "model": self.model, 48 | "prompt": text, 49 | }, 50 | ) 51 | response.raise_for_status() 52 | payload = response.json() 53 | vector = payload.get("embedding") 54 | if not isinstance(vector, list): 55 | msg = "Ollama embedding response missing vector payload" 56 | raise TypeError(msg) 57 | if self.dimension is None: 58 | self.dimension = len(vector) 59 | vectors.append([float(value) for value in vector]) 60 | except httpx.HTTPError as exc: 61 | msg = f"Ollama embedding request failed: {exc}" 62 | raise RuntimeError(msg) from exc 63 | return vectors 64 | 65 | def embed_one(self, text: str) -> list[float]: 66 | return self.embed([text])[0] if text else [0.0] * self.dim 67 | 68 | @property 69 | def dim(self) -> int: 70 | return self.dimension or DEFAULT_EMBED_DIM 71 | 72 | def _fake_embed(self, text: str) -> list[float]: 73 | seed = int.from_bytes(sha256(text.encode("utf-8")).digest()[:8], "big") 74 | rng = random.Random(seed) # noqa: S311 - deterministic fake embeddings for tests 75 | dim = self.dimension or DEFAULT_EMBED_DIM 76 | self.dimension = dim 77 | return [rng.uniform(-1.0, 1.0) for _ in range(dim)] 78 | -------------------------------------------------------------------------------- /docs/design/profile_update_inventory.md: 
-------------------------------------------------------------------------------- 1 | # Profile Update Surface Inventory 2 | 3 | _Updated: 2025-11-14_ 4 | 5 | > Status: Prompt 3 cleanup is complete. The list below is preserved for historical/audit context so future agents know which legacy surfaces were removed or renamed when `profile_update` replaced the old flows. 6 | 7 | This note originally captured every code surface that referenced the legacy 8 | `profile_suggest` / `characterize` flows. Capture now routes exclusively 9 | through the unified `profile_update` stage. 10 | 11 | ## Command / CLI entry points 12 | 13 | - `src/aijournal/cli.py` 14 | - `profile.suggest` command → `run_profile_suggest` 15 | - `profile.apply` consumers expect artifacts from stage 4 16 | - `characterize` command and `ops pipeline characterize` wrapper 17 | - `src/aijournal/commands/profile.py` 18 | - Houses `run_profile_suggest`, apply/status plumbing, summarizer helpers, 19 | and derived artifact writers. 20 | - `src/aijournal/commands/characterize.py` 21 | - Orchestrates LLM calls for stage 5, produces 22 | `derived/pending/profile_updates/*.yaml` batches. 23 | 24 | ## Capture pipeline integration 25 | 26 | - ✅ `src/aijournal/services/capture/__init__.py` now invokes the unified 27 | `derive.profile_update` stage (wrapping `stage4_profile_update.py`). 28 | - ✅ `src/aijournal/services/capture/stages/stage4_profile_update.py` replaces 29 | the old stage4/stage5 pair. 30 | - `src/aijournal/services/capture/graceful.py` still exposes the legacy 31 | wrappers, though capture no longer consumes them. 32 | 33 | ## Prompt + DTO definitions 34 | 35 | - `prompts/profile_suggest.md` and `prompts/characterize.md` define the old 36 | LLM contracts. 37 | - `prompts/examples/profile_suggest.json` and `prompts/examples/characterize.json` 38 | back the example validation tests. 39 | - `src/aijournal/domain/prompts.py` 40 | - DTO containers (`PromptProfileUpdates`, `PromptClaimItem`, etc.) used by 41 | both prompts, plus converters into `ProfileUpdateProposals`. 42 | 43 | ## Pipeline helpers 44 | 45 | - `src/aijournal/pipelines/characterize.py` 46 | - Shared normalization and fake-mode logic for stage 5. 47 | - `src/aijournal/pipelines/facts.py` 48 | - Provides normalization helpers that stage 4 and 5 import when converting 49 | LLM-emitted claim DTOs. 50 | 51 | ## Tests and fixtures 52 | 53 | - `tests/prompts/test_prompt_examples.py` exercises 54 | `profile_suggest.json` + `characterize.json` payloads. 55 | - `tests/cli/test_cli_profile_suggest.py`, `tests/test_cli_characterize.py`, 56 | and capture-service tests (`tests/services/test_capture.py`, 57 | `tests/services/capture/test_stage_profile.py`, etc.) assert both stages run. 58 | - Simulator validators (`tests/simulator/validators.py`) expect stage 4 and 5 59 | artifacts when replaying capture runs. 60 | 61 | This inventory guided the Prompt 3 workstreams: 62 | 63 | 1. Introduce the new unified prompt (`prompts/profile_update.md`) and pipeline. 64 | 2. Wire a single `profile_update` stage into capture + CLI (a hedged sketch of this wiring appears below). 65 | 3. Delete/retire every surface listed above once parity tests pass.
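
The snippet below is a minimal, hypothetical sketch of workstream 2, assuming the unified stage follows the same conventions as the sibling runners in this repo (`run_summarize_stage_2`, `run_facts_stage_3`): the `stage4_profile_update` module is listed above, but the `run_profile_update_stage_4` name, its argument order, and the `outputs.result.artifacts` shape are assumptions, not the confirmed API.

```python
# Hypothetical sketch only: run_profile_update_stage_4 and its signature are
# assumed by analogy with run_summarize_stage_2 / run_facts_stage_3.
from pathlib import Path

from aijournal.common.app_config import AppConfig
from aijournal.services.capture import CaptureInput
from aijournal.services.capture.stages import stage4_profile_update


def derive_profile_updates(changed_dates: list[str], workspace: Path) -> list[str]:
    """Run the unified profile_update stage for the dates touched by capture."""
    inputs = CaptureInput(source="stdin", text="Captured entry text")
    config = AppConfig()
    outputs = stage4_profile_update.run_profile_update_stage_4(  # assumed entry point
        changed_dates,
        inputs,
        workspace,
        config,
    )
    # Like the other stages, the outputs carry an OperationResult with ok/changed
    # flags plus the relative paths of any artifacts that were written.
    return list(outputs.result.artifacts)
```

If the stage mirrors its siblings, callers can treat a falsy `changed` flag as a no-op and surface `warnings` when the LLM step degrades gracefully.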
66 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.facts.ConsolidatedMicrofactsFile.json: -------------------------------------------------------------------------------- 1 | { 2 | "$defs": { 3 | "ConsolidatedMicroFact": { 4 | "description": "Global microfact entry that survives consolidation runs.", 5 | "properties": { 6 | "canonical_statement": { 7 | "title": "Canonical Statement", 8 | "type": "string" 9 | }, 10 | "confidence": { 11 | "title": "Confidence", 12 | "type": "number" 13 | }, 14 | "contexts": { 15 | "items": { 16 | "type": "string" 17 | }, 18 | "title": "Contexts", 19 | "type": "array" 20 | }, 21 | "domain": { 22 | "anyOf": [ 23 | { 24 | "type": "string" 25 | }, 26 | { 27 | "type": "null" 28 | } 29 | ], 30 | "default": null, 31 | "title": "Domain" 32 | }, 33 | "evidence_entries": { 34 | "items": { 35 | "type": "string" 36 | }, 37 | "title": "Evidence Entries", 38 | "type": "array" 39 | }, 40 | "first_seen": { 41 | "title": "First Seen", 42 | "type": "string" 43 | }, 44 | "id": { 45 | "title": "Id", 46 | "type": "string" 47 | }, 48 | "last_seen": { 49 | "title": "Last Seen", 50 | "type": "string" 51 | }, 52 | "observation_count": { 53 | "title": "Observation Count", 54 | "type": "integer" 55 | }, 56 | "source_fact_ids": { 57 | "items": { 58 | "type": "string" 59 | }, 60 | "title": "Source Fact Ids", 61 | "type": "array" 62 | }, 63 | "statement": { 64 | "title": "Statement", 65 | "type": "string" 66 | } 67 | }, 68 | "required": [ 69 | "id", 70 | "statement", 71 | "canonical_statement", 72 | "confidence", 73 | "first_seen", 74 | "last_seen", 75 | "observation_count" 76 | ], 77 | "title": "ConsolidatedMicroFact", 78 | "type": "object" 79 | } 80 | }, 81 | "description": "Artifact capturing the global consolidated microfact snapshot.", 82 | "properties": { 83 | "embedding_model": { 84 | "anyOf": [ 85 | { 86 | "type": "string" 87 | }, 88 | { 89 | "type": "null" 90 | } 91 | ], 92 | "default": null, 93 | "title": "Embedding Model" 94 | }, 95 | "facts": { 96 | "items": { 97 | "$ref": "#/$defs/ConsolidatedMicroFact" 98 | }, 99 | "title": "Facts", 100 | "type": "array" 101 | }, 102 | "generated_at": { 103 | "title": "Generated At", 104 | "type": "string" 105 | } 106 | }, 107 | "required": [ 108 | "generated_at" 109 | ], 110 | "title": "ConsolidatedMicrofactsFile", 111 | "type": "object" 112 | } 113 | -------------------------------------------------------------------------------- /src/aijournal/services/capture/stages/stage6_index.py: -------------------------------------------------------------------------------- 1 | """Stage 6: refresh the retrieval index artifacts.""" 2 | 3 | from __future__ import annotations 4 | 5 | from time import perf_counter 6 | from typing import TYPE_CHECKING, Literal 7 | 8 | import typer 9 | 10 | if TYPE_CHECKING: 11 | from collections.abc import Sequence 12 | from pathlib import Path 13 | 14 | from aijournal.services.capture import IndexStage6Outputs 15 | 16 | 17 | def run_index_stage_6( 18 | changed_dates: Sequence[str], 19 | root: Path, 20 | rebuild_mode: Literal["auto", "always", "skip"] = "auto", 21 | ) -> IndexStage6Outputs: 22 | from aijournal.commands.index import run_index_rebuild, run_index_tail 23 | from aijournal.services.capture import IndexStage6Outputs 24 | from aijournal.services.capture.results import OperationResult 25 | from aijournal.services.capture.utils import relative_path 26 | 27 | stage_start = perf_counter() 28 | index_message = "" 29 | index_error: str | None = None 
30 | index_updated = False 31 | rebuilt = False 32 | force_rebuild = rebuild_mode == "always" 33 | changed_dates_list = list(changed_dates) 34 | try: 35 | chroma_dir = root / "derived" / "index" / "chroma" 36 | if force_rebuild or not chroma_dir.exists(): 37 | index_message = run_index_rebuild(since=None, limit=None) 38 | rebuilt = True 39 | index_updated = True 40 | elif changed_dates_list: 41 | since = min(changed_dates_list) 42 | index_message = run_index_tail(since=since, days=7, limit=None) 43 | if not index_message or "already up to date" not in index_message.lower(): 44 | index_updated = True 45 | else: 46 | index_message = "no capture changes detected" 47 | except typer.Exit as exc: 48 | if exc.exit_code not in (0,): 49 | index_error = str(exc) 50 | except Exception as exc: # pragma: no cover - defensive 51 | index_error = str(exc) 52 | duration_ms = (perf_counter() - stage_start) * 1000.0 53 | index_details: dict[str, object] = { 54 | "message": index_message, 55 | "rebuild": rebuilt, 56 | "mode": rebuild_mode, 57 | } 58 | if index_error is not None: 59 | op_result = OperationResult.fail( 60 | f"index update failed: {index_error}", 61 | details=index_details, 62 | ) 63 | elif index_updated: 64 | index_artifacts = [ 65 | relative_path(root / "derived" / "index" / "chroma", root), 66 | relative_path(root / "derived" / "index" / "meta.json", root), 67 | ] 68 | op_result = OperationResult.wrote( 69 | index_artifacts, 70 | message=index_message or "index refreshed", 71 | details=index_details, 72 | ) 73 | else: 74 | op_result = OperationResult.noop( 75 | index_message or "index already up to date", 76 | details=index_details, 77 | ) 78 | return IndexStage6Outputs(op_result, duration_ms, index_updated, rebuilt) 79 | --------------------------------------------------------------------------------