├── CLAUDE.md ├── src └── aijournal │ ├── py.typed │ ├── __init__.py │ ├── services │ ├── __init__.py │ ├── capture │ │ ├── stages │ │ │ ├── __init__.py │ │ │ ├── stage1_normalize.py │ │ │ ├── stage8_pack.py │ │ │ ├── stage2_summarize.py │ │ │ ├── stage3_facts.py │ │ │ └── stage6_index.py │ │ └── results.py │ ├── microfacts │ │ ├── __init__.py │ │ └── snapshot.py │ └── embedding.py │ ├── api │ ├── __init__.py │ ├── capture.py │ └── chat.py │ ├── common │ ├── __init__.py │ ├── types.py │ ├── base.py │ ├── constants.py │ ├── meta.py │ └── command_runner.py │ ├── io │ ├── __init__.py │ └── yaml_io.py │ ├── utils │ ├── __init__.py │ ├── coercion.py │ ├── time.py │ └── text.py │ ├── commands │ └── __init__.py │ ├── pipelines │ ├── __init__.py │ ├── advise.py │ └── summarize.py │ ├── models │ ├── __init__.py │ ├── base.py │ ├── claim_atoms.py │ ├── derived.py │ └── authoritative.py │ ├── domain │ ├── __init__.py │ ├── evidence.py │ ├── enums.py │ ├── persona.py │ ├── packs.py │ ├── chat.py │ ├── journal.py │ ├── advice.py │ ├── claims.py │ ├── index.py │ ├── chat_sessions.py │ ├── facts.py │ └── events.py │ ├── simulator │ └── __init__.py │ ├── _version.py │ └── schema.py ├── .python-version ├── .envrc ├── tests ├── ci │ └── __init__.py ├── __init__.py ├── domain │ └── __init__.py ├── fixtures │ ├── miniwk │ │ ├── __init__.py │ │ ├── expected_retrieval.json │ │ ├── data │ │ │ ├── manifest │ │ │ │ └── ingested.yaml │ │ │ └── normalized │ │ │ │ ├── 2025-02-03 │ │ │ │ └── 2025-02-03-focus.yaml │ │ │ │ ├── 2025-02-04 │ │ │ │ └── 2025-02-04-reflection.yaml │ │ │ │ └── 2025-02-05 │ │ │ │ └── 2025-02-05-planning.yaml │ │ └── config.yaml │ └── __init__.py ├── io_tests │ ├── __init__.py │ └── test_artifacts.py ├── scripts │ ├── __init__.py │ └── test_check_structured_metrics.py ├── simulator │ ├── __init__.py │ └── test_human_simulator.py ├── commands │ ├── __init__.py │ └── test_microfact_prompts.py ├── pipelines │ ├── __init__.py │ ├── test_advise.py │ ├── test_summarize.py │ └── test_persona.py ├── common │ ├── __init__.py │ └── test_meta.py ├── prompts │ ├── __init__.py │ └── test_metadata_filtering.py ├── services │ ├── __init__.py │ ├── capture │ │ ├── __init__.py │ │ ├── test_summary_policy.py │ │ ├── test_graceful_profile_update.py │ │ ├── test_stage_summarize.py │ │ ├── test_stage_persona.py │ │ └── test_stage_facts.py │ └── test_claim_id_generation.py ├── test_sanity.py ├── test_coercion.py ├── test_schema_validation.py ├── test_cli_version.py ├── test_yaml_io.py ├── README.md ├── test_api_capture.py ├── test_cli_simulator.py ├── conftest.py ├── test_claim_atoms.py ├── test_cli_ollama_health.py └── test_cli_microfacts.py ├── scripts └── __init__.py ├── schemas └── core │ ├── aijournal.models.base.AijournalModel.json │ ├── aijournal.domain.evidence.SourceRef.json │ ├── aijournal.api.chat.ChatCitationRef.json │ ├── aijournal.domain.packs.TrimmedFile.json │ ├── aijournal.domain.advice.AdviceReference.json │ ├── aijournal.domain.journal.NormalizedEntity.json │ ├── aijournal.domain.claims.Scope.json │ ├── aijournal.domain.packs.PackEntry.json │ ├── aijournal.services.retriever.RetrievalMeta.json │ ├── aijournal.domain.chat.ChatTelemetry.json │ ├── aijournal.domain.facts.MicrofactConsolidationSummary.json │ ├── aijournal.domain.facts.DailySummary.json │ ├── aijournal.domain.events.FeedbackAdjustmentEvent.json │ ├── aijournal.domain.journal.Section.json │ ├── aijournal.domain.persona.InterviewQuestion.json │ ├── aijournal.domain.events.ClaimSignaturePayload.json │ ├── 
aijournal.domain.prompts.PromptMicroFact.json │ ├── aijournal.services.consolidator.ClaimSignature.json │ ├── aijournal.services.retriever.RetrievalFilters.json │ ├── aijournal.models.derived.ProfileUpdateInput.json │ ├── aijournal.domain.packs.PackMeta.json │ ├── aijournal.api.chat.ChatCitation.json │ ├── aijournal.models.authoritative.JournalEntry.json │ ├── aijournal.domain.claims.Provenance.json │ ├── aijournal.domain.prompts.PromptFacetItem.json │ ├── aijournal.domain.facts.MicrofactConsolidationLog.json │ ├── aijournal.domain.facts.MicroFact.json │ ├── aijournal.domain.advice.AdviceRecommendation.json │ ├── aijournal.domain.persona.InterviewSet.json │ ├── aijournal.api.chat.ChatResponse.json │ ├── aijournal.models.authoritative.SelfProfile.json │ ├── aijournal.common.meta.LLMResult.json │ ├── aijournal.domain.facts.ConsolidatedMicroFact.json │ ├── aijournal.domain.index.Chunk.json │ ├── aijournal.domain.events.FeedbackBatch.json │ ├── aijournal.domain.index.RetrievedChunk.json │ ├── aijournal.domain.chat_sessions.ChatLearningEntry.json │ ├── aijournal.common.meta.ArtifactMeta.json │ ├── aijournal.domain.changes.ClaimAtomInput.json │ ├── aijournal.models.authoritative.ManifestEntry.json │ ├── aijournal.domain.chat_sessions.ChatSessionSummary.json │ ├── aijournal.api.chat.ChatRequest.json │ ├── aijournal.domain.chat_sessions.ChatTranscriptTurn.json │ ├── aijournal.services.consolidator.ClaimConflict.json │ ├── aijournal.domain.events.ClaimConflictPayload.json │ ├── aijournal.domain.changes.FacetChange.json │ ├── aijournal.domain.packs.PackBundle.json │ ├── aijournal.domain.index.ChunkBatch.json │ └── aijournal.domain.facts.ConsolidatedMicrofactsFile.json ├── .githooks └── pre-push ├── .gitignore ├── prompts └── examples │ ├── extract_facts.json │ ├── summarize.json │ ├── interview.json │ ├── profile_update.json │ └── advise.json ├── .github └── workflows │ ├── schema.yml │ ├── ci.yml │ └── release.yml ├── .pre-commit-config.yaml ├── justfile └── docs ├── prompt_improvement_request.md ├── archive └── 2025-10-29_CLI_MIGRATION.md └── design └── profile_update_inventory.md /CLAUDE.md: -------------------------------------------------------------------------------- 1 | AGENTS.md -------------------------------------------------------------------------------- /src/aijournal/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.12 2 | -------------------------------------------------------------------------------- /src/aijournal/__init__.py: -------------------------------------------------------------------------------- 1 | """aijournal package exports.""" 2 | -------------------------------------------------------------------------------- /.envrc: -------------------------------------------------------------------------------- 1 | source .venv/bin/activate 2 | AIJOURNAL_WORKSPACE=workspace 3 | -------------------------------------------------------------------------------- /src/aijournal/services/__init__.py: -------------------------------------------------------------------------------- 1 | """Service utilities for aijournal.""" 2 | -------------------------------------------------------------------------------- /src/aijournal/api/__init__.py: -------------------------------------------------------------------------------- 1 | """Public API schemas exposed by 
aijournal services.""" 2 | -------------------------------------------------------------------------------- /src/aijournal/common/__init__.py: -------------------------------------------------------------------------------- 1 | """Common primitives shared across aijournal modules.""" 2 | -------------------------------------------------------------------------------- /src/aijournal/io/__init__.py: -------------------------------------------------------------------------------- 1 | """I/O helpers for YAML, artifacts, and related formats.""" 2 | -------------------------------------------------------------------------------- /tests/ci/__init__.py: -------------------------------------------------------------------------------- 1 | """Mark CI-focused tests as a concrete package for importers.""" 2 | -------------------------------------------------------------------------------- /scripts/__init__.py: -------------------------------------------------------------------------------- 1 | """Expose helper scripts as an importable package for tooling hooks.""" 2 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Explicitly package the test suite so imports resolve consistently.""" 2 | -------------------------------------------------------------------------------- /tests/domain/__init__.py: -------------------------------------------------------------------------------- 1 | """Bundle domain-level tests into a package to simplify discovery.""" 2 | -------------------------------------------------------------------------------- /tests/fixtures/miniwk/__init__.py: -------------------------------------------------------------------------------- 1 | """Package marker for the mini workspace fixture bundle.""" 2 | -------------------------------------------------------------------------------- /tests/io_tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Mark IO tests as a concrete package for linting purposes.""" 2 | -------------------------------------------------------------------------------- /tests/scripts/__init__.py: -------------------------------------------------------------------------------- 1 | """Group scripted test helpers into a package for import clarity.""" 2 | -------------------------------------------------------------------------------- /tests/simulator/__init__.py: -------------------------------------------------------------------------------- 1 | """Group simulator tests into a concrete package for discovery.""" 2 | -------------------------------------------------------------------------------- /src/aijournal/utils/__init__.py: -------------------------------------------------------------------------------- 1 | """Stateless helpers shared across CLI, services, and pipelines.""" 2 | -------------------------------------------------------------------------------- /tests/fixtures/__init__.py: -------------------------------------------------------------------------------- 1 | """Publish fixture helpers as a package for cleaner relative imports.""" 2 | -------------------------------------------------------------------------------- /tests/commands/__init__.py: -------------------------------------------------------------------------------- 1 | """Keep command tests in a real package so shared helpers import cleanly.""" 2 | 
-------------------------------------------------------------------------------- /tests/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | """Keep pipeline tests grouped in a package for consistent discovery.""" 2 | -------------------------------------------------------------------------------- /tests/common/__init__.py: -------------------------------------------------------------------------------- 1 | """Treat shared test utilities as a true package for straightforward imports.""" 2 | -------------------------------------------------------------------------------- /tests/prompts/__init__.py: -------------------------------------------------------------------------------- 1 | """Allow prompt tests to act as a package and avoid implicit-namespace lint.""" 2 | -------------------------------------------------------------------------------- /tests/services/__init__.py: -------------------------------------------------------------------------------- 1 | """Expose the services tests as a package so linting tools resolve imports.""" 2 | -------------------------------------------------------------------------------- /src/aijournal/commands/__init__.py: -------------------------------------------------------------------------------- 1 | """Command modules provide the Typer-facing orchestration for each feature.""" 2 | -------------------------------------------------------------------------------- /tests/services/capture/__init__.py: -------------------------------------------------------------------------------- 1 | """Capture-service tests live here as a proper package for importer parity.""" 2 | -------------------------------------------------------------------------------- /src/aijournal/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | """Pipeline modules orchestrate end-to-end workflows for specific CLI commands.""" 2 | -------------------------------------------------------------------------------- /src/aijournal/services/capture/stages/__init__.py: -------------------------------------------------------------------------------- 1 | """Capture stage implementations exposed to the pipeline runner.""" 2 | -------------------------------------------------------------------------------- /tests/test_sanity.py: -------------------------------------------------------------------------------- 1 | def test_sanity() -> None: 2 | """Placeholder to keep pytest happy until real tests exist.""" 3 | assert True 4 | -------------------------------------------------------------------------------- /src/aijournal/common/types.py: -------------------------------------------------------------------------------- 1 | """Common typing aliases shared across the project.""" 2 | 3 | ISODateStr = str # 'YYYY-MM-DD' 4 | TimestampStr = str # ISO8601 string 5 | -------------------------------------------------------------------------------- /tests/fixtures/miniwk/expected_retrieval.json: -------------------------------------------------------------------------------- 1 | { 2 | "query": "focus blocks", 3 | "top": 5, 4 | "expected_chunk_ids": [ 5 | "2025-02-04-reflection#c0" 6 | ] 7 | } 8 | -------------------------------------------------------------------------------- /src/aijournal/models/__init__.py: -------------------------------------------------------------------------------- 1 | """Pydantic model package; import from explicit submodules (authoritative, derived, claim_atoms).""" 2 | 3 | # 
Intentionally left empty to avoid implicit re-exports. 4 | -------------------------------------------------------------------------------- /schemas/core/aijournal.models.base.AijournalModel.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Project-specific base model that inherits strict settings.", 3 | "properties": {}, 4 | "title": "AijournalModel", 5 | "type": "object" 6 | } 7 | -------------------------------------------------------------------------------- /src/aijournal/domain/__init__.py: -------------------------------------------------------------------------------- 1 | """Domain models live in explicit submodules (changes, evidence, facts, persona, etc.).""" 2 | 3 | # This package intentionally avoids re-export magic; import directly from the submodule you need. 4 | -------------------------------------------------------------------------------- /.githooks/pre-push: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -euo pipefail 4 | 5 | echo "Running schema guard..." 6 | uv run python scripts/check_schemas.py 7 | 8 | echo "Running pytest..." 9 | uv run pytest -q 10 | 11 | echo "Running pre-commit hooks..." 12 | pre-commit run --all-files 13 | -------------------------------------------------------------------------------- /src/aijournal/models/base.py: -------------------------------------------------------------------------------- 1 | """Shared base model for aijournal Pydantic schemas.""" 2 | 3 | from __future__ import annotations 4 | 5 | from aijournal.common.base import StrictModel 6 | 7 | 8 | class AijournalModel(StrictModel): 9 | """Project-specific base model that inherits strict settings.""" 10 | -------------------------------------------------------------------------------- /tests/fixtures/miniwk/data/manifest/ingested.yaml: -------------------------------------------------------------------------------- 1 | - id: 2025-02-03-focus 2 | hash: hash-2025-02-03-focus 3 | source_type: journal 4 | - id: 2025-02-04-reflection 5 | hash: hash-2025-02-04-reflection 6 | source_type: journal 7 | - id: 2025-02-05-planning 8 | hash: hash-2025-02-05-planning 9 | source_type: journal 10 | -------------------------------------------------------------------------------- /src/aijournal/domain/evidence.py: -------------------------------------------------------------------------------- 1 | """Domain models for evidence source references.""" 2 | 3 | from __future__ import annotations 4 | 5 | from aijournal.common.base import StrictModel 6 | 7 | 8 | class SourceRef(StrictModel): 9 | """Reference to a normalized entry that supports a claim or fact.""" 10 | 11 | entry_id: str 12 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Python/uv 2 | .venv/ 3 | __pycache__/ 4 | *.py[cod] 5 | .ruff_cache/ 6 | .mypy_cache/ 7 | .pytest_cache/ 8 | 9 | # Local tooling 10 | .code/ 11 | 12 | # Data directories (optional to track; ignored by default for privacy) 13 | data/ 14 | derived/ 15 | profile/ 16 | workspace/ 17 | 18 | # OS noise 19 | .DS_Store 20 | Thumbs.db 21 | -------------------------------------------------------------------------------- /prompts/examples/extract_facts.json: -------------------------------------------------------------------------------- 1 | { 2 | "facts": [ 3 | { 4 | "id": "focus-morning-block", 5 | "statement": "Morning 
focus blocks improve deep-work output.", 6 | "confidence": 0.82, 7 | "evidence_entry": "2025-10-25-focus-log", 8 | "evidence_para": 0 9 | } 10 | ], 11 | "claim_proposals": [] 12 | } 13 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.evidence.SourceRef.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Reference to a normalized entry that supports a claim or fact.", 3 | "properties": { 4 | "entry_id": { 5 | "title": "Entry Id", 6 | "type": "string" 7 | } 8 | }, 9 | "required": [ 10 | "entry_id" 11 | ], 12 | "title": "SourceRef", 13 | "type": "object" 14 | } 15 | -------------------------------------------------------------------------------- /schemas/core/aijournal.api.chat.ChatCitationRef.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Reference emitted by the LLM; resolved against retrieved chunks.", 3 | "properties": { 4 | "code": { 5 | "minLength": 1, 6 | "title": "Code", 7 | "type": "string" 8 | } 9 | }, 10 | "required": [ 11 | "code" 12 | ], 13 | "title": "ChatCitationRef", 14 | "type": "object" 15 | } 16 | -------------------------------------------------------------------------------- /prompts/examples/summarize.json: -------------------------------------------------------------------------------- 1 | { 2 | "day": "2025-10-26", 3 | "bullets": [ 4 | "Confirmed strict schema prompts with JSON fixtures.", 5 | "Documented fallback instructions for summarize output." 6 | ], 7 | "highlights": [ 8 | "LLM outputs now align with DailySummary schema." 9 | ], 10 | "todo_candidates": [ 11 | "Verify persona regeneration after strict schema rollout." 12 | ] 13 | } 14 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.packs.TrimmedFile.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Record of a file trimmed due to token budget limits.", 3 | "properties": { 4 | "path": { 5 | "title": "Path", 6 | "type": "string" 7 | }, 8 | "role": { 9 | "title": "Role", 10 | "type": "string" 11 | } 12 | }, 13 | "required": [ 14 | "role", 15 | "path" 16 | ], 17 | "title": "TrimmedFile", 18 | "type": "object" 19 | } 20 | -------------------------------------------------------------------------------- /tests/fixtures/miniwk/data/normalized/2025-02-03/2025-02-03-focus.yaml: -------------------------------------------------------------------------------- 1 | id: 2025-02-03-focus 2 | created_at: "2025-02-03T09:00:00Z" 3 | source_path: data/journal/2025/02/03/2025-02-03-focus.md 4 | title: Focus 5 | tags: 6 | - focus 7 | - planning 8 | summary: Protected two focus blocks for roadmap prep. 9 | sections: 10 | - heading: Highlights 11 | summary: Protected two focus blocks for roadmap prep. 
12 | source_hash: hash-2025-02-03-focus 13 | source_type: journal 14 | -------------------------------------------------------------------------------- /prompts/examples/interview.json: -------------------------------------------------------------------------------- 1 | { 2 | "questions": [ 3 | { 4 | "id": "morning-reflection-frequency", 5 | "text": "How often do post-focus reflections get skipped?", 6 | "target_facet": "habits.focus_block", 7 | "priority": "high" 8 | }, 9 | { 10 | "id": "travel-adjustments", 11 | "text": "What changes during travel weeks disrupt focus blocks?", 12 | "target_facet": "claim:goal.focus_hours_per_week", 13 | "priority": "medium" 14 | } 15 | ] 16 | } 17 | -------------------------------------------------------------------------------- /tests/fixtures/miniwk/data/normalized/2025-02-04/2025-02-04-reflection.yaml: -------------------------------------------------------------------------------- 1 | id: 2025-02-04-reflection 2 | created_at: "2025-02-04T08:30:00Z" 3 | source_path: data/journal/2025/02/04/2025-02-04-reflection.md 4 | title: Reflection 5 | tags: 6 | - reflection 7 | - focus 8 | summary: Reflected on how consistent focus blocks supported calm execution. 9 | sections: 10 | - heading: Reflection 11 | summary: Focus rituals kept delivery predictable. 12 | source_hash: hash-2025-02-04-reflection 13 | source_type: journal 14 | -------------------------------------------------------------------------------- /tests/fixtures/miniwk/data/normalized/2025-02-05/2025-02-05-planning.yaml: -------------------------------------------------------------------------------- 1 | id: 2025-02-05-planning 2 | created_at: "2025-02-05T07:45:00Z" 3 | source_path: data/journal/2025/02/05/2025-02-05-planning.md 4 | title: Planning 5 | tags: 6 | - planning 7 | - sprint 8 | summary: Planned sprint tasks and scheduled deep work blocks for writing. 9 | sections: 10 | - heading: Planning 11 | summary: Prioritized sprint stories and held time for writing. 
12 | source_hash: hash-2025-02-05-planning 13 | source_type: journal 14 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.advice.AdviceReference.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "References included to ground why advice fits.", 3 | "properties": { 4 | "claims": { 5 | "items": { 6 | "type": "string" 7 | }, 8 | "title": "Claims", 9 | "type": "array" 10 | }, 11 | "facets": { 12 | "items": { 13 | "type": "string" 14 | }, 15 | "title": "Facets", 16 | "type": "array" 17 | } 18 | }, 19 | "title": "AdviceReference", 20 | "type": "object" 21 | } 22 | -------------------------------------------------------------------------------- /src/aijournal/common/base.py: -------------------------------------------------------------------------------- 1 | """Strict Pydantic base classes used across the project.""" 2 | 3 | from __future__ import annotations 4 | 5 | from pydantic import BaseModel, ConfigDict 6 | 7 | 8 | class StrictModel(BaseModel): 9 | """Pydantic model with strict settings and ignored extras.""" 10 | 11 | model_config = ConfigDict( 12 | extra="ignore", 13 | validate_assignment=True, 14 | arbitrary_types_allowed=False, 15 | populate_by_name=True, 16 | protected_namespaces=(), 17 | ) 18 | -------------------------------------------------------------------------------- /src/aijournal/simulator/__init__.py: -------------------------------------------------------------------------------- 1 | """Human-style simulator utilities for exercising the capture pipeline.""" 2 | 3 | from .fixtures import FixtureEntry, FixtureWorkspace, build_fixture_workspace 4 | from .orchestrator import HumanSimulator, SimulationReport 5 | from .validators import ValidationFailure, ValidationReport 6 | 7 | __all__ = [ 8 | "FixtureEntry", 9 | "FixtureWorkspace", 10 | "HumanSimulator", 11 | "SimulationReport", 12 | "ValidationFailure", 13 | "ValidationReport", 14 | "build_fixture_workspace", 15 | ] 16 | -------------------------------------------------------------------------------- /tests/test_coercion.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from aijournal.utils.coercion import coerce_float, coerce_int 4 | 5 | 6 | def test_coerce_float_handles_invalid_values() -> None: 7 | assert coerce_float("1.5") == 1.5 8 | assert coerce_float(None) is None 9 | assert coerce_float("not-a-number") is None 10 | 11 | 12 | def test_coerce_int_handles_invalid_values() -> None: 13 | assert coerce_int("7") == 7 14 | assert coerce_int(None) is None 15 | assert coerce_int(3.9) == 3 16 | assert coerce_int("oops") is None 17 | -------------------------------------------------------------------------------- /.github/workflows/schema.yml: -------------------------------------------------------------------------------- 1 | name: Schema Check 2 | 3 | on: 4 | push: 5 | pull_request: 6 | 7 | jobs: 8 | schema: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v4 12 | 13 | - name: Set up Python 14 | uses: actions/setup-python@v6 15 | with: 16 | python-version: "3.13" 17 | 18 | - name: Install uv 19 | uses: astral-sh/setup-uv@v7 20 | 21 | - name: Sync dependencies 22 | run: uv sync 23 | 24 | - name: Verify schemas 25 | run: uv run python scripts/check_schemas.py 26 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.journal.NormalizedEntity.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "description": "Structured entity extracted during normalization.", 3 | "properties": { 4 | "extra": { 5 | "additionalProperties": true, 6 | "title": "Extra", 7 | "type": "object" 8 | }, 9 | "type": { 10 | "title": "Type", 11 | "type": "string" 12 | }, 13 | "value": { 14 | "title": "Value", 15 | "type": "string" 16 | } 17 | }, 18 | "required": [ 19 | "type", 20 | "value" 21 | ], 22 | "title": "NormalizedEntity", 23 | "type": "object" 24 | } 25 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.claims.Scope.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Contextual qualifiers for a claim atom.", 3 | "properties": { 4 | "context": { 5 | "items": { 6 | "type": "string" 7 | }, 8 | "title": "Context", 9 | "type": "array" 10 | }, 11 | "domain": { 12 | "anyOf": [ 13 | { 14 | "type": "string" 15 | }, 16 | { 17 | "type": "null" 18 | } 19 | ], 20 | "default": null, 21 | "title": "Domain" 22 | } 23 | }, 24 | "title": "Scope", 25 | "type": "object" 26 | } 27 | -------------------------------------------------------------------------------- /tests/test_schema_validation.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import pytest 4 | 5 | from aijournal.schema import SchemaValidationError, validate_schema 6 | 7 | 8 | def test_validate_schema_raises_with_aggregate_errors() -> None: 9 | payload = {"unexpected": True} 10 | 11 | with pytest.raises(SchemaValidationError) as excinfo: 12 | validate_schema("summary", payload) 13 | 14 | err = excinfo.value 15 | assert err.schema == "summary" 16 | assert err.errors 17 | assert "Field required" in err.errors[0] 18 | assert "Schema 'summary' validation failed" in str(err) 19 | -------------------------------------------------------------------------------- /src/aijournal/services/microfacts/__init__.py: -------------------------------------------------------------------------------- 1 | """Microfact service utilities (indexing, consolidation, snapshots).""" 2 | 3 | from .index import ( 4 | MicrofactConsolidationStats, 5 | MicrofactIndex, 6 | MicrofactMatch, 7 | MicrofactRebuildResult, 8 | MicrofactRecord, 9 | ) 10 | from .snapshot import load_consolidated_microfacts, select_recurring_facts 11 | 12 | __all__ = [ 13 | "MicrofactConsolidationStats", 14 | "MicrofactIndex", 15 | "MicrofactMatch", 16 | "MicrofactRebuildResult", 17 | "MicrofactRecord", 18 | "load_consolidated_microfacts", 19 | "select_recurring_facts", 20 | ] 21 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | default_stages: [pre-commit] 2 | exclude: "^(data/|derived/|.venv/|.code/)" 3 | repos: 4 | - repo: local 5 | hooks: 6 | - id: ruff 7 | name: ruff 8 | entry: uvx ruff check --fix 9 | language: system 10 | pass_filenames: true 11 | types: [python] 12 | - id: ruff-format 13 | name: ruff-format 14 | entry: uvx ruff format 15 | language: system 16 | pass_filenames: true 17 | types: [python] 18 | - id: mypy 19 | name: mypy 20 | entry: uv run mypy src 21 | language: system 22 | pass_filenames: false 23 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: 
--------------------------------------------------------------------------------
 1 | name: CI
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: ["main"]
 6 |   pull_request:
 7 | 
 8 | jobs:
 9 |   test:
10 |     runs-on: ubuntu-latest
11 |     steps:
12 |       - uses: actions/checkout@v4
13 | 
14 |       - name: Set up Python
15 |         uses: actions/setup-python@v5
16 |         with:
17 |           python-version: "3.13"
18 | 
19 |       - name: Install uv
20 |         uses: astral-sh/setup-uv@v7
21 | 
22 |       - name: Sync dependencies
23 |         run: uv sync
24 | 
25 |       - name: Run tests
26 |         run: uv run pytest -q
27 | 
28 |       - name: Run pre-commit
29 |         run: uvx pre-commit run --all-files
30 | 
--------------------------------------------------------------------------------
/schemas/core/aijournal.domain.packs.PackEntry.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "description": "Single file included in an export pack.",
 3 |   "properties": {
 4 |     "content": {
 5 |       "title": "Content",
 6 |       "type": "string"
 7 |     },
 8 |     "path": {
 9 |       "title": "Path",
10 |       "type": "string"
11 |     },
12 |     "role": {
13 |       "title": "Role",
14 |       "type": "string"
15 |     },
16 |     "tokens": {
17 |       "title": "Tokens",
18 |       "type": "integer"
19 |     }
20 |   },
21 |   "required": [
22 |     "role",
23 |     "path",
24 |     "tokens",
25 |     "content"
26 |   ],
27 |   "title": "PackEntry",
28 |   "type": "object"
29 | }
30 | 
--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
 1 | name: Upload Python Package
 2 | 
 3 | on:
 4 |   release:
 5 |     types: [published]
 6 | 
 7 | jobs:
 8 |   deploy:
 9 |     runs-on: ubuntu-latest
10 |     environment:
11 |       name: pypi
12 |       url: https://pypi.org/p/${{ github.repository }}
13 |     permissions:
14 |       id-token: write
15 |     steps:
16 |       - uses: actions/checkout@v4
17 |         with:
18 |           fetch-depth: 0
19 |       - name: Install uv
20 |         uses: astral-sh/setup-uv@v6
21 |       - name: Build
22 |         run: uv build
23 |       - name: Publish package distributions to PyPI
24 |         uses: pypa/gh-action-pypi-publish@release/v1
25 | 
--------------------------------------------------------------------------------
/schemas/core/aijournal.services.retriever.RetrievalMeta.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "description": "Metadata describing a retrieval invocation.",
 3 |   "properties": {
 4 |     "fake_mode": {
 5 |       "title": "Fake Mode",
 6 |       "type": "boolean"
 7 |     },
 8 |     "k": {
 9 |       "title": "K",
10 |       "type": "integer"
11 |     },
12 |     "mode": {
13 |       "title": "Mode",
14 |       "type": "string"
15 |     },
16 |     "source": {
17 |       "title": "Source",
18 |       "type": "string"
19 |     }
20 |   },
21 |   "required": [
22 |     "mode",
23 |     "source",
24 |     "k",
25 |     "fake_mode"
26 |   ],
27 |   "title": "RetrievalMeta",
28 |   "type": "object"
29 | }
30 | 
--------------------------------------------------------------------------------
/tests/test_cli_version.py:
--------------------------------------------------------------------------------
 1 | """Smoke test for the `aij version` command."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | import pytest
 6 | from typer.testing import CliRunner
 7 | 
 8 | from aijournal import _version
 9 | from aijournal.cli import app
10 | 
11 | 
12 | def test_version_command(monkeypatch: pytest.MonkeyPatch) -> None:
13 |     # Force a deterministic version so we can assert the output
14 |     monkeypatch.setattr(_version, "__version__", "9.3.1-test")
15 |     runner = CliRunner()
16 | 
17 |     result = runner.invoke(app, ["version"])
18 | 
19 |     assert result.exit_code == 0
20 |     assert "aijournal
version: 9.3.1-test" in result.stdout 20 | assert "source root:" in result.stdout 21 | -------------------------------------------------------------------------------- /justfile: -------------------------------------------------------------------------------- 1 | set shell := ["/bin/sh", "-c"] 2 | 3 | default: 4 | @just --help 5 | 6 | test: 7 | uv run pytest -q 8 | 9 | test_cov: 10 | uv run pytest --cov=src -q 11 | 12 | mypy: 13 | uv run mypy src 14 | 15 | lint: 16 | uv run ruff check src tests 17 | 18 | fmt: 19 | uv run ruff format src tests 20 | 21 | health: 22 | uv run aijournal ollama health 23 | 24 | fake_on: 25 | echo "export AIJOURNAL_FAKE_OLLAMA=1" 26 | 27 | ci: 28 | uv run pytest -q && uv run mypy src && uv run python scripts/check_schemas.py 29 | 30 | precommit_dry: 31 | uvx pre-commit run --all-files --show-diff-on-failure 32 | 33 | precommit: 34 | uvx pre-commit run --all-files 35 | -------------------------------------------------------------------------------- /tests/fixtures/miniwk/config.yaml: -------------------------------------------------------------------------------- 1 | model: "gpt-oss:20b" 2 | temperature: 0.2 3 | seed: 42 4 | paths: 5 | data: "data" 6 | profile: "profile" 7 | derived: "derived" 8 | prompts: "prompts" 9 | impact_weights: 10 | values_goals: 1.5 11 | decision_style: 1.3 12 | affect_energy: 1.2 13 | traits: 1.0 14 | social: 0.9 15 | claims: 1.0 16 | claim_types: 17 | value: 1.4 18 | goal: 1.4 19 | boundary: 1.3 20 | trait: 1.2 21 | preference: 1.0 22 | habit: 0.9 23 | aversion: 1.1 24 | skill: 1.0 25 | advisor: 26 | max_recos: 3 27 | include_risks: true 28 | token_estimator: 29 | char_per_token: 4.2 30 | persona: 31 | token_budget: 1200 32 | max_claims: 24 33 | min_claims: 8 34 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.chat.ChatTelemetry.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Telemetry captured during a chat turn.", 3 | "properties": { 4 | "chunk_count": { 5 | "title": "Chunk Count", 6 | "type": "integer" 7 | }, 8 | "model": { 9 | "title": "Model", 10 | "type": "string" 11 | }, 12 | "retrieval_ms": { 13 | "title": "Retrieval Ms", 14 | "type": "number" 15 | }, 16 | "retriever_source": { 17 | "title": "Retriever Source", 18 | "type": "string" 19 | } 20 | }, 21 | "required": [ 22 | "retrieval_ms", 23 | "chunk_count", 24 | "retriever_source", 25 | "model" 26 | ], 27 | "title": "ChatTelemetry", 28 | "type": "object" 29 | } 30 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.facts.MicrofactConsolidationSummary.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Per-day summary emitted during rebuild operations.", 3 | "properties": { 4 | "day": { 5 | "title": "Day", 6 | "type": "string" 7 | }, 8 | "merged_records": { 9 | "title": "Merged Records", 10 | "type": "integer" 11 | }, 12 | "new_records": { 13 | "title": "New Records", 14 | "type": "integer" 15 | }, 16 | "processed": { 17 | "title": "Processed", 18 | "type": "integer" 19 | } 20 | }, 21 | "required": [ 22 | "day", 23 | "processed", 24 | "new_records", 25 | "merged_records" 26 | ], 27 | "title": "MicrofactConsolidationSummary", 28 | "type": "object" 29 | } 30 | -------------------------------------------------------------------------------- /src/aijournal/_version.py: 
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from pathlib import Path
 4 | 
 5 | __version__ = ""
 6 | 
 7 | if not __version__:
 8 |     try:
 9 |         import versioningit
10 |         from versioningit.errors import Error as VersioningitError
11 |     except ImportError:  # pragma: no cover
12 |         import importlib.metadata
13 | 
14 |         __version__ = importlib.metadata.version("aijournal")
15 |     else:
16 |         PROJECT_DIR = Path(__file__).resolve().parents[2]
17 |         try:
18 |             __version__ = versioningit.get_version(project_dir=PROJECT_DIR)
19 |         except VersioningitError:
20 |             import importlib.metadata
21 | 
22 |             __version__ = importlib.metadata.version("aijournal")
23 | 
--------------------------------------------------------------------------------
/src/aijournal/utils/coercion.py:
--------------------------------------------------------------------------------
 1 | """Lightweight helpers for coercing loosely typed config values."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | from typing import Any
 6 | 
 7 | 
 8 | def coerce_float(value: Any) -> float | None:
 9 |     """Best-effort float conversion; returns None when coercion fails."""
10 |     if value is None:
11 |         return None
12 |     try:
13 |         return float(value)
14 |     except (TypeError, ValueError):
15 |         return None
16 | 
17 | 
18 | def coerce_int(value: Any) -> int | None:
19 |     """Best-effort int conversion; returns None when coercion fails."""
20 |     if value is None:
21 |         return None
22 |     try:
23 |         return int(value)
24 |     except (TypeError, ValueError):
25 |         return None
26 | 
--------------------------------------------------------------------------------
/tests/test_yaml_io.py:
--------------------------------------------------------------------------------
 1 | """Behavioral tests for YAML serialization helpers."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | from aijournal.io.yaml_io import dump_yaml
 6 | 
 7 | 
 8 | def test_dump_yaml_preserves_unicode_characters() -> None:
 9 |     payload = {"title": "Karakter – F. Bordewijk 📖"}
10 | 
11 |     serialized = dump_yaml(payload)
12 | 
13 |     assert "Karakter – F.
Bordewijk 📖" in serialized 14 | assert "\\u" not in serialized 15 | 16 | 17 | def test_dump_yaml_uses_literal_block_for_multiline_strings() -> None: 18 | payload = {"summary": "*1938*\n\n> Op dat ogenblik"} 19 | 20 | serialized = dump_yaml(payload) 21 | 22 | assert "summary: |" in serialized 23 | assert "*1938*" in serialized.splitlines()[1] 24 | assert " > Op dat ogenblik" in serialized 25 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.facts.DailySummary.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Derived day summary (PLAN §4.1).", 3 | "properties": { 4 | "bullets": { 5 | "items": { 6 | "type": "string" 7 | }, 8 | "title": "Bullets", 9 | "type": "array" 10 | }, 11 | "day": { 12 | "title": "Day", 13 | "type": "string" 14 | }, 15 | "highlights": { 16 | "items": { 17 | "type": "string" 18 | }, 19 | "title": "Highlights", 20 | "type": "array" 21 | }, 22 | "todo_candidates": { 23 | "items": { 24 | "type": "string" 25 | }, 26 | "title": "Todo Candidates", 27 | "type": "array" 28 | } 29 | }, 30 | "required": [ 31 | "day" 32 | ], 33 | "title": "DailySummary", 34 | "type": "object" 35 | } 36 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.events.FeedbackAdjustmentEvent.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Record of a claim strength adjustment triggered by chat feedback.", 3 | "properties": { 4 | "claim_id": { 5 | "title": "Claim Id", 6 | "type": "string" 7 | }, 8 | "delta": { 9 | "title": "Delta", 10 | "type": "number" 11 | }, 12 | "kind": { 13 | "default": "feedback", 14 | "title": "Kind", 15 | "type": "string" 16 | }, 17 | "new_strength": { 18 | "title": "New Strength", 19 | "type": "number" 20 | }, 21 | "old_strength": { 22 | "title": "Old Strength", 23 | "type": "number" 24 | } 25 | }, 26 | "required": [ 27 | "claim_id", 28 | "old_strength", 29 | "new_strength", 30 | "delta" 31 | ], 32 | "title": "FeedbackAdjustmentEvent", 33 | "type": "object" 34 | } 35 | -------------------------------------------------------------------------------- /tests/services/capture/test_summary_policy.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from aijournal.services.capture.stages import stage0_persist as stage0 4 | 5 | 6 | def test_missing_summary_uses_first_paragraph() -> None: 7 | body = ( 8 | "First line with extra spaces.\nStill first paragraph.\n\nSecond paragraph ignores this." 9 | ) 10 | summary = stage0._derive_summary_text(None, body) 11 | assert summary == "First line with extra spaces. Still first paragraph." 
12 | 13 | 14 | def test_existing_summary_remains_unchanged() -> None: 15 | summary = stage0._derive_summary_text("Custom summary", "Body text") 16 | assert summary == "Custom summary" 17 | 18 | 19 | def test_long_summary_truncates_with_ellipsis() -> None: 20 | body = "Lorem ipsum " * 50 # >400 chars 21 | summary = stage0._derive_summary_text(None, body, max_chars=100) 22 | assert summary.endswith("...") 23 | assert len(summary) <= 103 24 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.journal.Section.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Normalized representation of a markdown heading or section.", 3 | "properties": { 4 | "heading": { 5 | "title": "Heading", 6 | "type": "string" 7 | }, 8 | "level": { 9 | "default": 1, 10 | "title": "Level", 11 | "type": "integer" 12 | }, 13 | "para_index": { 14 | "anyOf": [ 15 | { 16 | "type": "integer" 17 | }, 18 | { 19 | "type": "null" 20 | } 21 | ], 22 | "default": null, 23 | "title": "Para Index" 24 | }, 25 | "summary": { 26 | "anyOf": [ 27 | { 28 | "type": "string" 29 | }, 30 | { 31 | "type": "null" 32 | } 33 | ], 34 | "default": null, 35 | "title": "Summary" 36 | } 37 | }, 38 | "required": [ 39 | "heading" 40 | ], 41 | "title": "Section", 42 | "type": "object" 43 | } 44 | -------------------------------------------------------------------------------- /src/aijournal/domain/enums.py: -------------------------------------------------------------------------------- 1 | """Shared enum types used across aijournal domain models.""" 2 | 3 | from __future__ import annotations 4 | 5 | from enum import StrEnum 6 | 7 | 8 | class ClaimType(StrEnum): 9 | PREFERENCE = "preference" 10 | VALUE = "value" 11 | GOAL = "goal" 12 | BOUNDARY = "boundary" 13 | TRAIT = "trait" 14 | HABIT = "habit" 15 | AVERSION = "aversion" 16 | SKILL = "skill" 17 | 18 | 19 | class ClaimStatus(StrEnum): 20 | ACCEPTED = "accepted" 21 | TENTATIVE = "tentative" 22 | REJECTED = "rejected" 23 | 24 | 25 | class FacetOperation(StrEnum): 26 | SET = "set" 27 | REMOVE = "remove" 28 | MERGE = "merge" 29 | 30 | 31 | class ClaimEventAction(StrEnum): 32 | UPSERT = "upsert" 33 | UPDATE = "update" 34 | DELETE = "delete" 35 | CONFLICT = "conflict" 36 | STRENGTH_DELTA = "strength_delta" 37 | 38 | 39 | class FeedbackDirection(StrEnum): 40 | UP = "up" 41 | DOWN = "down" 42 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.persona.InterviewQuestion.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Structured interview question proposed by the characterization pipeline.", 3 | "properties": { 4 | "id": { 5 | "title": "Id", 6 | "type": "string" 7 | }, 8 | "priority": { 9 | "anyOf": [ 10 | { 11 | "type": "string" 12 | }, 13 | { 14 | "type": "null" 15 | } 16 | ], 17 | "default": null, 18 | "title": "Priority" 19 | }, 20 | "target_facet": { 21 | "anyOf": [ 22 | { 23 | "type": "string" 24 | }, 25 | { 26 | "type": "null" 27 | } 28 | ], 29 | "default": null, 30 | "title": "Target Facet" 31 | }, 32 | "text": { 33 | "title": "Text", 34 | "type": "string" 35 | } 36 | }, 37 | "required": [ 38 | "id", 39 | "text" 40 | ], 41 | "title": "InterviewQuestion", 42 | "type": "object" 43 | } 44 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.events.ClaimSignaturePayload.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "description": "Serialized signature describing the target slot for a claim.", 3 | "properties": { 4 | "claim_type": { 5 | "title": "Claim Type", 6 | "type": "string" 7 | }, 8 | "context": { 9 | "items": { 10 | "type": "string" 11 | }, 12 | "title": "Context", 13 | "type": "array" 14 | }, 15 | "domain": { 16 | "anyOf": [ 17 | { 18 | "type": "string" 19 | }, 20 | { 21 | "type": "null" 22 | } 23 | ], 24 | "default": null, 25 | "title": "Domain" 26 | }, 27 | "predicate": { 28 | "title": "Predicate", 29 | "type": "string" 30 | }, 31 | "subject": { 32 | "title": "Subject", 33 | "type": "string" 34 | } 35 | }, 36 | "required": [ 37 | "claim_type", 38 | "subject", 39 | "predicate" 40 | ], 41 | "title": "ClaimSignaturePayload", 42 | "type": "object" 43 | } 44 | -------------------------------------------------------------------------------- /src/aijournal/domain/persona.py: -------------------------------------------------------------------------------- 1 | """Persona and interview domain models for strict schema alignment.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import Any 6 | 7 | from pydantic import Field 8 | 9 | from aijournal.common.base import StrictModel 10 | from aijournal.domain.claims import ClaimAtom # noqa: TC001 11 | 12 | 13 | class PersonaCore(StrictModel): 14 | """Primary persona payload used by chat/advise pipelines.""" 15 | 16 | profile: dict[str, Any] = Field(default_factory=dict) 17 | claims: list[ClaimAtom] = Field(default_factory=list) 18 | 19 | 20 | class InterviewQuestion(StrictModel): 21 | """Structured interview question proposed by the characterization pipeline.""" 22 | 23 | id: str 24 | text: str 25 | target_facet: str | None = None 26 | priority: str | None = None 27 | 28 | 29 | class InterviewSet(StrictModel): 30 | """Collection of interview questions to review with the operator.""" 31 | 32 | questions: list[InterviewQuestion] = Field(default_factory=list) 33 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.prompts.PromptMicroFact.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Lightweight micro-fact emitted by the LLM.", 3 | "properties": { 4 | "confidence": { 5 | "anyOf": [ 6 | { 7 | "maximum": 1.0, 8 | "minimum": 0.0, 9 | "type": "number" 10 | }, 11 | { 12 | "type": "null" 13 | } 14 | ], 15 | "default": null, 16 | "title": "Confidence" 17 | }, 18 | "evidence_entry": { 19 | "anyOf": [ 20 | { 21 | "type": "string" 22 | }, 23 | { 24 | "type": "null" 25 | } 26 | ], 27 | "default": null, 28 | "title": "Evidence Entry" 29 | }, 30 | "id": { 31 | "title": "Id", 32 | "type": "string" 33 | }, 34 | "statement": { 35 | "maxLength": 500, 36 | "title": "Statement", 37 | "type": "string" 38 | } 39 | }, 40 | "required": [ 41 | "id", 42 | "statement" 43 | ], 44 | "title": "PromptMicroFact", 45 | "type": "object" 46 | } 47 | -------------------------------------------------------------------------------- /schemas/core/aijournal.services.consolidator.ClaimSignature.json: -------------------------------------------------------------------------------- 1 | { 2 | "properties": { 3 | "claim_type": { 4 | "title": "Claim Type", 5 | "type": "string" 6 | }, 7 | "predicate": { 8 | "title": "Predicate", 9 | "type": "string" 10 | }, 11 | "scope": { 12 | "maxItems": 2, 13 | "minItems": 2, 14 | "prefixItems": [ 15 | { 16 | "anyOf": [ 17 | { 18 | "type": 
"string" 19 | }, 20 | { 21 | "type": "null" 22 | } 23 | ] 24 | }, 25 | { 26 | "items": { 27 | "type": "string" 28 | }, 29 | "type": "array" 30 | } 31 | ], 32 | "title": "Scope", 33 | "type": "array" 34 | }, 35 | "subject": { 36 | "title": "Subject", 37 | "type": "string" 38 | } 39 | }, 40 | "required": [ 41 | "claim_type", 42 | "subject", 43 | "predicate", 44 | "scope" 45 | ], 46 | "title": "ClaimSignature", 47 | "type": "object" 48 | } 49 | -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- 1 | # Tests 2 | 3 | `uv run pytest -q` exercises the full CLI surface plus the shared Pydantic helpers. Most test 4 | modules export a `_has_command` guard so partially implemented commands can be skipped without 5 | breaking the suite. 6 | 7 | Key suites: 8 | 9 | - `tests/test_models_io.py` — round-trip coverage for every Pydantic model. Ensures the runtime 10 | validation remains aligned with the Python definitions. 11 | - `tests/test_cli_*.py` — functional coverage for init/new/ingest/normalize/summarize/facts/profile 12 | flows, all running with `AIJOURNAL_FAKE_OLLAMA=1` so CI never needs a model. 13 | - `tests/test_cli_pack.py` — validates packing logic, trim ordering, and token budgeting. 14 | 15 | When developing locally, set `AIJOURNAL_FAKE_OLLAMA=1` before running tests to avoid hitting a live 16 | model: 17 | 18 | ```sh 19 | export AIJOURNAL_FAKE_OLLAMA=1 20 | uv run pytest -q 21 | ``` 22 | 23 | The CLI automatically falls back to fake fixtures if an Ollama call fails, but exporting the env var 24 | keeps results deterministic for golden snapshots. 25 | -------------------------------------------------------------------------------- /prompts/examples/profile_update.json: -------------------------------------------------------------------------------- 1 | { 2 | "claims": [ 3 | { 4 | "type": "habit", 5 | "statement": "Blocks 8–10am for focus sprints three mornings a week.", 6 | "subject": "focus blocks", 7 | "predicate": "maintains", 8 | "strength": 0.72, 9 | "status": "tentative", 10 | "scope_domain": "work", 11 | "scope_context": [ 12 | "weekday", 13 | "solo" 14 | ], 15 | "reason": "Entries + summary cite recurring 8–10am deep work blocks.", 16 | "evidence_entry": "2025-10-28-focus-reset", 17 | "evidence_para": 0 18 | } 19 | ], 20 | "facets": [ 21 | { 22 | "path": "planning.focus_blocks.morning", 23 | "operation": "set", 24 | "value": "Protects 8–10am Tue–Thu for uninterrupted build work.", 25 | "reason": "Microfact + highlights stress morning protection ritual.", 26 | "evidence_entry": "2025-10-28-focus-reset", 27 | "evidence_para": 1 28 | } 29 | ], 30 | "interview_prompts": [ 31 | "What exceptions force skipping the 8–10am block?" 
32 | ] 33 | } 34 | -------------------------------------------------------------------------------- /schemas/core/aijournal.services.retriever.RetrievalFilters.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Optional filters applied during retrieval.", 3 | "properties": { 4 | "date_from": { 5 | "anyOf": [ 6 | { 7 | "type": "string" 8 | }, 9 | { 10 | "type": "null" 11 | } 12 | ], 13 | "default": null, 14 | "title": "Date From" 15 | }, 16 | "date_to": { 17 | "anyOf": [ 18 | { 19 | "type": "string" 20 | }, 21 | { 22 | "type": "null" 23 | } 24 | ], 25 | "default": null, 26 | "title": "Date To" 27 | }, 28 | "source_types": { 29 | "items": { 30 | "type": "string" 31 | }, 32 | "title": "Source Types", 33 | "type": "array", 34 | "uniqueItems": true 35 | }, 36 | "tags": { 37 | "items": { 38 | "type": "string" 39 | }, 40 | "title": "Tags", 41 | "type": "array", 42 | "uniqueItems": true 43 | } 44 | }, 45 | "title": "RetrievalFilters", 46 | "type": "object" 47 | } 48 | -------------------------------------------------------------------------------- /src/aijournal/domain/packs.py: -------------------------------------------------------------------------------- 1 | """Strict models representing export packs.""" 2 | 3 | from __future__ import annotations 4 | 5 | from pydantic import Field 6 | 7 | from aijournal.common.base import StrictModel 8 | from aijournal.common.types import TimestampStr # noqa: TC001 9 | 10 | 11 | class PackEntry(StrictModel): 12 | """Single file included in an export pack.""" 13 | 14 | role: str 15 | path: str 16 | tokens: int 17 | content: str 18 | 19 | 20 | class TrimmedFile(StrictModel): 21 | """Record of a file trimmed due to token budget limits.""" 22 | 23 | role: str 24 | path: str 25 | 26 | 27 | class PackMeta(StrictModel): 28 | """Metadata describing the assembled pack.""" 29 | 30 | total_tokens: int 31 | max_tokens: int 32 | trimmed: list[TrimmedFile] = Field(default_factory=list) 33 | generated_at: TimestampStr 34 | 35 | 36 | class PackBundle(StrictModel): 37 | """Structured representation of a pack export.""" 38 | 39 | level: str 40 | date: str 41 | files: list[PackEntry] = Field(default_factory=list) 42 | meta: PackMeta 43 | -------------------------------------------------------------------------------- /src/aijournal/models/claim_atoms.py: -------------------------------------------------------------------------------- 1 | """Compatibility shim re-exporting strict claim models from `aijournal.domain.claims`.""" 2 | 3 | from __future__ import annotations 4 | 5 | import warnings 6 | 7 | from aijournal.domain.claims import ( 8 | ClaimAtom as _ClaimAtom, 9 | ) 10 | from aijournal.domain.claims import ( 11 | ClaimAtomsFile as _ClaimAtomsFile, 12 | ) 13 | from aijournal.domain.claims import ClaimSource 14 | from aijournal.domain.claims import ( 15 | Provenance as _Provenance, 16 | ) 17 | from aijournal.domain.claims import ( 18 | Scope as _Scope, 19 | ) 20 | from aijournal.domain.enums import ClaimStatus, ClaimType 21 | 22 | warnings.warn( 23 | "Import claim models from `aijournal.domain.claims` instead of `aijournal.models.claim_atoms`.", 24 | DeprecationWarning, 25 | stacklevel=2, 26 | ) 27 | 28 | Scope = _Scope 29 | Provenance = _Provenance 30 | ClaimAtom = _ClaimAtom 31 | ClaimAtomsFile = _ClaimAtomsFile 32 | 33 | __all__ = [ 34 | "ClaimAtom", 35 | "ClaimAtomsFile", 36 | "ClaimSource", 37 | "ClaimStatus", 38 | "ClaimType", 39 | "Provenance", 40 | "Scope", 41 | ] 42 | 
-------------------------------------------------------------------------------- /schemas/core/aijournal.models.derived.ProfileUpdateInput.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Normalized entry metadata captured in a characterization batch.", 3 | "properties": { 4 | "id": { 5 | "title": "Id", 6 | "type": "string" 7 | }, 8 | "manifest_hash": { 9 | "anyOf": [ 10 | { 11 | "type": "string" 12 | }, 13 | { 14 | "type": "null" 15 | } 16 | ], 17 | "default": null, 18 | "title": "Manifest Hash" 19 | }, 20 | "normalized_path": { 21 | "title": "Normalized Path", 22 | "type": "string" 23 | }, 24 | "source_hash": { 25 | "anyOf": [ 26 | { 27 | "type": "string" 28 | }, 29 | { 30 | "type": "null" 31 | } 32 | ], 33 | "default": null, 34 | "title": "Source Hash" 35 | }, 36 | "tags": { 37 | "items": { 38 | "type": "string" 39 | }, 40 | "title": "Tags", 41 | "type": "array" 42 | } 43 | }, 44 | "required": [ 45 | "id", 46 | "normalized_path" 47 | ], 48 | "title": "ProfileUpdateInput", 49 | "type": "object" 50 | } 51 | -------------------------------------------------------------------------------- /tests/test_api_capture.py: -------------------------------------------------------------------------------- 1 | """Tests for the public capture API models.""" 2 | 3 | from __future__ import annotations 4 | 5 | from aijournal.api.capture import CaptureInput, CaptureRequest 6 | 7 | 8 | def test_capture_request_has_no_stage_fields() -> None: 9 | """The public request schema must not expose internal stage controls.""" 10 | fields = CaptureRequest.model_fields 11 | assert "min_stage" not in fields 12 | assert "max_stage" not in fields 13 | 14 | 15 | def test_capture_request_to_input_conversion() -> None: 16 | """CaptureInput should faithfully extend CaptureRequest data.""" 17 | request = CaptureRequest(source="stdin", text="Hello", tags=["focus"]) 18 | capture_input = CaptureInput.from_request(request, min_stage=2, max_stage=4) 19 | 20 | for key, value in request.model_dump(mode="python").items(): 21 | assert getattr(capture_input, key) == value 22 | assert capture_input.min_stage == 2 23 | assert capture_input.max_stage == 4 24 | 25 | 26 | def test_capture_request_retries_defaults_to_none() -> None: 27 | request = CaptureRequest(source="stdin", text="Body") 28 | assert request.retries is None 29 | -------------------------------------------------------------------------------- /tests/test_cli_simulator.py: -------------------------------------------------------------------------------- 1 | """CLI coverage for the human simulator command.""" 2 | 3 | from __future__ import annotations 4 | 5 | import shutil 6 | from typing import TYPE_CHECKING 7 | 8 | from typer.testing import CliRunner 9 | 10 | from aijournal.cli import app 11 | 12 | if TYPE_CHECKING: 13 | from pathlib import Path 14 | 15 | import pytest 16 | 17 | 18 | def test_cli_ops_dev_human_simulator_runs_full_pipeline( 19 | tmp_path: Path, 20 | monkeypatch: pytest.MonkeyPatch, 21 | ) -> None: 22 | runner = CliRunner() 23 | monkeypatch.setenv("AIJOURNAL_FAKE_OLLAMA", "1") 24 | output = tmp_path / "human-sim" 25 | result = runner.invoke( 26 | app, 27 | [ 28 | "ops", 29 | "dev", 30 | "human-sim", 31 | "--output", 32 | str(output), 33 | "--keep-workspace", 34 | "--max-stage", 35 | "8", 36 | "--pack-level", 37 | "L1", 38 | ], 39 | ) 40 | 41 | assert result.exit_code == 0, result.stdout 42 | assert "Result: PASS" in result.stdout 43 | assert output.exists() 44 | 45 | shutil.rmtree(output, 
ignore_errors=True) 46 | -------------------------------------------------------------------------------- /src/aijournal/domain/chat.py: -------------------------------------------------------------------------------- 1 | """Domain models for chat turns and telemetry.""" 2 | 3 | from __future__ import annotations 4 | 5 | from pydantic import Field 6 | 7 | from aijournal.api.chat import ChatCitation, ChatResponse # noqa: TC001 8 | from aijournal.common.base import StrictModel 9 | from aijournal.common.types import TimestampStr # noqa: TC001 10 | from aijournal.domain.index import RetrievedChunk # noqa: TC001 11 | from aijournal.domain.persona import PersonaCore # noqa: TC001 12 | 13 | 14 | class ChatTelemetry(StrictModel): 15 | """Telemetry captured during a chat turn.""" 16 | 17 | retrieval_ms: float 18 | chunk_count: int 19 | retriever_source: str 20 | model: str 21 | 22 | 23 | class ChatTurn(StrictModel): 24 | """Structured representation of a chat turn.""" 25 | 26 | question: str 27 | answer: str 28 | response: ChatResponse 29 | persona: PersonaCore 30 | citations: list[ChatCitation] = Field(default_factory=list) 31 | retrieved_chunks: list[RetrievedChunk] = Field(default_factory=list) 32 | fake_mode: bool 33 | intent: str 34 | clarifying_question: str | None = None 35 | telemetry: ChatTelemetry 36 | timestamp: TimestampStr 37 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | """Shared pytest fixtures for CLI integration tests.""" 2 | 3 | from __future__ import annotations 4 | 5 | from datetime import UTC, datetime 6 | from typing import TYPE_CHECKING 7 | 8 | import pytest 9 | from typer.testing import CliRunner 10 | 11 | from aijournal.cli import app 12 | 13 | if TYPE_CHECKING: 14 | from pathlib import Path 15 | 16 | _FIXED_NOW = datetime(2025, 2, 3, 12, 0, tzinfo=UTC) 17 | 18 | 19 | @pytest.fixture 20 | def cli_runner() -> CliRunner: 21 | """Return a Typer CliRunner for invoking the CLI.""" 22 | return CliRunner() 23 | 24 | 25 | @pytest.fixture 26 | def cli_workspace( 27 | tmp_path: Path, 28 | monkeypatch: pytest.MonkeyPatch, 29 | cli_runner: CliRunner, 30 | ) -> Path: 31 | """Initialize a deterministic CLI workspace inside a temporary directory.""" 32 | monkeypatch.chdir(tmp_path) 33 | monkeypatch.setenv("AIJOURNAL_FAKE_OLLAMA", "1") 34 | monkeypatch.setattr("aijournal.utils.time.now", lambda: _FIXED_NOW) 35 | 36 | result = cli_runner.invoke(app, ["init"]) 37 | if result.exit_code != 0: 38 | msg = f"Failed to initialize CLI workspace: {result.stdout}" 39 | raise RuntimeError(msg) 40 | 41 | return tmp_path 42 | -------------------------------------------------------------------------------- /src/aijournal/utils/time.py: -------------------------------------------------------------------------------- 1 | """Time and formatting helpers shared across aijournal modules.""" 2 | 3 | from __future__ import annotations 4 | 5 | import re 6 | from datetime import UTC, datetime 7 | from typing import TYPE_CHECKING 8 | 9 | if TYPE_CHECKING: 10 | from collections.abc import Callable 11 | 12 | 13 | def now() -> datetime: 14 | """Return the current UTC timestamp.""" 15 | return datetime.now(tz=UTC) 16 | 17 | 18 | def format_timestamp(dt: datetime) -> str: 19 | """Format a datetime into ISO-8601 (UTC) without offset suffix.""" 20 | return dt.strftime("%Y-%m-%dT%H:%M:%SZ") 21 | 22 | 23 | def slugify_title(title: str) -> str: 24 | """Produce a filesystem-friendly 
slug from free-form text.""" 25 | slug = re.sub(r"[^a-z0-9]+", "-", title.lower()).strip("-") 26 | return slug or "entry" 27 | 28 | 29 | def generate_session_id(clock: Callable[[], datetime] = now) -> str: 30 | """Generate a session identifier using the provided clock.""" 31 | return f"chat-{clock().strftime('%Y%m%d-%H%M%S')}" 32 | 33 | 34 | def created_date(created_at: str) -> str: 35 | """Strip the time component from an ISO-like timestamp string.""" 36 | if "T" in created_at: 37 | return created_at.split("T", 1)[0] 38 | return created_at 39 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.packs.PackMeta.json: -------------------------------------------------------------------------------- 1 | { 2 | "$defs": { 3 | "TrimmedFile": { 4 | "description": "Record of a file trimmed due to token budget limits.", 5 | "properties": { 6 | "path": { 7 | "title": "Path", 8 | "type": "string" 9 | }, 10 | "role": { 11 | "title": "Role", 12 | "type": "string" 13 | } 14 | }, 15 | "required": [ 16 | "role", 17 | "path" 18 | ], 19 | "title": "TrimmedFile", 20 | "type": "object" 21 | } 22 | }, 23 | "description": "Metadata describing the assembled pack.", 24 | "properties": { 25 | "generated_at": { 26 | "title": "Generated At", 27 | "type": "string" 28 | }, 29 | "max_tokens": { 30 | "title": "Max Tokens", 31 | "type": "integer" 32 | }, 33 | "total_tokens": { 34 | "title": "Total Tokens", 35 | "type": "integer" 36 | }, 37 | "trimmed": { 38 | "items": { 39 | "$ref": "#/$defs/TrimmedFile" 40 | }, 41 | "title": "Trimmed", 42 | "type": "array" 43 | } 44 | }, 45 | "required": [ 46 | "total_tokens", 47 | "max_tokens", 48 | "generated_at" 49 | ], 50 | "title": "PackMeta", 51 | "type": "object" 52 | } 53 | -------------------------------------------------------------------------------- /schemas/core/aijournal.api.chat.ChatCitation.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Reference to a retrieved chunk included in a chat response.", 3 | "properties": { 4 | "chunk_id": { 5 | "title": "Chunk Id", 6 | "type": "string" 7 | }, 8 | "chunk_index": { 9 | "title": "Chunk Index", 10 | "type": "integer" 11 | }, 12 | "chunk_type": { 13 | "title": "Chunk Type", 14 | "type": "string" 15 | }, 16 | "code": { 17 | "title": "Code", 18 | "type": "string" 19 | }, 20 | "date": { 21 | "title": "Date", 22 | "type": "string" 23 | }, 24 | "normalized_id": { 25 | "title": "Normalized Id", 26 | "type": "string" 27 | }, 28 | "score": { 29 | "title": "Score", 30 | "type": "number" 31 | }, 32 | "source_path": { 33 | "title": "Source Path", 34 | "type": "string" 35 | }, 36 | "tags": { 37 | "items": { 38 | "type": "string" 39 | }, 40 | "title": "Tags", 41 | "type": "array" 42 | } 43 | }, 44 | "required": [ 45 | "chunk_id", 46 | "code", 47 | "normalized_id", 48 | "chunk_index", 49 | "source_path", 50 | "date", 51 | "score", 52 | "chunk_type" 53 | ], 54 | "title": "ChatCitation", 55 | "type": "object" 56 | } 57 | -------------------------------------------------------------------------------- /src/aijournal/domain/journal.py: -------------------------------------------------------------------------------- 1 | """Journal domain models for normalized entries and sections.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import Any 6 | 7 | from pydantic import Field 8 | 9 | from aijournal.common.base import StrictModel 10 | from aijournal.common.types import TimestampStr # noqa: TC001 11 | 12 | 13 | 
class Section(StrictModel): 14 | """Normalized representation of a markdown heading or section.""" 15 | 16 | heading: str 17 | level: int = 1 18 | summary: str | None = None 19 | para_index: int | None = None 20 | 21 | 22 | class NormalizedEntity(StrictModel): 23 | """Structured entity extracted during normalization.""" 24 | 25 | type: str 26 | value: str 27 | extra: dict[str, Any] = Field(default_factory=dict) 28 | 29 | 30 | class NormalizedEntry(StrictModel): 31 | """Machine-readable journal entry used throughout pipelines.""" 32 | 33 | id: str 34 | created_at: TimestampStr 35 | source_path: str 36 | title: str 37 | tags: list[str] = Field(default_factory=list) 38 | sections: list[Section] = Field(default_factory=list) 39 | entities: list[NormalizedEntity] = Field(default_factory=list) 40 | summary: str | None = None 41 | content: str | None = None 42 | source_hash: str | None = None 43 | source_type: str | None = None 44 | -------------------------------------------------------------------------------- /schemas/core/aijournal.models.authoritative.JournalEntry.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Human-authored Markdown entry metadata.", 3 | "properties": { 4 | "created_at": { 5 | "title": "Created At", 6 | "type": "string" 7 | }, 8 | "id": { 9 | "title": "Id", 10 | "type": "string" 11 | }, 12 | "mood": { 13 | "anyOf": [ 14 | { 15 | "type": "string" 16 | }, 17 | { 18 | "type": "null" 19 | } 20 | ], 21 | "default": null, 22 | "title": "Mood" 23 | }, 24 | "projects": { 25 | "items": { 26 | "type": "string" 27 | }, 28 | "title": "Projects", 29 | "type": "array" 30 | }, 31 | "summary": { 32 | "anyOf": [ 33 | { 34 | "type": "string" 35 | }, 36 | { 37 | "type": "null" 38 | } 39 | ], 40 | "default": null, 41 | "title": "Summary" 42 | }, 43 | "tags": { 44 | "items": { 45 | "type": "string" 46 | }, 47 | "title": "Tags", 48 | "type": "array" 49 | }, 50 | "title": { 51 | "title": "Title", 52 | "type": "string" 53 | } 54 | }, 55 | "required": [ 56 | "id", 57 | "created_at", 58 | "title" 59 | ], 60 | "title": "JournalEntry", 61 | "type": "object" 62 | } 63 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.claims.Provenance.json: -------------------------------------------------------------------------------- 1 | { 2 | "$defs": { 3 | "SourceRef": { 4 | "description": "Reference to a normalized entry that supports a claim or fact.", 5 | "properties": { 6 | "entry_id": { 7 | "title": "Entry Id", 8 | "type": "string" 9 | } 10 | }, 11 | "required": [ 12 | "entry_id" 13 | ], 14 | "title": "SourceRef", 15 | "type": "object" 16 | } 17 | }, 18 | "description": "Provenance metadata recorded for a claim atom.", 19 | "properties": { 20 | "first_seen": { 21 | "anyOf": [ 22 | { 23 | "type": "string" 24 | }, 25 | { 26 | "type": "null" 27 | } 28 | ], 29 | "default": null, 30 | "title": "First Seen" 31 | }, 32 | "last_updated": { 33 | "title": "Last Updated", 34 | "type": "string" 35 | }, 36 | "observation_count": { 37 | "default": 1, 38 | "minimum": 1, 39 | "title": "Observation Count", 40 | "type": "integer" 41 | }, 42 | "sources": { 43 | "items": { 44 | "$ref": "#/$defs/SourceRef" 45 | }, 46 | "title": "Sources", 47 | "type": "array" 48 | } 49 | }, 50 | "required": [ 51 | "last_updated" 52 | ], 53 | "title": "Provenance", 54 | "type": "object" 55 | } 56 | -------------------------------------------------------------------------------- 
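# Illustrative sketch (hypothetical values): constructing the strict journal
# models from src/aijournal/domain/journal.py and round-tripping them through
# Pydantic. Only `id`, `created_at`, `source_path`, and `title` are required;
# the remaining fields fall back to their defaults. This assumes TimestampStr
# accepts ISO-8601 strings with a trailing "Z", matching the output of
# aijournal.utils.time.format_timestamp.
from aijournal.domain.journal import NormalizedEntry, Section

entry = NormalizedEntry(
    id="2025-02-03-focus",
    created_at="2025-02-03T08:30:00Z",
    source_path="data/journal/2025-02-03-focus.md",
    title="Morning focus block",
    tags=["focus"],
    sections=[Section(heading="Highlights", level=2)],
)

# Serializing and re-validating yields an equal model instance.
assert NormalizedEntry.model_validate(entry.model_dump()) == entry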
/schemas/core/aijournal.domain.prompts.PromptFacetItem.json: -------------------------------------------------------------------------------- 1 | { 2 | "$defs": { 3 | "FacetOperation": { 4 | "enum": [ 5 | "set", 6 | "remove", 7 | "merge" 8 | ], 9 | "title": "FacetOperation", 10 | "type": "string" 11 | } 12 | }, 13 | "description": "Lightweight facet change that LLM emits (no system metadata).", 14 | "properties": { 15 | "evidence_entry": { 16 | "anyOf": [ 17 | { 18 | "type": "string" 19 | }, 20 | { 21 | "type": "null" 22 | } 23 | ], 24 | "default": null, 25 | "title": "Evidence Entry" 26 | }, 27 | "operation": { 28 | "$ref": "#/$defs/FacetOperation" 29 | }, 30 | "path": { 31 | "title": "Path", 32 | "type": "string" 33 | }, 34 | "reason": { 35 | "anyOf": [ 36 | { 37 | "type": "string" 38 | }, 39 | { 40 | "type": "null" 41 | } 42 | ], 43 | "default": null, 44 | "title": "Reason" 45 | }, 46 | "value": { 47 | "anyOf": [ 48 | {}, 49 | { 50 | "type": "null" 51 | } 52 | ], 53 | "default": null, 54 | "title": "Value" 55 | } 56 | }, 57 | "required": [ 58 | "path", 59 | "operation" 60 | ], 61 | "title": "PromptFacetItem", 62 | "type": "object" 63 | } 64 | -------------------------------------------------------------------------------- /src/aijournal/pipelines/advise.py: -------------------------------------------------------------------------------- 1 | """Pipeline helpers for generating advice cards.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import TYPE_CHECKING, Any 6 | 7 | from aijournal.fakes import fake_advise 8 | 9 | if TYPE_CHECKING: 10 | from collections.abc import Callable, Sequence 11 | 12 | from aijournal.domain.claims import ClaimAtom 13 | from aijournal.models.derived import AdviceCard 14 | 15 | 16 | def generate_advice( 17 | question: str, 18 | profile: dict[str, Any], 19 | claims: Sequence[ClaimAtom], 20 | *, 21 | use_fake_llm: bool, 22 | advice_identifier: Callable[[str], str], 23 | llm_advice: AdviceCard | None, 24 | rankings: Sequence[object], 25 | pending_prompts: Sequence[str], 26 | ) -> AdviceCard: 27 | """Produce an `AdviceCard` for the given question.""" 28 | if use_fake_llm: 29 | return fake_advise( 30 | question, 31 | profile, 32 | claims, 33 | advice_identifier=advice_identifier, 34 | rankings=rankings, 35 | pending_prompts=pending_prompts, 36 | ) 37 | 38 | if llm_advice is None: 39 | msg = "llm_advice must be provided when fake mode is disabled" 40 | raise ValueError(msg) 41 | advice = llm_advice.model_copy(deep=True) 42 | if not advice.id: 43 | advice.id = advice_identifier(question) 44 | return advice 45 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.facts.MicrofactConsolidationLog.json: -------------------------------------------------------------------------------- 1 | { 2 | "$defs": { 3 | "MicrofactConsolidationSummary": { 4 | "description": "Per-day summary emitted during rebuild operations.", 5 | "properties": { 6 | "day": { 7 | "title": "Day", 8 | "type": "string" 9 | }, 10 | "merged_records": { 11 | "title": "Merged Records", 12 | "type": "integer" 13 | }, 14 | "new_records": { 15 | "title": "New Records", 16 | "type": "integer" 17 | }, 18 | "processed": { 19 | "title": "Processed", 20 | "type": "integer" 21 | } 22 | }, 23 | "required": [ 24 | "day", 25 | "processed", 26 | "new_records", 27 | "merged_records" 28 | ], 29 | "title": "MicrofactConsolidationSummary", 30 | "type": "object" 31 | } 32 | }, 33 | "description": "Artifact capturing the rebuild run summaries.", 34 | 
"properties": { 35 | "entries": { 36 | "items": { 37 | "$ref": "#/$defs/MicrofactConsolidationSummary" 38 | }, 39 | "title": "Entries", 40 | "type": "array" 41 | }, 42 | "generated_at": { 43 | "title": "Generated At", 44 | "type": "string" 45 | } 46 | }, 47 | "required": [ 48 | "generated_at" 49 | ], 50 | "title": "MicrofactConsolidationLog", 51 | "type": "object" 52 | } 53 | -------------------------------------------------------------------------------- /src/aijournal/domain/advice.py: -------------------------------------------------------------------------------- 1 | """Strict advice card models shared by CLI and services.""" 2 | 3 | from __future__ import annotations 4 | 5 | from pydantic import Field 6 | 7 | from aijournal.common.base import StrictModel 8 | 9 | 10 | class AdviceReference(StrictModel): 11 | """References included to ground why advice fits.""" 12 | 13 | facets: list[str] = Field(default_factory=list) 14 | claims: list[str] = Field(default_factory=list) 15 | 16 | 17 | class AdviceRecommendation(StrictModel): 18 | """Single recommendation within an advice card.""" 19 | 20 | title: str 21 | why_this_fits_you: AdviceReference = Field(default_factory=AdviceReference) 22 | steps: list[str] = Field(default_factory=list) 23 | risks: list[str] = Field(default_factory=list) 24 | mitigations: list[str] = Field(default_factory=list) 25 | 26 | 27 | class AdviceCard(StrictModel): 28 | """Structured advice payload produced by LLM pipelines.""" 29 | 30 | id: str | None = None 31 | query: str 32 | assumptions: list[str] = Field(default_factory=list) 33 | recommendations: list[AdviceRecommendation] = Field(default_factory=list) 34 | tradeoffs: list[str] = Field(default_factory=list) 35 | next_actions: list[str] = Field(default_factory=list) 36 | confidence: float | None = None 37 | alignment: AdviceReference = Field(default_factory=AdviceReference) 38 | style: dict[str, object] = Field(default_factory=dict) 39 | -------------------------------------------------------------------------------- /prompts/examples/advise.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": null, 3 | "query": "How can I lock in my morning focus routine?", 4 | "assumptions": [ 5 | "You can reserve time before noon without conflicts." 6 | ], 7 | "recommendations": [ 8 | { 9 | "title": "Book consistent focus blocks", 10 | "why_this_fits_you": { 11 | "facets": [ 12 | "habits.focus_block.length_minutes" 13 | ], 14 | "claims": [ 15 | "goal.focus_hours_per_week" 16 | ] 17 | }, 18 | "steps": [ 19 | "Reserve two 45-minute focus sessions before noon.", 20 | "Protect the blocks in your calendar and announce them to teammates." 21 | ], 22 | "risks": [ 23 | "Teammates may schedule over the reserved time." 24 | ], 25 | "mitigations": [ 26 | "Share the focus plan during the weekly sync." 27 | ] 28 | } 29 | ], 30 | "tradeoffs": [ 31 | "Less flexibility for early collaboration." 32 | ], 33 | "next_actions": [ 34 | "Send a note to the team about your focus blocks today." 
35 | ], 36 | "confidence": 0.72, 37 | "alignment": { 38 | "facets": [ 39 | "values_motivations.recurring_theme" 40 | ], 41 | "claims": [ 42 | "goal.focus_hours_per_week" 43 | ] 44 | }, 45 | "style": { 46 | "tone": "direct", 47 | "reading_level": "intermediate", 48 | "include_risks": true, 49 | "coaching_prompts": false 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.facts.MicroFact.json: -------------------------------------------------------------------------------- 1 | { 2 | "$defs": { 3 | "SourceRef": { 4 | "description": "Reference to a normalized entry that supports a claim or fact.", 5 | "properties": { 6 | "entry_id": { 7 | "title": "Entry Id", 8 | "type": "string" 9 | } 10 | }, 11 | "required": [ 12 | "entry_id" 13 | ], 14 | "title": "SourceRef", 15 | "type": "object" 16 | } 17 | }, 18 | "properties": { 19 | "confidence": { 20 | "title": "Confidence", 21 | "type": "number" 22 | }, 23 | "evidence": { 24 | "$ref": "#/$defs/SourceRef" 25 | }, 26 | "first_seen": { 27 | "anyOf": [ 28 | { 29 | "type": "string" 30 | }, 31 | { 32 | "type": "null" 33 | } 34 | ], 35 | "default": null, 36 | "title": "First Seen" 37 | }, 38 | "id": { 39 | "title": "Id", 40 | "type": "string" 41 | }, 42 | "last_seen": { 43 | "anyOf": [ 44 | { 45 | "type": "string" 46 | }, 47 | { 48 | "type": "null" 49 | } 50 | ], 51 | "default": null, 52 | "title": "Last Seen" 53 | }, 54 | "statement": { 55 | "title": "Statement", 56 | "type": "string" 57 | } 58 | }, 59 | "required": [ 60 | "id", 61 | "statement", 62 | "confidence", 63 | "evidence" 64 | ], 65 | "title": "MicroFact", 66 | "type": "object" 67 | } 68 | -------------------------------------------------------------------------------- /tests/common/test_meta.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import pytest 4 | from pydantic import ValidationError 5 | 6 | from aijournal.common.base import StrictModel 7 | from aijournal.common.meta import Artifact, ArtifactKind, ArtifactMeta, LLMResult 8 | 9 | 10 | class _Payload(StrictModel): 11 | value: int 12 | 13 | 14 | def test_artifact_meta_requires_timestamp() -> None: 15 | with pytest.raises(ValidationError, match="created_at"): 16 | ArtifactMeta.model_validate({}) 17 | 18 | 19 | def test_artifact_defaults_and_strictness() -> None: 20 | meta = ArtifactMeta(created_at="2025-10-29T00:00:00Z") 21 | artifact = Artifact[_Payload]( 22 | kind=ArtifactKind.SUMMARY_DAILY, 23 | meta=meta, 24 | data=_Payload(value=1), 25 | ) 26 | assert artifact.kind is ArtifactKind.SUMMARY_DAILY 27 | assert artifact.model_dump().keys() == {"kind", "meta", "data"} 28 | 29 | artifact = Artifact[_Payload]( 30 | kind=ArtifactKind.SUMMARY_DAILY, 31 | meta=meta, 32 | data=_Payload(value=1), 33 | extra_field="nope", # type: ignore[arg-type] 34 | ) 35 | assert "extra_field" not in artifact.model_dump() 36 | 37 | 38 | def test_llm_result_structure() -> None: 39 | result = LLMResult[_Payload]( 40 | model="gpt-oss:20b", 41 | prompt_path="prompts/example.md", 42 | created_at="2025-10-29T00:00:00Z", 43 | payload=_Payload(value=9), 44 | ) 45 | 46 | assert result.payload.value == 9 47 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.advice.AdviceRecommendation.json: -------------------------------------------------------------------------------- 1 | { 2 | "$defs": { 3 | "AdviceReference": { 4 | "description": "References 
included to ground why advice fits.", 5 | "properties": { 6 | "claims": { 7 | "items": { 8 | "type": "string" 9 | }, 10 | "title": "Claims", 11 | "type": "array" 12 | }, 13 | "facets": { 14 | "items": { 15 | "type": "string" 16 | }, 17 | "title": "Facets", 18 | "type": "array" 19 | } 20 | }, 21 | "title": "AdviceReference", 22 | "type": "object" 23 | } 24 | }, 25 | "description": "Single recommendation within an advice card.", 26 | "properties": { 27 | "mitigations": { 28 | "items": { 29 | "type": "string" 30 | }, 31 | "title": "Mitigations", 32 | "type": "array" 33 | }, 34 | "risks": { 35 | "items": { 36 | "type": "string" 37 | }, 38 | "title": "Risks", 39 | "type": "array" 40 | }, 41 | "steps": { 42 | "items": { 43 | "type": "string" 44 | }, 45 | "title": "Steps", 46 | "type": "array" 47 | }, 48 | "title": { 49 | "title": "Title", 50 | "type": "string" 51 | }, 52 | "why_this_fits_you": { 53 | "$ref": "#/$defs/AdviceReference" 54 | } 55 | }, 56 | "required": [ 57 | "title" 58 | ], 59 | "title": "AdviceRecommendation", 60 | "type": "object" 61 | } 62 | -------------------------------------------------------------------------------- /src/aijournal/utils/text.py: -------------------------------------------------------------------------------- 1 | """String utilities shared across capture/ingest flows.""" 2 | 3 | from __future__ import annotations 4 | 5 | import unicodedata 6 | 7 | INVISIBLE_PREFIX_CHARACTERS = { 8 | "\ufeff", # UTF-8 BOM / zero-width no-break space 9 | "\u200b", # zero-width space 10 | "\u200c", # zero-width non-joiner 11 | "\u200d", # zero-width joiner 12 | "\u2060", # word joiner 13 | "\u2061", # function application 14 | "\u2062", # invisible times 15 | "\u2063", # invisible separator 16 | "\u2064", # invisible plus 17 | "\u202a", # left-to-right embedding 18 | "\u202b", # right-to-left embedding 19 | "\u202c", # pop directional formatting 20 | "\u202d", # left-to-right override 21 | "\u202e", # right-to-left override 22 | } 23 | 24 | 25 | def strip_invisible_prefix(text: str) -> str: 26 | """Remove invisible control characters that precede visible content.""" 27 | index = 0 28 | length = len(text) 29 | while index < length: 30 | char = text[index] 31 | if char == "\x00": # stray NULL bytes from some exports 32 | index += 1 33 | continue 34 | if char in INVISIBLE_PREFIX_CHARACTERS: 35 | index += 1 36 | continue 37 | if unicodedata.category(char) == "Cf": 38 | index += 1 39 | continue 40 | break 41 | if index: 42 | return text[index:] 43 | return text 44 | 45 | 46 | __all__ = ["strip_invisible_prefix"] 47 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.persona.InterviewSet.json: -------------------------------------------------------------------------------- 1 | { 2 | "$defs": { 3 | "InterviewQuestion": { 4 | "description": "Structured interview question proposed by the characterization pipeline.", 5 | "properties": { 6 | "id": { 7 | "title": "Id", 8 | "type": "string" 9 | }, 10 | "priority": { 11 | "anyOf": [ 12 | { 13 | "type": "string" 14 | }, 15 | { 16 | "type": "null" 17 | } 18 | ], 19 | "default": null, 20 | "title": "Priority" 21 | }, 22 | "target_facet": { 23 | "anyOf": [ 24 | { 25 | "type": "string" 26 | }, 27 | { 28 | "type": "null" 29 | } 30 | ], 31 | "default": null, 32 | "title": "Target Facet" 33 | }, 34 | "text": { 35 | "title": "Text", 36 | "type": "string" 37 | } 38 | }, 39 | "required": [ 40 | "id", 41 | "text" 42 | ], 43 | "title": "InterviewQuestion", 44 | "type": "object" 45 | } 46 | }, 
47 | "description": "Collection of interview questions to review with the operator.", 48 | "properties": { 49 | "questions": { 50 | "items": { 51 | "$ref": "#/$defs/InterviewQuestion" 52 | }, 53 | "title": "Questions", 54 | "type": "array" 55 | } 56 | }, 57 | "title": "InterviewSet", 58 | "type": "object" 59 | } 60 | -------------------------------------------------------------------------------- /src/aijournal/domain/claims.py: -------------------------------------------------------------------------------- 1 | """Strict claim/domain models shared across persona and profile pipelines.""" 2 | 3 | from __future__ import annotations 4 | 5 | from pydantic import Field 6 | 7 | from aijournal.common.base import StrictModel 8 | from aijournal.domain.enums import ClaimStatus, ClaimType 9 | from aijournal.domain.evidence import SourceRef 10 | 11 | # Type alias for claim evidence sources. 12 | ClaimSource = SourceRef 13 | 14 | 15 | class Scope(StrictModel): 16 | """Contextual qualifiers for a claim atom.""" 17 | 18 | domain: str | None = None 19 | context: list[str] = Field(default_factory=list) 20 | 21 | 22 | class Provenance(StrictModel): 23 | """Provenance metadata recorded for a claim atom.""" 24 | 25 | sources: list[ClaimSource] = Field(default_factory=list) 26 | first_seen: str | None = None 27 | last_updated: str 28 | observation_count: int = Field(default=1, ge=1) 29 | 30 | 31 | class ClaimAtom(StrictModel): 32 | """Typed, scoped claim describing part of the persona.""" 33 | 34 | id: str 35 | type: ClaimType 36 | subject: str 37 | predicate: str 38 | statement: str 39 | scope: Scope = Field(default_factory=Scope) 40 | strength: float = Field(default=0.5, ge=0.0, le=1.0) 41 | status: ClaimStatus = ClaimStatus.TENTATIVE 42 | review_after_days: int = 120 43 | provenance: Provenance 44 | 45 | 46 | class ClaimAtomsFile(StrictModel): 47 | """Container persisted on disk for multiple claim atoms.""" 48 | 49 | claims: list[ClaimAtom] = Field(default_factory=list) 50 | -------------------------------------------------------------------------------- /src/aijournal/domain/index.py: -------------------------------------------------------------------------------- 1 | """Domain models for retrieval chunks and index metadata.""" 2 | 3 | from __future__ import annotations 4 | 5 | from pydantic import Field 6 | 7 | from aijournal.common.base import StrictModel 8 | 9 | 10 | class Chunk(StrictModel): 11 | """Normalized chunk persisted in the retrieval index.""" 12 | 13 | chunk_id: str 14 | normalized_id: str 15 | chunk_index: int 16 | text: str 17 | chunk_type: str = "entry" 18 | date: str 19 | tags: list[str] = Field(default_factory=list) 20 | source_type: str | None = None 21 | source_path: str 22 | tokens: int 23 | source_hash: str | None = None 24 | manifest_hash: str | None = None 25 | 26 | 27 | class RetrievedChunk(Chunk): 28 | """Chunk returned from retrieval with a similarity score.""" 29 | 30 | score: float 31 | 32 | 33 | class IndexMeta(StrictModel): 34 | """Metadata describing the current retrieval index state.""" 35 | 36 | embedding_model: str | None = None 37 | vector_dimension: int | None = None 38 | chunk_count: int | None = None 39 | entry_count: int | None = None 40 | mode: str | None = None 41 | fake_mode: bool | None = None 42 | search_k_factor: float | None = None 43 | char_per_token: float | None = None 44 | since: str | None = None 45 | limit: int | None = None 46 | touched_dates: list[str] = Field(default_factory=list) 47 | updated_at: str | None = None 48 | 49 | 50 | class 
ChunkBatch(StrictModel): 51 | """Exported chunk set for a given journal day.""" 52 | 53 | day: str 54 | chunks: list[Chunk] = Field(default_factory=list) 55 | -------------------------------------------------------------------------------- /schemas/core/aijournal.api.chat.ChatResponse.json: -------------------------------------------------------------------------------- 1 | { 2 | "$defs": { 3 | "ChatCitationRef": { 4 | "description": "Reference emitted by the LLM; resolved against retrieved chunks.", 5 | "properties": { 6 | "code": { 7 | "minLength": 1, 8 | "title": "Code", 9 | "type": "string" 10 | } 11 | }, 12 | "required": [ 13 | "code" 14 | ], 15 | "title": "ChatCitationRef", 16 | "type": "object" 17 | } 18 | }, 19 | "description": "Structured response returned by the chat LLM.", 20 | "properties": { 21 | "answer": { 22 | "maxLength": 4000, 23 | "title": "Answer", 24 | "type": "string" 25 | }, 26 | "citations": { 27 | "items": { 28 | "$ref": "#/$defs/ChatCitationRef" 29 | }, 30 | "title": "Citations", 31 | "type": "array" 32 | }, 33 | "clarifying_question": { 34 | "anyOf": [ 35 | { 36 | "type": "string" 37 | }, 38 | { 39 | "type": "null" 40 | } 41 | ], 42 | "default": null, 43 | "title": "Clarifying Question" 44 | }, 45 | "telemetry": { 46 | "additionalProperties": true, 47 | "title": "Telemetry", 48 | "type": "object" 49 | }, 50 | "timestamp": { 51 | "anyOf": [ 52 | { 53 | "type": "string" 54 | }, 55 | { 56 | "type": "null" 57 | } 58 | ], 59 | "default": null, 60 | "title": "Timestamp" 61 | } 62 | }, 63 | "required": [ 64 | "answer" 65 | ], 66 | "title": "ChatResponse", 67 | "type": "object" 68 | } 69 | -------------------------------------------------------------------------------- /schemas/core/aijournal.models.authoritative.SelfProfile.json: -------------------------------------------------------------------------------- 1 | { 2 | "properties": { 3 | "affect_energy": { 4 | "additionalProperties": true, 5 | "title": "Affect Energy", 6 | "type": "object" 7 | }, 8 | "boundaries_ethics": { 9 | "additionalProperties": true, 10 | "title": "Boundaries Ethics", 11 | "type": "object" 12 | }, 13 | "coaching_prefs": { 14 | "additionalProperties": true, 15 | "title": "Coaching Prefs", 16 | "type": "object" 17 | }, 18 | "dashboard": { 19 | "additionalProperties": true, 20 | "title": "Dashboard", 21 | "type": "object" 22 | }, 23 | "decision_style": { 24 | "additionalProperties": true, 25 | "title": "Decision Style", 26 | "type": "object" 27 | }, 28 | "goals": { 29 | "additionalProperties": true, 30 | "title": "Goals", 31 | "type": "object" 32 | }, 33 | "habits": { 34 | "additionalProperties": true, 35 | "title": "Habits", 36 | "type": "object" 37 | }, 38 | "planning": { 39 | "additionalProperties": true, 40 | "title": "Planning", 41 | "type": "object" 42 | }, 43 | "social": { 44 | "additionalProperties": true, 45 | "title": "Social", 46 | "type": "object" 47 | }, 48 | "traits": { 49 | "additionalProperties": true, 50 | "title": "Traits", 51 | "type": "object" 52 | }, 53 | "values_motivations": { 54 | "additionalProperties": true, 55 | "title": "Values Motivations", 56 | "type": "object" 57 | } 58 | }, 59 | "title": "SelfProfile", 60 | "type": "object" 61 | } 62 | -------------------------------------------------------------------------------- /docs/prompt_improvement_request.md: -------------------------------------------------------------------------------- 1 | # Prompt Improvement Request 2 | 3 | You are an expert prompt engineer tasked with improving the `aijournal` CLI prompts. 
You have access to the following materials: 4 | 5 | 1. `docs/prompt_evaluation_report.md` – detailed findings from the latest capture run, including successes, failure modes, and per-prompt examples. 6 | 2. `ARCHITECTURE.md` – system design, persona/memory layers, pipelines, and schema guarantees. 7 | 3. `docs/workflow.md` – operator workflow, command order, and pipeline expectations. 8 | 4. `TLDR.md` – capture pipeline quick reference (stages, inputs, outputs). 9 | 5. `README.md` – product overview, goals, and runtime prerequisites. 10 | 11 | ## Your Tasks 12 | 1. For each prompt under `prompts/` (`summarize_day.md`, `extract_facts.md`, `profile_update.md`, `interview.md`, `advise.md`), propose concrete improvements that address the failure modes documented in the report. Include: 13 | - Specific instruction changes (extra constraints, better examples, reminders of schema contracts). 14 | - Validation guardrails (evidence span requirements, duplicate suppression, allowed enums/paths, etc.). 15 | - Any supporting tooling/pipeline adjustments needed for the prompt to operate reliably. 16 | 2. Prioritize fixes that unblock downstream stages (profile updates, persona, advice) and explain inter-prompt dependencies where relevant. 17 | 3. List open questions or follow-up tests required after revising the prompts. 18 | 19 | Deliver your response as a structured plan with headings per prompt plus cross-cutting recommendations. Cite relevant sections/lines in the supplied docs whenever the rationale depends on architectural or workflow decisions. 20 | -------------------------------------------------------------------------------- /src/aijournal/common/constants.py: -------------------------------------------------------------------------------- 1 | """Shared configuration constants. 2 | 3 | This module contains constants used across multiple modules: 4 | - Infrastructure/environment configuration (Ollama, embeddings) 5 | - Shared formats and protocols (timeouts, file suffixes) 6 | - Cross-cutting paths and settings 7 | 8 | Module-specific constants should remain in their respective modules. 
9 | """ 10 | 11 | # ============================================================================ 12 | # LLM & Model Configuration 13 | # ============================================================================ 14 | DEFAULT_OLLAMA_HOST = "http://127.0.0.1:11434" 15 | DEFAULT_MODEL_NAME = "gpt-oss:20b" 16 | DEFAULT_LLM_RETRIES = 4 17 | 18 | # ============================================================================ 19 | # Embedding Configuration 20 | # ============================================================================ 21 | DEFAULT_EMBEDDING_MODEL = "embeddinggemma:300m" 22 | DEFAULT_EMBED_DIM = 384 23 | EMBED_TIMEOUT = 60.0 24 | 25 | # ============================================================================ 26 | # Timeouts 27 | # ============================================================================ 28 | DEFAULT_TIMEOUT_SECONDS = 120.0 29 | 30 | # ============================================================================ 31 | # File Formats 32 | # ============================================================================ 33 | MARKDOWN_SUFFIXES = {".md", ".markdown"} 34 | 35 | # ============================================================================ 36 | # Shared Paths 37 | # ============================================================================ 38 | PENDING_UPDATES_SUBDIR = "derived/pending/profile_updates" 39 | -------------------------------------------------------------------------------- /tests/test_claim_atoms.py: -------------------------------------------------------------------------------- 1 | """Unit tests for typed claim atom models.""" 2 | 3 | from __future__ import annotations 4 | 5 | from aijournal.domain.claims import ClaimAtom, ClaimAtomsFile 6 | 7 | 8 | def _sample_atom_dict() -> dict: 9 | return { 10 | "id": "pref.deep_work.window", 11 | "type": "preference", 12 | "subject": "deep_work", 13 | "predicate": "best_window", 14 | "statement": "Best deep work between 09:00–12:00 on weekdays.", 15 | "scope": { 16 | "domain": "work", 17 | "context": ["weekday"], 18 | }, 19 | "strength": 0.78, 20 | "status": "accepted", 21 | "review_after_days": 120, 22 | "provenance": { 23 | "sources": [ 24 | { 25 | "entry_id": "2025-10-25_x9t3", 26 | }, 27 | ], 28 | "first_seen": "2024-11-02", 29 | "last_updated": "2025-10-25T10:10:00Z", 30 | }, 31 | } 32 | 33 | 34 | def test_claim_atom_model_round_trip() -> None: 35 | atom = ClaimAtom.model_validate(_sample_atom_dict()) 36 | assert atom.type == "preference" 37 | assert atom.status == "accepted" 38 | assert atom.scope.domain == "work" 39 | assert atom.provenance.sources[0].entry_id == "2025-10-25_x9t3" 40 | 41 | dumped = atom.model_dump() 42 | assert dumped["scope"]["context"] == ["weekday"] 43 | assert dumped["provenance"]["sources"][0]["entry_id"] == "2025-10-25_x9t3" 44 | 45 | 46 | def test_claim_atoms_file_container() -> None: 47 | atoms_file = ClaimAtomsFile.model_validate({"claims": [_sample_atom_dict()]}) 48 | assert len(atoms_file.claims) == 1 49 | assert atoms_file.claims[0].statement.startswith("Best deep work") 50 | -------------------------------------------------------------------------------- /src/aijournal/services/capture/results.py: -------------------------------------------------------------------------------- 1 | """Lightweight result models shared by orchestration code.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import TYPE_CHECKING, Any 6 | 7 | from pydantic import BaseModel, Field 8 | 9 | if TYPE_CHECKING: 10 | from collections.abc import Iterable 11 | 12 | 13 
| class OperationResult(BaseModel): 14 | """Outcome of a single operation/stage.""" 15 | 16 | ok: bool = True 17 | changed: bool = False 18 | message: str = "" 19 | artifacts: list[str] = Field(default_factory=list) 20 | warnings: list[str] = Field(default_factory=list) 21 | details: dict[str, Any] = Field(default_factory=dict) 22 | 23 | model_config = {"arbitrary_types_allowed": True} 24 | 25 | @classmethod 26 | def noop(cls, message: str = "nothing to do", **kwargs: Any) -> OperationResult: 27 | return cls(ok=True, changed=False, message=message, **kwargs) 28 | 29 | @classmethod 30 | def wrote( 31 | cls, 32 | artifacts: Iterable[str], 33 | message: str = "written", 34 | **kwargs: Any, 35 | ) -> OperationResult: 36 | artifacts_list = list(artifacts) 37 | return cls( 38 | ok=True, 39 | changed=bool(artifacts_list), 40 | message=message, 41 | artifacts=artifacts_list, 42 | **kwargs, 43 | ) 44 | 45 | @classmethod 46 | def fail(cls, message: str, **kwargs: Any) -> OperationResult: 47 | return cls(ok=False, changed=False, message=message, **kwargs) 48 | 49 | 50 | class StageResult(BaseModel): 51 | """Execution metadata for a single capture stage.""" 52 | 53 | stage: str 54 | result: OperationResult 55 | duration_ms: float 56 | 57 | model_config = {"arbitrary_types_allowed": True} 58 | -------------------------------------------------------------------------------- /schemas/core/aijournal.common.meta.LLMResult.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Captured LLM invocation details paired with the structured payload.", 3 | "properties": { 4 | "attempts": { 5 | "default": 1, 6 | "title": "Attempts", 7 | "type": "integer" 8 | }, 9 | "coercions_applied": { 10 | "items": { 11 | "additionalProperties": { 12 | "type": "string" 13 | }, 14 | "type": "object" 15 | }, 16 | "title": "Coercions Applied", 17 | "type": "array" 18 | }, 19 | "created_at": { 20 | "title": "Created At", 21 | "type": "string" 22 | }, 23 | "model": { 24 | "title": "Model", 25 | "type": "string" 26 | }, 27 | "payload": { 28 | "title": "Payload" 29 | }, 30 | "prompt_hash": { 31 | "anyOf": [ 32 | { 33 | "type": "string" 34 | }, 35 | { 36 | "type": "null" 37 | } 38 | ], 39 | "default": null, 40 | "title": "Prompt Hash" 41 | }, 42 | "prompt_kind": { 43 | "anyOf": [ 44 | { 45 | "type": "string" 46 | }, 47 | { 48 | "type": "null" 49 | } 50 | ], 51 | "default": null, 52 | "title": "Prompt Kind" 53 | }, 54 | "prompt_path": { 55 | "title": "Prompt Path", 56 | "type": "string" 57 | }, 58 | "prompt_set": { 59 | "anyOf": [ 60 | { 61 | "type": "string" 62 | }, 63 | { 64 | "type": "null" 65 | } 66 | ], 67 | "default": null, 68 | "title": "Prompt Set" 69 | } 70 | }, 71 | "required": [ 72 | "model", 73 | "prompt_path", 74 | "created_at", 75 | "payload" 76 | ], 77 | "title": "LLMResult", 78 | "type": "object" 79 | } 80 | -------------------------------------------------------------------------------- /tests/test_cli_ollama_health.py: -------------------------------------------------------------------------------- 1 | """Tests for `aijournal ollama health`.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import TYPE_CHECKING 6 | 7 | import pytest 8 | 9 | from aijournal.cli import app 10 | 11 | if TYPE_CHECKING: 12 | from typer.testing import CliRunner 13 | 14 | 15 | def _has_ollama_health_command() -> bool: 16 | return any(cmd.name == "ollama" for cmd in app.registered_commands) 17 | 18 | 19 | @pytest.fixture(autouse=True) 20 | def skip_if_ollama_missing() -> 
None: 21 | if not _has_ollama_health_command(): 22 | pytest.skip("ollama health command not available yet") 23 | 24 | 25 | @pytest.fixture(autouse=True) 26 | def fake_ollama(monkeypatch: pytest.MonkeyPatch) -> None: 27 | monkeypatch.setenv("AIJOURNAL_FAKE_OLLAMA", "1") 28 | monkeypatch.delenv("HTTP_PROXY", raising=False) 29 | monkeypatch.delenv("HTTPS_PROXY", raising=False) 30 | 31 | 32 | def test_ollama_health_reports_models_and_default(cli_runner: CliRunner) -> None: 33 | result = cli_runner.invoke(app, ["ops", "system", "ollama", "health"]) 34 | assert result.exit_code == 0, result.output 35 | normalized = result.output.lower() 36 | assert "models" in normalized 37 | assert "default" in normalized 38 | 39 | 40 | def test_ollama_health_is_idempotent(cli_runner: CliRunner) -> None: 41 | first = cli_runner.invoke(app, ["ops", "system", "ollama", "health"]) 42 | assert first.exit_code == 0, first.output 43 | 44 | second = cli_runner.invoke(app, ["ops", "system", "ollama", "health"]) 45 | assert second.exit_code == 0, second.output 46 | normalized_first = first.output.lower() 47 | normalized_second = second.output.lower() 48 | for token in ("models", "default"): 49 | assert token in normalized_first 50 | assert token in normalized_second 51 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.facts.ConsolidatedMicroFact.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Global microfact entry that survives consolidation runs.", 3 | "properties": { 4 | "canonical_statement": { 5 | "title": "Canonical Statement", 6 | "type": "string" 7 | }, 8 | "confidence": { 9 | "title": "Confidence", 10 | "type": "number" 11 | }, 12 | "contexts": { 13 | "items": { 14 | "type": "string" 15 | }, 16 | "title": "Contexts", 17 | "type": "array" 18 | }, 19 | "domain": { 20 | "anyOf": [ 21 | { 22 | "type": "string" 23 | }, 24 | { 25 | "type": "null" 26 | } 27 | ], 28 | "default": null, 29 | "title": "Domain" 30 | }, 31 | "evidence_entries": { 32 | "items": { 33 | "type": "string" 34 | }, 35 | "title": "Evidence Entries", 36 | "type": "array" 37 | }, 38 | "first_seen": { 39 | "title": "First Seen", 40 | "type": "string" 41 | }, 42 | "id": { 43 | "title": "Id", 44 | "type": "string" 45 | }, 46 | "last_seen": { 47 | "title": "Last Seen", 48 | "type": "string" 49 | }, 50 | "observation_count": { 51 | "title": "Observation Count", 52 | "type": "integer" 53 | }, 54 | "source_fact_ids": { 55 | "items": { 56 | "type": "string" 57 | }, 58 | "title": "Source Fact Ids", 59 | "type": "array" 60 | }, 61 | "statement": { 62 | "title": "Statement", 63 | "type": "string" 64 | } 65 | }, 66 | "required": [ 67 | "id", 68 | "statement", 69 | "canonical_statement", 70 | "confidence", 71 | "first_seen", 72 | "last_seen", 73 | "observation_count" 74 | ], 75 | "title": "ConsolidatedMicroFact", 76 | "type": "object" 77 | } 78 | -------------------------------------------------------------------------------- /src/aijournal/api/capture.py: -------------------------------------------------------------------------------- 1 | """Public capture API models.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import Literal 6 | 7 | from pydantic import Field 8 | 9 | from aijournal.common.base import StrictModel 10 | 11 | 12 | class CaptureRequest(StrictModel): 13 | """User-facing capture options supplied by CLI or HTTP.""" 14 | 15 | source: Literal["stdin", "editor", "file", "dir"] 16 | text: str | None = None 
17 | paths: list[str] = Field(default_factory=list) 18 | source_type: Literal["journal", "notes", "blog"] = "journal" 19 | date: str | None = None 20 | title: str | None = None 21 | slug: str | None = None 22 | tags: list[str] = Field(default_factory=list) 23 | projects: list[str] = Field(default_factory=list) 24 | mood: str | None = None 25 | apply_profile: Literal["auto", "review"] = "auto" 26 | rebuild: Literal["auto", "always", "skip"] = "auto" 27 | pack: Literal["L1", "L3", "L4"] | None = None 28 | retries: int | None = Field( 29 | default=None, 30 | ge=0, 31 | description=( 32 | "Optional override for LLM retries; defaults to workspace configuration when unset." 33 | ), 34 | ) 35 | progress: bool = True 36 | dry_run: bool = False 37 | snapshot: bool = True 38 | 39 | 40 | class CaptureInput(CaptureRequest): 41 | """Internal capture payload enriched with stage bounds.""" 42 | 43 | min_stage: int = Field(0, ge=0) 44 | max_stage: int = Field(7, ge=0) 45 | 46 | @classmethod 47 | def from_request( 48 | cls, 49 | request: CaptureRequest, 50 | *, 51 | min_stage: int, 52 | max_stage: int, 53 | ) -> CaptureInput: 54 | payload = request.model_dump(mode="python") 55 | payload.update({"min_stage": min_stage, "max_stage": max_stage}) 56 | return cls.model_validate(payload) 57 | -------------------------------------------------------------------------------- /src/aijournal/services/capture/stages/stage1_normalize.py: -------------------------------------------------------------------------------- 1 | """Stage 1: normalize captured Markdown into structured entries.""" 2 | 3 | from __future__ import annotations 4 | 5 | from time import perf_counter 6 | from typing import TYPE_CHECKING 7 | 8 | if TYPE_CHECKING: 9 | from pathlib import Path 10 | 11 | from aijournal.common.app_config import AppConfig 12 | from aijournal.services.capture import NormalizeStageOutputs 13 | 14 | from .stage0_persist import EntryResult 15 | 16 | 17 | def run_normalize_stage_1( 18 | entry_results: list[EntryResult], 19 | root: Path, 20 | config: AppConfig, 21 | ) -> NormalizeStageOutputs: 22 | from aijournal.services.capture import NormalizeStageOutputs, normalize_entries 23 | from aijournal.services.capture.results import OperationResult 24 | 25 | normalize_start = perf_counter() 26 | artifact_counts = normalize_entries(entry_results, root, config) if entry_results else {} 27 | duration_ms = (perf_counter() - normalize_start) * 1000.0 28 | normalized_count = int(artifact_counts.get("normalized", 0)) 29 | normalized_paths = artifact_counts.get("paths", []) 30 | normalize_details: dict[str, object] = {"normalized": normalized_count} 31 | if normalized_count: 32 | message = f"{normalized_count} normalized entries updated" 33 | op_result = OperationResult.wrote( 34 | normalized_paths, 35 | message=message, 36 | details=normalize_details, 37 | ) 38 | else: 39 | op_result = OperationResult.noop( 40 | "normalized entries already up to date", 41 | details=normalize_details, 42 | ) 43 | changed_dates = sorted( 44 | {entry.date for entry in entry_results if entry.changed and not entry.deduped}, 45 | ) 46 | return NormalizeStageOutputs(artifact_counts, op_result, duration_ms, changed_dates) 47 | -------------------------------------------------------------------------------- /tests/test_cli_microfacts.py: -------------------------------------------------------------------------------- 1 | """Tests for microfacts ops commands.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import TYPE_CHECKING 6 | 7 | import yaml 8 | 
9 | from aijournal.cli import app 10 | from tests.test_cli_facts import DATE, _write_normalized, _write_summary 11 | 12 | if TYPE_CHECKING: 13 | from pathlib import Path 14 | 15 | from typer.testing import CliRunner 16 | 17 | 18 | def _load_yaml(path: Path) -> dict[str, object]: 19 | return yaml.safe_load(path.read_text(encoding="utf-8")) 20 | 21 | 22 | def test_microfacts_rebuild_command_writes_artifacts( 23 | cli_workspace: Path, 24 | cli_runner: CliRunner, 25 | ) -> None: 26 | _write_normalized(cli_workspace) 27 | _write_summary(cli_workspace) 28 | 29 | # Generate daily microfacts first. 30 | first = cli_runner.invoke( 31 | app, 32 | ["ops", "pipeline", "extract-facts", "--date", DATE], 33 | ) 34 | assert first.exit_code == 0, first.stdout 35 | 36 | result = cli_runner.invoke(app, ["ops", "microfacts", "rebuild"]) 37 | 38 | assert result.exit_code == 0, result.stdout 39 | derived = cli_workspace / "derived" / "microfacts" 40 | consolidated = derived / "consolidated.yaml" 41 | assert consolidated.exists() 42 | consolidated_artifact = _load_yaml(consolidated) 43 | assert consolidated_artifact.get("kind") == "microfacts.consolidated" 44 | data = consolidated_artifact.get("data", {}) 45 | assert data.get("facts") or [], "Expected consolidated facts" 46 | 47 | logs_dir = derived / "logs" 48 | log_files = sorted(logs_dir.glob("rebuild-*.yaml")) 49 | assert log_files, "Expected a consolidation log file" 50 | log_payload = _load_yaml(log_files[-1]) 51 | assert log_payload.get("kind") == "microfacts.log" 52 | log_entries = log_payload.get("data", {}).get("entries") or [] 53 | assert log_entries, "Expected log entries in consolidation log" 54 | -------------------------------------------------------------------------------- /tests/services/capture/test_graceful_profile_update.py: -------------------------------------------------------------------------------- 1 | """Tests for the graceful profile update wrapper.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import TYPE_CHECKING, Never 6 | 7 | import typer 8 | 9 | from aijournal.common.app_config import AppConfig 10 | from aijournal.services.capture.graceful import graceful_profile_update 11 | 12 | if TYPE_CHECKING: 13 | from pathlib import Path 14 | 15 | 16 | def test_graceful_profile_update_success(tmp_path: Path, monkeypatch) -> None: 17 | batch_path = tmp_path / "derived" / "pending" / "profile_updates" / "test.yaml" 18 | batch_path.parent.mkdir(parents=True, exist_ok=True) 19 | 20 | def fake_run( 21 | date: str, 22 | *, 23 | progress: bool, 24 | generate_preview: bool, 25 | workspace: Path | None = None, 26 | config: AppConfig | None = None, 27 | ) -> Path: 28 | del date, progress, generate_preview, workspace, config 29 | batch_path.write_text("batch", encoding="utf-8") 30 | return batch_path 31 | 32 | monkeypatch.setattr("aijournal.commands.profile_update.run_profile_update", fake_run) 33 | 34 | path, error = graceful_profile_update( 35 | "2025-10-27", 36 | progress=False, 37 | generate_preview=False, 38 | workspace=tmp_path, 39 | config=AppConfig(), 40 | ) 41 | 42 | assert error is None 43 | assert path == batch_path 44 | 45 | 46 | def test_graceful_profile_update_failure(tmp_path: Path, monkeypatch) -> None: 47 | def failing_run(*_args, **_kwargs) -> Never: 48 | raise typer.Exit(1) 49 | 50 | monkeypatch.setattr("aijournal.commands.profile_update.run_profile_update", failing_run) 51 | 52 | path, error = graceful_profile_update( 53 | "2025-10-27", 54 | progress=False, 55 | generate_preview=False, 56 | workspace=tmp_path, 
57 | config=AppConfig(), 58 | ) 59 | 60 | assert path is None 61 | assert error is not None 62 | -------------------------------------------------------------------------------- /tests/pipelines/test_advise.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from aijournal.domain.claims import ClaimAtom, Provenance, Scope 4 | from aijournal.models.derived import AdviceCard 5 | from aijournal.pipelines import advise 6 | 7 | 8 | def _claim(claim_id: str) -> ClaimAtom: 9 | return ClaimAtom( 10 | id=claim_id, 11 | type="preference", 12 | subject="self", 13 | predicate="insight", 14 | statement="Statement", 15 | scope=Scope(), 16 | strength=0.6, 17 | status="tentative", 18 | review_after_days=120, 19 | provenance=Provenance( 20 | sources=[], 21 | first_seen="2024-01-01", 22 | last_updated="2024-01-02T00:00:00Z", 23 | observation_count=1, 24 | ), 25 | ) 26 | 27 | 28 | def test_generate_advice_fake_mode() -> None: 29 | card = advise.generate_advice( 30 | "How should I focus?", 31 | profile={"values": {"top": ["Focus"]}}, 32 | claims=[_claim("claim-1")], 33 | use_fake_llm=True, 34 | advice_identifier=lambda q: "adv-test", 35 | llm_advice=None, 36 | rankings=[], 37 | pending_prompts=["Follow up"], 38 | ) 39 | 40 | assert isinstance(card, AdviceCard) 41 | assert card.id.startswith("adv-test") or card.id # ensure fake path returns AdviceCard 42 | 43 | 44 | def test_generate_advice_llm_path() -> None: 45 | response = AdviceCard( 46 | id="adv-1234", 47 | query="How should I focus?", 48 | assumptions=["Assumption"], 49 | recommendations=[], 50 | tradeoffs=[], 51 | next_actions=[], 52 | confidence=0.5, 53 | ) 54 | 55 | card = advise.generate_advice( 56 | "How should I focus?", 57 | profile={}, 58 | claims=[], 59 | use_fake_llm=False, 60 | advice_identifier=lambda q: "adv-test", 61 | llm_advice=response, 62 | rankings=[], 63 | pending_prompts=[], 64 | ) 65 | 66 | assert card.id == "adv-1234" 67 | -------------------------------------------------------------------------------- /src/aijournal/models/derived.py: -------------------------------------------------------------------------------- 1 | """Derived data models for aijournal.""" 2 | 3 | from __future__ import annotations 4 | 5 | from pydantic import Field 6 | 7 | from aijournal.domain.advice import AdviceCard as _AdviceCard 8 | from aijournal.domain.advice import AdviceRecommendation as _AdviceRecommendation 9 | from aijournal.domain.advice import AdviceReference as _AdviceReference 10 | from aijournal.domain.changes import ProfileUpdateProposals 11 | from aijournal.domain.claims import ClaimAtom 12 | from aijournal.domain.events import ClaimPreviewEvent # noqa: TC001 13 | from aijournal.domain.persona import InterviewQuestion, InterviewSet, PersonaCore 14 | 15 | from .base import AijournalModel 16 | 17 | PersonaCore.model_rebuild(_types_namespace={"ClaimAtom": ClaimAtom}) 18 | InterviewSet.model_rebuild( 19 | _types_namespace={ 20 | "InterviewQuestion": InterviewQuestion, 21 | }, 22 | ) 23 | 24 | 25 | AdviceReference = _AdviceReference 26 | AdviceRecommendation = _AdviceRecommendation 27 | AdviceCard = _AdviceCard 28 | 29 | 30 | class ProfileUpdatePreview(AijournalModel): 31 | """Preview metadata bundled with a profile update batch.""" 32 | 33 | claim_events: list[ClaimPreviewEvent] = Field(default_factory=list) 34 | interview_prompts: list[str] = Field(default_factory=list) 35 | 36 | 37 | class ProfileUpdateInput(AijournalModel): 38 | """Normalized entry metadata 
captured in a characterization batch.""" 39 | 40 | id: str 41 | normalized_path: str 42 | source_hash: str | None = None 43 | manifest_hash: str | None = None 44 | tags: list[str] = Field(default_factory=list) 45 | 46 | 47 | class ProfileUpdateBatch(AijournalModel): 48 | """Pending profile update batch emitted by the unified profile update stage/CLI.""" 49 | 50 | batch_id: str 51 | created_at: str 52 | date: str 53 | inputs: list[ProfileUpdateInput] = Field(default_factory=list) 54 | proposals: ProfileUpdateProposals = Field(default_factory=ProfileUpdateProposals) 55 | preview: ProfileUpdatePreview | None = None 56 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.index.Chunk.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Normalized chunk persisted in the retrieval index.", 3 | "properties": { 4 | "chunk_id": { 5 | "title": "Chunk Id", 6 | "type": "string" 7 | }, 8 | "chunk_index": { 9 | "title": "Chunk Index", 10 | "type": "integer" 11 | }, 12 | "chunk_type": { 13 | "default": "entry", 14 | "title": "Chunk Type", 15 | "type": "string" 16 | }, 17 | "date": { 18 | "title": "Date", 19 | "type": "string" 20 | }, 21 | "manifest_hash": { 22 | "anyOf": [ 23 | { 24 | "type": "string" 25 | }, 26 | { 27 | "type": "null" 28 | } 29 | ], 30 | "default": null, 31 | "title": "Manifest Hash" 32 | }, 33 | "normalized_id": { 34 | "title": "Normalized Id", 35 | "type": "string" 36 | }, 37 | "source_hash": { 38 | "anyOf": [ 39 | { 40 | "type": "string" 41 | }, 42 | { 43 | "type": "null" 44 | } 45 | ], 46 | "default": null, 47 | "title": "Source Hash" 48 | }, 49 | "source_path": { 50 | "title": "Source Path", 51 | "type": "string" 52 | }, 53 | "source_type": { 54 | "anyOf": [ 55 | { 56 | "type": "string" 57 | }, 58 | { 59 | "type": "null" 60 | } 61 | ], 62 | "default": null, 63 | "title": "Source Type" 64 | }, 65 | "tags": { 66 | "items": { 67 | "type": "string" 68 | }, 69 | "title": "Tags", 70 | "type": "array" 71 | }, 72 | "text": { 73 | "title": "Text", 74 | "type": "string" 75 | }, 76 | "tokens": { 77 | "title": "Tokens", 78 | "type": "integer" 79 | } 80 | }, 81 | "required": [ 82 | "chunk_id", 83 | "normalized_id", 84 | "chunk_index", 85 | "text", 86 | "date", 87 | "source_path", 88 | "tokens" 89 | ], 90 | "title": "Chunk", 91 | "type": "object" 92 | } 93 | -------------------------------------------------------------------------------- /tests/pipelines/test_summarize.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from aijournal.domain.facts import DailySummary 4 | from aijournal.domain.journal import NormalizedEntry 5 | from aijournal.models.authoritative import JournalSection 6 | from aijournal.pipelines import summarize 7 | 8 | 9 | def _normalized_entry(entry_id: str, title: str) -> NormalizedEntry: 10 | return NormalizedEntry( 11 | id=entry_id, 12 | created_at="2024-01-02T09:00:00Z", 13 | source_path=f"data/journal/{entry_id}.md", 14 | title=title, 15 | tags=["focus"], 16 | sections=[JournalSection(heading="Highlights", level=2)], 17 | ) 18 | 19 | 20 | def test_generate_summary_uses_fake_path_when_requested() -> None: 21 | entries = [_normalized_entry("entry-1", "Deep Work")] 22 | 23 | def request_factory() -> DailySummary: # pragma: no cover - should not run 24 | msg = "request_factory should not be invoked for fake flows" 25 | raise AssertionError(msg) 26 | 27 | summary_result = 
summarize.generate_summary( 28 | entries, 29 | "2024-01-02", 30 | use_fake_llm=True, 31 | llm_summary=None, 32 | ) 33 | 34 | assert summary_result.day == "2024-01-02" 35 | assert summary_result.bullets[0].startswith("Deep Work") 36 | assert summary_result.todo_candidates 37 | 38 | 39 | def test_generate_summary_merges_llm_results_with_fallback() -> None: 40 | entries = [_normalized_entry("entry-1", "Deep Work")] 41 | response = DailySummary( 42 | day="", 43 | bullets=["Refined insight", ""], 44 | highlights=[], 45 | todo_candidates=["", "Review notes"], 46 | ) 47 | 48 | summary_result = summarize.generate_summary( 49 | entries, 50 | "2024-01-02", 51 | use_fake_llm=False, 52 | llm_summary=response, 53 | ) 54 | assert summary_result.day == "2024-01-02" 55 | assert summary_result.bullets == ["Refined insight"] 56 | assert summary_result.highlights == ["Refined insight"] 57 | assert summary_result.todo_candidates == ["Review notes"] 58 | -------------------------------------------------------------------------------- /docs/archive/2025-10-29_CLI_MIGRATION.md: -------------------------------------------------------------------------------- 1 | # CLI Migration Guide 2 | 3 | The refactor consolidates everyday commands at the top level and moves specialist tools under 4 | `aijournal ops ...`. Use this table to map legacy verbs to their new homes. 5 | 6 | | Legacy Command | Replacement | 7 | | -------------- | ----------- | 8 | | `aijournal ingest` | `aijournal capture --from ...` (everyday) or `aijournal ops pipeline ingest` (advanced) | 9 | | `aijournal new` | `aijournal capture --text/--edit ...` | 10 | | `aijournal facts` | `aijournal ops pipeline extract-facts` | 11 | | `aijournal summarize` | `aijournal ops pipeline summarize` | 12 | | `aijournal review-updates` | `aijournal ops pipeline review` | 13 | | `aijournal characterize` | `aijournal ops pipeline characterize` | 14 | | `aijournal profile suggest` | (unchanged) `aijournal ops profile suggest` | 15 | | `aijournal profile apply` | (unchanged) `aijournal ops profile apply` — usually run automatically by `capture` | 16 | | `aijournal profile status` | `aijournal status` (summary) or `aijournal ops profile status` (detailed) | 17 | | `aijournal tail` | `aijournal ops index update` | 18 | | `aijournal pack` | `aijournal export pack` | 19 | | `aijournal chatd` | `aijournal serve chat` | 20 | 21 | ## Everyday Flow 22 | 23 | ```sh 24 | uv run aijournal init --path ~/journal 25 | cd ~/journal 26 | uv run aijournal capture --text "What I learned today" --tag reflection 27 | uv run aijournal status 28 | uv run aijournal chat "What progress did I make?" 29 | uv run aijournal export pack --level L1 --format yaml 30 | ``` 31 | 32 | ## Advanced Pipelines 33 | 34 | Manual reruns remain available under `aijournal ops pipeline ...`. For example: 35 | 36 | ```sh 37 | # Re-run extraction on a specific day 38 | uv run aijournal ops pipeline extract-facts --date 2025-02-05 --retries 2 --progress 39 | 40 | # Ingest a directory in CI without refreshing downstream artifacts 41 | uv run aijournal ops pipeline ingest docs/notes --source-type notes --no-snapshot 42 | ``` 43 | 44 | All `ops` commands accept the same options they did previously; the refactor only reorganizes where 45 | you invoke them. 
46 | -------------------------------------------------------------------------------- /tests/pipelines/test_persona.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from datetime import UTC, datetime 4 | 5 | import pytest 6 | 7 | from aijournal.domain.claims import ClaimAtom, Provenance, Scope 8 | from aijournal.pipelines import persona as persona_pipeline 9 | 10 | 11 | def _test_claim(claim_id: str, *, status: str = "accepted") -> ClaimAtom: 12 | return ClaimAtom( 13 | id=claim_id, 14 | type="preference", 15 | subject="Self", 16 | predicate="insight", 17 | statement=f"{claim_id} statement", 18 | scope=Scope(), 19 | strength=0.8, 20 | status=status, 21 | review_after_days=120, 22 | provenance=Provenance( 23 | sources=[], 24 | first_seen="2024-01-01", 25 | last_updated="2024-01-02T00:00:00Z", 26 | observation_count=1, 27 | ), 28 | ) 29 | 30 | 31 | def test_build_persona_core_requires_content() -> None: 32 | with pytest.raises(ValueError, match="Nothing to include in persona core"): 33 | persona_pipeline.build_persona_core( 34 | {}, 35 | [], 36 | token_budget=100, 37 | max_claims=5, 38 | min_claims=1, 39 | char_per_token=4.0, 40 | impact_weights={}, 41 | now=datetime(2024, 1, 2, tzinfo=UTC), 42 | ) 43 | 44 | 45 | def test_build_persona_core_trims_to_budget() -> None: 46 | profile = {"traits": {"strengths": ["Focused work"]}} 47 | claims = [_test_claim("claim-1", status="accepted"), _test_claim("claim-2", status="tentative")] 48 | 49 | result = persona_pipeline.build_persona_core( 50 | profile, 51 | claims, 52 | token_budget=1, 53 | max_claims=2, 54 | min_claims=1, 55 | char_per_token=1.0, 56 | impact_weights={}, 57 | now=datetime(2024, 1, 2, tzinfo=UTC), 58 | ) 59 | 60 | assert len(result.ranked_claims) == 2 61 | assert result.selection.trimmed_ids, "Expected trimming when budget is tight" 62 | assert len(result.persona.claims) == 1 63 | assert result.persona.profile == profile 64 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.events.FeedbackBatch.json: -------------------------------------------------------------------------------- 1 | { 2 | "$defs": { 3 | "FeedbackAdjustmentEvent": { 4 | "description": "Record of a claim strength adjustment triggered by chat feedback.", 5 | "properties": { 6 | "claim_id": { 7 | "title": "Claim Id", 8 | "type": "string" 9 | }, 10 | "delta": { 11 | "title": "Delta", 12 | "type": "number" 13 | }, 14 | "kind": { 15 | "default": "feedback", 16 | "title": "Kind", 17 | "type": "string" 18 | }, 19 | "new_strength": { 20 | "title": "New Strength", 21 | "type": "number" 22 | }, 23 | "old_strength": { 24 | "title": "Old Strength", 25 | "type": "number" 26 | } 27 | }, 28 | "required": [ 29 | "claim_id", 30 | "old_strength", 31 | "new_strength", 32 | "delta" 33 | ], 34 | "title": "FeedbackAdjustmentEvent", 35 | "type": "object" 36 | }, 37 | "FeedbackDirection": { 38 | "enum": [ 39 | "up", 40 | "down" 41 | ], 42 | "title": "FeedbackDirection", 43 | "type": "string" 44 | } 45 | }, 46 | "description": "Batch of feedback adjustments queued for claim strength updates.", 47 | "properties": { 48 | "batch_id": { 49 | "title": "Batch Id", 50 | "type": "string" 51 | }, 52 | "created_at": { 53 | "title": "Created At", 54 | "type": "string" 55 | }, 56 | "events": { 57 | "items": { 58 | "$ref": "#/$defs/FeedbackAdjustmentEvent" 59 | }, 60 | "title": "Events", 61 | "type": "array" 62 | }, 63 | "feedback": { 64 | "$ref": 
"#/$defs/FeedbackDirection" 65 | }, 66 | "question": { 67 | "title": "Question", 68 | "type": "string" 69 | }, 70 | "session_id": { 71 | "title": "Session Id", 72 | "type": "string" 73 | } 74 | }, 75 | "required": [ 76 | "batch_id", 77 | "created_at", 78 | "session_id", 79 | "question", 80 | "feedback" 81 | ], 82 | "title": "FeedbackBatch", 83 | "type": "object" 84 | } 85 | -------------------------------------------------------------------------------- /tests/services/capture/test_stage_summarize.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING, Never 4 | 5 | import typer 6 | 7 | from aijournal.common.app_config import AppConfig 8 | from aijournal.services.capture import CaptureInput 9 | from aijournal.services.capture.stages import stage2_summarize 10 | 11 | if TYPE_CHECKING: 12 | from pathlib import Path 13 | 14 | 15 | def _make_inputs() -> CaptureInput: 16 | return CaptureInput(source="stdin", text="Sample entry") 17 | 18 | 19 | def test_stage2_summarize_success(tmp_path: Path, monkeypatch) -> None: 20 | summary_path = tmp_path / "derived" / "summaries" / "2025-10-27.yaml" 21 | summary_path.parent.mkdir(parents=True, exist_ok=True) 22 | 23 | called: list[str] = [] 24 | 25 | def fake_run( 26 | date: str, 27 | *, 28 | progress: bool, 29 | workspace: Path | None = None, 30 | config: AppConfig | None = None, 31 | ) -> Path: 32 | called.append(date) 33 | summary_path.write_text("summary", encoding="utf-8") 34 | return summary_path 35 | 36 | monkeypatch.setattr("aijournal.commands.summarize.run_summarize", fake_run) 37 | 38 | outputs = stage2_summarize.run_summarize_stage_2( 39 | ["2025-10-27"], 40 | _make_inputs(), 41 | tmp_path, 42 | AppConfig(), 43 | ) 44 | 45 | assert called == ["2025-10-27"] 46 | assert outputs.result.ok is True 47 | assert outputs.result.changed is True 48 | assert outputs.paths == ["derived/summaries/2025-10-27.yaml"] 49 | 50 | 51 | def test_stage2_summarize_handles_failure(tmp_path: Path, monkeypatch) -> None: 52 | def failing_run(*args, **kwargs) -> Never: 53 | raise typer.Exit(1) 54 | 55 | monkeypatch.setattr("aijournal.commands.summarize.run_summarize", failing_run) 56 | 57 | outputs = stage2_summarize.run_summarize_stage_2( 58 | ["2025-10-27"], 59 | _make_inputs(), 60 | tmp_path, 61 | AppConfig(), 62 | ) 63 | 64 | assert outputs.result.ok is False 65 | assert outputs.result.changed is False 66 | assert outputs.result.warnings 67 | assert outputs.paths == [] 68 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.index.RetrievedChunk.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Chunk returned from retrieval with a similarity score.", 3 | "properties": { 4 | "chunk_id": { 5 | "title": "Chunk Id", 6 | "type": "string" 7 | }, 8 | "chunk_index": { 9 | "title": "Chunk Index", 10 | "type": "integer" 11 | }, 12 | "chunk_type": { 13 | "default": "entry", 14 | "title": "Chunk Type", 15 | "type": "string" 16 | }, 17 | "date": { 18 | "title": "Date", 19 | "type": "string" 20 | }, 21 | "manifest_hash": { 22 | "anyOf": [ 23 | { 24 | "type": "string" 25 | }, 26 | { 27 | "type": "null" 28 | } 29 | ], 30 | "default": null, 31 | "title": "Manifest Hash" 32 | }, 33 | "normalized_id": { 34 | "title": "Normalized Id", 35 | "type": "string" 36 | }, 37 | "score": { 38 | "title": "Score", 39 | "type": "number" 40 | }, 41 | "source_hash": { 42 | 
"anyOf": [ 43 | { 44 | "type": "string" 45 | }, 46 | { 47 | "type": "null" 48 | } 49 | ], 50 | "default": null, 51 | "title": "Source Hash" 52 | }, 53 | "source_path": { 54 | "title": "Source Path", 55 | "type": "string" 56 | }, 57 | "source_type": { 58 | "anyOf": [ 59 | { 60 | "type": "string" 61 | }, 62 | { 63 | "type": "null" 64 | } 65 | ], 66 | "default": null, 67 | "title": "Source Type" 68 | }, 69 | "tags": { 70 | "items": { 71 | "type": "string" 72 | }, 73 | "title": "Tags", 74 | "type": "array" 75 | }, 76 | "text": { 77 | "title": "Text", 78 | "type": "string" 79 | }, 80 | "tokens": { 81 | "title": "Tokens", 82 | "type": "integer" 83 | } 84 | }, 85 | "required": [ 86 | "chunk_id", 87 | "normalized_id", 88 | "chunk_index", 89 | "text", 90 | "date", 91 | "source_path", 92 | "tokens", 93 | "score" 94 | ], 95 | "title": "RetrievedChunk", 96 | "type": "object" 97 | } 98 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.chat_sessions.ChatLearningEntry.json: -------------------------------------------------------------------------------- 1 | { 2 | "$defs": { 3 | "ChatTelemetry": { 4 | "description": "Telemetry captured during a chat turn.", 5 | "properties": { 6 | "chunk_count": { 7 | "title": "Chunk Count", 8 | "type": "integer" 9 | }, 10 | "model": { 11 | "title": "Model", 12 | "type": "string" 13 | }, 14 | "retrieval_ms": { 15 | "title": "Retrieval Ms", 16 | "type": "number" 17 | }, 18 | "retriever_source": { 19 | "title": "Retriever Source", 20 | "type": "string" 21 | } 22 | }, 23 | "required": [ 24 | "retrieval_ms", 25 | "chunk_count", 26 | "retriever_source", 27 | "model" 28 | ], 29 | "title": "ChatTelemetry", 30 | "type": "object" 31 | } 32 | }, 33 | "description": "Entry capturing a single learning from a chat turn.", 34 | "properties": { 35 | "citations": { 36 | "items": { 37 | "type": "string" 38 | }, 39 | "title": "Citations", 40 | "type": "array" 41 | }, 42 | "clarifying_question": { 43 | "anyOf": [ 44 | { 45 | "type": "string" 46 | }, 47 | { 48 | "type": "null" 49 | } 50 | ], 51 | "default": null, 52 | "title": "Clarifying Question" 53 | }, 54 | "feedback": { 55 | "anyOf": [ 56 | { 57 | "type": "string" 58 | }, 59 | { 60 | "type": "null" 61 | } 62 | ], 63 | "default": null, 64 | "title": "Feedback" 65 | }, 66 | "intent": { 67 | "title": "Intent", 68 | "type": "string" 69 | }, 70 | "question": { 71 | "title": "Question", 72 | "type": "string" 73 | }, 74 | "telemetry": { 75 | "$ref": "#/$defs/ChatTelemetry" 76 | }, 77 | "turn_index": { 78 | "title": "Turn Index", 79 | "type": "integer" 80 | } 81 | }, 82 | "required": [ 83 | "turn_index", 84 | "question", 85 | "intent", 86 | "telemetry" 87 | ], 88 | "title": "ChatLearningEntry", 89 | "type": "object" 90 | } 91 | -------------------------------------------------------------------------------- /src/aijournal/services/microfacts/snapshot.py: -------------------------------------------------------------------------------- 1 | """Helpers for loading and filtering consolidated microfacts snapshots.""" 2 | 3 | from __future__ import annotations 4 | 5 | from pathlib import Path 6 | from typing import TYPE_CHECKING, Any 7 | 8 | from pydantic import ValidationError 9 | 10 | from aijournal.domain.facts import ConsolidatedMicroFact, ConsolidatedMicrofactsFile 11 | from aijournal.io.artifacts import load_artifact_data 12 | 13 | if TYPE_CHECKING: 14 | from aijournal.common.app_config import AppConfig 15 | 16 | 17 | def _consolidated_path(workspace: Path, config: AppConfig) -> 
Path: 18 | derived = Path(config.paths.derived) 19 | if not derived.is_absolute(): 20 | derived = workspace / derived 21 | return derived / "microfacts" / "consolidated.yaml" 22 | 23 | 24 | def load_consolidated_microfacts( 25 | workspace: Path, 26 | config: AppConfig, 27 | ) -> ConsolidatedMicrofactsFile | None: 28 | """Return the consolidated snapshot if it exists and validates.""" 29 | path = _consolidated_path(workspace, config) 30 | if not path.exists(): 31 | return None 32 | try: 33 | return load_artifact_data(path, ConsolidatedMicrofactsFile) 34 | except ValidationError: 35 | return None 36 | 37 | 38 | def select_recurring_facts( 39 | snapshot: ConsolidatedMicrofactsFile, 40 | *, 41 | min_observations: int = 2, 42 | limit: int = 20, 43 | ) -> list[dict[str, Any]]: 44 | """Return the strongest recurring facts for prompt context.""" 45 | candidates: list[ConsolidatedMicroFact] = [ 46 | fact for fact in snapshot.facts if fact.observation_count >= min_observations 47 | ] 48 | sorted_facts = sorted( 49 | candidates, 50 | key=lambda fact: (-fact.observation_count, fact.last_seen, fact.id), 51 | )[:limit] 52 | return [ 53 | { 54 | "statement": fact.statement, 55 | "observation_count": fact.observation_count, 56 | "first_seen": fact.first_seen, 57 | "last_seen": fact.last_seen, 58 | "contexts": fact.contexts, 59 | "evidence_entries": fact.evidence_entries, 60 | } 61 | for fact in sorted_facts 62 | ] 63 | -------------------------------------------------------------------------------- /tests/scripts/test_check_structured_metrics.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import json 4 | import subprocess 5 | import sys 6 | from pathlib import Path 7 | 8 | SCRIPT = Path(__file__).resolve().parents[2] / "scripts" / "check_structured_metrics.py" 9 | 10 | 11 | def _write_metrics(path: Path, entries: list[dict[str, object]]) -> None: 12 | path.parent.mkdir(parents=True, exist_ok=True) 13 | with path.open("w", encoding="utf-8") as handle: 14 | for entry in entries: 15 | handle.write(json.dumps(entry) + "\n") 16 | 17 | 18 | def test_check_structured_metrics_passes(tmp_path: Path) -> None: 19 | metrics_path = tmp_path / "metrics.jsonl" 20 | _write_metrics( 21 | metrics_path, 22 | [ 23 | {"repair_attempts": 0, "coercion_count": 1}, 24 | {"repair_attempts": 1, "coercion_count": 2}, 25 | ], 26 | ) 27 | 28 | result = subprocess.run( 29 | [ 30 | sys.executable, 31 | str(SCRIPT), 32 | "--path", 33 | str(metrics_path), 34 | "--max-repair-rate", 35 | "0.6", 36 | "--max-avg-coercions", 37 | "3.0", 38 | ], 39 | cwd=tmp_path, 40 | check=False, 41 | capture_output=True, 42 | text=True, 43 | ) 44 | 45 | assert result.returncode == 0, result.stderr 46 | 47 | 48 | def test_check_structured_metrics_fails_when_exceeding_threshold(tmp_path: Path) -> None: 49 | metrics_path = tmp_path / "metrics.jsonl" 50 | _write_metrics( 51 | metrics_path, 52 | [ 53 | {"repair_attempts": 5, "coercion_count": 20}, 54 | ], 55 | ) 56 | 57 | result = subprocess.run( 58 | [ 59 | sys.executable, 60 | str(SCRIPT), 61 | "--path", 62 | str(metrics_path), 63 | "--max-repair-rate", 64 | "0.1", 65 | "--max-avg-coercions", 66 | "3.0", 67 | ], 68 | cwd=tmp_path, 69 | check=False, 70 | capture_output=True, 71 | text=True, 72 | ) 73 | 74 | assert result.returncode == 1 75 | assert "Repair rate" in result.stdout 76 | -------------------------------------------------------------------------------- /src/aijournal/common/meta.py: 
-------------------------------------------------------------------------------- 1 | """Artifact envelope primitives shared across aijournal.""" 2 | 3 | from __future__ import annotations 4 | 5 | from enum import StrEnum 6 | from typing import Generic, TypeVar 7 | 8 | from pydantic import Field 9 | 10 | from .base import StrictModel 11 | from .types import TimestampStr # noqa: TC001 12 | 13 | T = TypeVar("T") 14 | 15 | 16 | class ArtifactMeta(StrictModel): 17 | """Metadata describing how an artifact was produced.""" 18 | 19 | created_at: TimestampStr 20 | model: str | None = None 21 | prompt_path: str | None = None 22 | prompt_hash: str | None = None 23 | prompt_kind: str | None = None 24 | prompt_set: str | None = None 25 | char_per_token: float | None = None 26 | notes: dict[str, str] | None = None 27 | 28 | 29 | class ArtifactKind(StrEnum): 30 | """Enumeration of persisted artifact categories.""" 31 | 32 | PERSONA_CORE = "persona.core" 33 | SUMMARY_DAILY = "summaries.daily" 34 | MICROFACTS_DAILY = "microfacts.daily" 35 | MICROFACTS_CONSOLIDATED = "microfacts.consolidated" 36 | MICROFACTS_LOG = "microfacts.log" 37 | PROFILE_PROPOSALS = "profile.proposals" 38 | PROFILE_UPDATES = "profile.updates" 39 | FEEDBACK_BATCH = "feedback.batch" 40 | INDEX_META = "index.meta" 41 | INDEX_CHUNKS = "index.chunks" 42 | PACK_L1 = "pack.L1" 43 | PACK_L2 = "pack.L2" 44 | PACK_L3 = "pack.L3" 45 | PACK_L4 = "pack.L4" 46 | CHAT_TRANSCRIPT = "chat.transcript" 47 | CHAT_SUMMARY = "chat.summary" 48 | CHAT_LEARNINGS = "chat.learnings" 49 | ADVICE_CARD = "advice.card" 50 | 51 | 52 | class Artifact(StrictModel, Generic[T]): 53 | """Artifact envelope wrapping a payload of type ``T``.""" 54 | 55 | kind: ArtifactKind 56 | meta: ArtifactMeta 57 | data: T 58 | 59 | 60 | class LLMResult(StrictModel, Generic[T]): 61 | """Captured LLM invocation details paired with the structured payload.""" 62 | 63 | model: str 64 | prompt_path: str 65 | prompt_hash: str | None = None 66 | prompt_kind: str | None = None 67 | prompt_set: str | None = None 68 | created_at: TimestampStr 69 | payload: T 70 | attempts: int = 1 71 | coercions_applied: list[dict[str, str]] = Field(default_factory=list) 72 | -------------------------------------------------------------------------------- /src/aijournal/pipelines/summarize.py: -------------------------------------------------------------------------------- 1 | """Pipeline orchestration for daily summary generation.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import TYPE_CHECKING 6 | 7 | from aijournal.domain.facts import DailySummary 8 | from aijournal.fakes import fake_summarize 9 | 10 | if TYPE_CHECKING: 11 | from collections.abc import Sequence 12 | 13 | from aijournal.domain.journal import NormalizedEntry 14 | 15 | 16 | def _todo_from_entries(entries: Sequence[NormalizedEntry]) -> list[str]: 17 | todos: list[str] = [] 18 | for entry in entries[:3]: 19 | title = entry.title or entry.id or "entry" 20 | todos.append(f"Review follow-ups from {title}") 21 | return todos or ["Capture explicit next actions in tomorrow's entry."] 22 | 23 | 24 | def generate_summary( 25 | entries: Sequence[NormalizedEntry], 26 | date: str, 27 | *, 28 | use_fake_llm: bool, 29 | llm_summary: DailySummary | None, 30 | ) -> DailySummary: 31 | """Produce a `DailySummary` for the given date.""" 32 | 33 | def fallback_model() -> DailySummary: 34 | return fake_summarize(entries, date, todo_builder=_todo_from_entries) 35 | 36 | if use_fake_llm: 37 | return fallback_model() 38 | 39 | if llm_summary is None: 
40 | msg = "llm_summary must be provided when fake mode is disabled" 41 | raise ValueError(msg) 42 | 43 | bullets = [item for item in llm_summary.bullets if item] 44 | highlights = [item for item in llm_summary.highlights if item] 45 | todo_candidates = [item for item in llm_summary.todo_candidates if item] 46 | 47 | if not bullets: 48 | fallback = fallback_model() 49 | bullets = fallback.bullets 50 | if not highlights: 51 | highlights = fallback.highlights 52 | if not todo_candidates: 53 | todo_candidates = fallback.todo_candidates 54 | 55 | if not highlights: 56 | highlights = bullets[:3] 57 | if not todo_candidates: 58 | todo_candidates = _todo_from_entries(entries) 59 | 60 | day = llm_summary.day or date 61 | 62 | return DailySummary( 63 | day=day, 64 | bullets=bullets, 65 | highlights=highlights, 66 | todo_candidates=todo_candidates, 67 | ) 68 | -------------------------------------------------------------------------------- /schemas/core/aijournal.common.meta.ArtifactMeta.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Metadata describing how an artifact was produced.", 3 | "properties": { 4 | "char_per_token": { 5 | "anyOf": [ 6 | { 7 | "type": "number" 8 | }, 9 | { 10 | "type": "null" 11 | } 12 | ], 13 | "default": null, 14 | "title": "Char Per Token" 15 | }, 16 | "created_at": { 17 | "title": "Created At", 18 | "type": "string" 19 | }, 20 | "model": { 21 | "anyOf": [ 22 | { 23 | "type": "string" 24 | }, 25 | { 26 | "type": "null" 27 | } 28 | ], 29 | "default": null, 30 | "title": "Model" 31 | }, 32 | "notes": { 33 | "anyOf": [ 34 | { 35 | "additionalProperties": { 36 | "type": "string" 37 | }, 38 | "type": "object" 39 | }, 40 | { 41 | "type": "null" 42 | } 43 | ], 44 | "default": null, 45 | "title": "Notes" 46 | }, 47 | "prompt_hash": { 48 | "anyOf": [ 49 | { 50 | "type": "string" 51 | }, 52 | { 53 | "type": "null" 54 | } 55 | ], 56 | "default": null, 57 | "title": "Prompt Hash" 58 | }, 59 | "prompt_kind": { 60 | "anyOf": [ 61 | { 62 | "type": "string" 63 | }, 64 | { 65 | "type": "null" 66 | } 67 | ], 68 | "default": null, 69 | "title": "Prompt Kind" 70 | }, 71 | "prompt_path": { 72 | "anyOf": [ 73 | { 74 | "type": "string" 75 | }, 76 | { 77 | "type": "null" 78 | } 79 | ], 80 | "default": null, 81 | "title": "Prompt Path" 82 | }, 83 | "prompt_set": { 84 | "anyOf": [ 85 | { 86 | "type": "string" 87 | }, 88 | { 89 | "type": "null" 90 | } 91 | ], 92 | "default": null, 93 | "title": "Prompt Set" 94 | } 95 | }, 96 | "required": [ 97 | "created_at" 98 | ], 99 | "title": "ArtifactMeta", 100 | "type": "object" 101 | } 102 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.changes.ClaimAtomInput.json: -------------------------------------------------------------------------------- 1 | { 2 | "$defs": { 3 | "ClaimStatus": { 4 | "enum": [ 5 | "accepted", 6 | "tentative", 7 | "rejected" 8 | ], 9 | "title": "ClaimStatus", 10 | "type": "string" 11 | }, 12 | "ClaimType": { 13 | "enum": [ 14 | "preference", 15 | "value", 16 | "goal", 17 | "boundary", 18 | "trait", 19 | "habit", 20 | "aversion", 21 | "skill" 22 | ], 23 | "title": "ClaimType", 24 | "type": "string" 25 | }, 26 | "Scope": { 27 | "description": "Contextual qualifiers for a claim atom.", 28 | "properties": { 29 | "context": { 30 | "items": { 31 | "type": "string" 32 | }, 33 | "title": "Context", 34 | "type": "array" 35 | }, 36 | "domain": { 37 | "anyOf": [ 38 | { 39 | "type": "string" 40 | }, 41 | { 42 | 
"type": "null" 43 | } 44 | ], 45 | "default": null, 46 | "title": "Domain" 47 | } 48 | }, 49 | "title": "Scope", 50 | "type": "object" 51 | } 52 | }, 53 | "description": "Normalized claim payload without identifiers or provenance.", 54 | "properties": { 55 | "predicate": { 56 | "title": "Predicate", 57 | "type": "string" 58 | }, 59 | "review_after_days": { 60 | "title": "Review After Days", 61 | "type": "integer" 62 | }, 63 | "scope": { 64 | "$ref": "#/$defs/Scope" 65 | }, 66 | "statement": { 67 | "title": "Statement", 68 | "type": "string" 69 | }, 70 | "status": { 71 | "$ref": "#/$defs/ClaimStatus" 72 | }, 73 | "strength": { 74 | "title": "Strength", 75 | "type": "number" 76 | }, 77 | "subject": { 78 | "title": "Subject", 79 | "type": "string" 80 | }, 81 | "type": { 82 | "$ref": "#/$defs/ClaimType" 83 | } 84 | }, 85 | "required": [ 86 | "type", 87 | "subject", 88 | "predicate", 89 | "statement", 90 | "scope", 91 | "strength", 92 | "status", 93 | "review_after_days" 94 | ], 95 | "title": "ClaimAtomInput", 96 | "type": "object" 97 | } 98 | -------------------------------------------------------------------------------- /tests/io_tests/test_artifacts.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING 4 | 5 | import yaml 6 | 7 | from aijournal.common.base import StrictModel 8 | from aijournal.common.meta import Artifact, ArtifactKind, ArtifactMeta 9 | from aijournal.io.artifacts import load_artifact, load_artifact_data, save_artifact 10 | 11 | if TYPE_CHECKING: 12 | from pathlib import Path 13 | 14 | 15 | class _Payload(StrictModel): 16 | value: int 17 | 18 | 19 | def _make_artifact(value: int = 1) -> Artifact[_Payload]: 20 | return Artifact[_Payload]( 21 | kind=ArtifactKind.SUMMARY_DAILY, 22 | meta=ArtifactMeta(created_at="2025-10-29T00:00:00Z"), 23 | data=_Payload(value=value), 24 | ) 25 | 26 | 27 | def test_save_artifact_writes_deterministic_yaml(tmp_path: Path) -> None: 28 | artifact = _make_artifact() 29 | path = tmp_path / "artifact.yaml" 30 | 31 | save_artifact(path, artifact) 32 | 33 | text = path.read_text(encoding="utf-8") 34 | assert text.endswith("\n") 35 | assert text.splitlines()[0] == "data:" 36 | 37 | loaded_yaml = yaml.safe_load(text) 38 | assert "schema" not in loaded_yaml 39 | assert loaded_yaml["kind"] == ArtifactKind.SUMMARY_DAILY.value 40 | 41 | 42 | def test_save_artifact_json(tmp_path: Path) -> None: 43 | artifact = _make_artifact(2) 44 | path = tmp_path / "artifact.json" 45 | 46 | save_artifact(path, artifact) 47 | 48 | text = path.read_text(encoding="utf-8") 49 | assert text.endswith("\n") 50 | assert text.strip().startswith("{") 51 | 52 | loaded = load_artifact(path, _Payload) 53 | assert loaded.data.value == 2 54 | 55 | 56 | def test_load_artifact_roundtrip(tmp_path: Path) -> None: 57 | artifact = _make_artifact(3) 58 | path = tmp_path / "artifact.yaml" 59 | save_artifact(path, artifact) 60 | 61 | loaded = load_artifact(path, _Payload) 62 | assert isinstance(loaded.data, _Payload) 63 | assert loaded.data.value == 3 64 | 65 | 66 | def test_load_artifact_data_returns_payload(tmp_path: Path) -> None: 67 | artifact = _make_artifact(5) 68 | path = tmp_path / "example.yaml" 69 | save_artifact(path, artifact) 70 | 71 | payload = load_artifact_data(path, _Payload) 72 | assert isinstance(payload, _Payload) 73 | assert payload.value == 5 74 | -------------------------------------------------------------------------------- /src/aijournal/domain/chat_sessions.py: 
-------------------------------------------------------------------------------- 1 | """Structured models for persisted chat session artifacts.""" 2 | 3 | from __future__ import annotations 4 | 5 | from pydantic import Field 6 | 7 | from aijournal.common.base import StrictModel 8 | from aijournal.common.types import TimestampStr # noqa: TC001 9 | from aijournal.domain.chat import ChatTelemetry # noqa: TC001 10 | 11 | 12 | class ChatTranscriptTurn(StrictModel): 13 | """Captured question/answer pair within a chat transcript.""" 14 | 15 | turn_index: int 16 | timestamp: TimestampStr 17 | question: str 18 | answer: str 19 | intent: str 20 | citations: list[str] = Field(default_factory=list) 21 | clarifying_question: str | None = None 22 | telemetry: ChatTelemetry 23 | feedback: str | None = None 24 | fake_mode: bool 25 | 26 | 27 | class ChatTranscript(StrictModel): 28 | """Artifact describing a full chat session transcript.""" 29 | 30 | session_id: str 31 | created_at: TimestampStr 32 | updated_at: TimestampStr 33 | turns: list[ChatTranscriptTurn] = Field(default_factory=list) 34 | 35 | 36 | class ChatSessionSummary(StrictModel): 37 | """Aggregated summary metadata for a chat session.""" 38 | 39 | session_id: str 40 | created_at: TimestampStr 41 | updated_at: TimestampStr 42 | turn_count: int = 0 43 | intent_counts: dict[str, int] = Field(default_factory=dict) 44 | last_question: str | None = None 45 | last_answer_preview: str | None = None 46 | last_citations: list[str] = Field(default_factory=list) 47 | last_clarifying_question: str | None = None 48 | last_retrieval_ms: float | None = None 49 | last_feedback: str | None = None 50 | 51 | 52 | class ChatLearningEntry(StrictModel): 53 | """Entry capturing a single learning from a chat turn.""" 54 | 55 | turn_index: int 56 | question: str 57 | intent: str 58 | citations: list[str] = Field(default_factory=list) 59 | clarifying_question: str | None = None 60 | telemetry: ChatTelemetry 61 | feedback: str | None = None 62 | 63 | 64 | class ChatSessionLearnings(StrictModel): 65 | """Rollup of learnings captured across a chat session.""" 66 | 67 | session_id: str 68 | created_at: TimestampStr 69 | updated_at: TimestampStr 70 | learnings: list[ChatLearningEntry] = Field(default_factory=list) 71 | -------------------------------------------------------------------------------- /src/aijournal/domain/facts.py: -------------------------------------------------------------------------------- 1 | """Domain models for extracted facts and daily summaries.""" 2 | 3 | from __future__ import annotations 4 | 5 | from pydantic import Field 6 | 7 | from aijournal.common.base import StrictModel 8 | from aijournal.domain.changes import ClaimProposal # noqa: TC001 9 | from aijournal.domain.evidence import SourceRef 10 | 11 | 12 | class DailySummary(StrictModel): 13 | """Derived day summary (PLAN §4.1).""" 14 | 15 | day: str 16 | bullets: list[str] = Field(default_factory=list) 17 | highlights: list[str] = Field(default_factory=list) 18 | todo_candidates: list[str] = Field(default_factory=list) 19 | 20 | 21 | FactEvidence = SourceRef 22 | 23 | 24 | class MicroFact(StrictModel): 25 | id: str 26 | statement: str 27 | confidence: float 28 | evidence: FactEvidence 29 | first_seen: str | None = None 30 | last_seen: str | None = None 31 | 32 | 33 | class MicroFactsFile(StrictModel): 34 | facts: list[MicroFact] = Field(default_factory=list) 35 | claim_proposals: list[ClaimProposal] = Field(default_factory=list) 36 | 37 | 38 | class ConsolidatedMicroFact(StrictModel): 39 | 
"""Global microfact entry that survives consolidation runs.""" 40 | 41 | id: str 42 | statement: str 43 | canonical_statement: str 44 | confidence: float 45 | first_seen: str 46 | last_seen: str 47 | observation_count: int 48 | domain: str | None = None 49 | contexts: list[str] = Field(default_factory=list) 50 | evidence_entries: list[str] = Field(default_factory=list) 51 | source_fact_ids: list[str] = Field(default_factory=list) 52 | 53 | 54 | class ConsolidatedMicrofactsFile(StrictModel): 55 | """Artifact capturing the global consolidated microfact snapshot.""" 56 | 57 | generated_at: str 58 | embedding_model: str | None = None 59 | facts: list[ConsolidatedMicroFact] = Field(default_factory=list) 60 | 61 | 62 | class MicrofactConsolidationSummary(StrictModel): 63 | """Per-day summary emitted during rebuild operations.""" 64 | 65 | day: str 66 | processed: int 67 | new_records: int 68 | merged_records: int 69 | 70 | 71 | class MicrofactConsolidationLog(StrictModel): 72 | """Artifact capturing the rebuild run summaries.""" 73 | 74 | generated_at: str 75 | entries: list[MicrofactConsolidationSummary] = Field(default_factory=list) 76 | -------------------------------------------------------------------------------- /schemas/core/aijournal.models.authoritative.ManifestEntry.json: -------------------------------------------------------------------------------- 1 | { 2 | "additionalProperties": true, 3 | "description": "Manifest row describing an ingested Markdown source.", 4 | "properties": { 5 | "aliases": { 6 | "items": { 7 | "type": "string" 8 | }, 9 | "title": "Aliases", 10 | "type": "array" 11 | }, 12 | "canonical_journal_path": { 13 | "anyOf": [ 14 | { 15 | "type": "string" 16 | }, 17 | { 18 | "type": "null" 19 | } 20 | ], 21 | "default": null, 22 | "title": "Canonical Journal Path" 23 | }, 24 | "created_at": { 25 | "title": "Created At", 26 | "type": "string" 27 | }, 28 | "hash": { 29 | "title": "Hash", 30 | "type": "string" 31 | }, 32 | "id": { 33 | "title": "Id", 34 | "type": "string" 35 | }, 36 | "ingested_at": { 37 | "title": "Ingested At", 38 | "type": "string" 39 | }, 40 | "model": { 41 | "anyOf": [ 42 | { 43 | "type": "string" 44 | }, 45 | { 46 | "type": "null" 47 | } 48 | ], 49 | "default": null, 50 | "title": "Model" 51 | }, 52 | "normalized": { 53 | "title": "Normalized", 54 | "type": "string" 55 | }, 56 | "path": { 57 | "title": "Path", 58 | "type": "string" 59 | }, 60 | "snapshot_path": { 61 | "anyOf": [ 62 | { 63 | "type": "string" 64 | }, 65 | { 66 | "type": "null" 67 | } 68 | ], 69 | "default": null, 70 | "title": "Snapshot Path" 71 | }, 72 | "source_type": { 73 | "anyOf": [ 74 | { 75 | "type": "string" 76 | }, 77 | { 78 | "type": "null" 79 | } 80 | ], 81 | "default": null, 82 | "title": "Source Type" 83 | }, 84 | "tags": { 85 | "items": { 86 | "type": "string" 87 | }, 88 | "title": "Tags", 89 | "type": "array" 90 | } 91 | }, 92 | "required": [ 93 | "hash", 94 | "path", 95 | "normalized", 96 | "ingested_at", 97 | "created_at", 98 | "id" 99 | ], 100 | "title": "ManifestEntry", 101 | "type": "object" 102 | } 103 | -------------------------------------------------------------------------------- /src/aijournal/models/authoritative.py: -------------------------------------------------------------------------------- 1 | """Authoritative data models for aijournal.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import Any 6 | 7 | from pydantic import ConfigDict, Field 8 | 9 | from aijournal.domain.claims import ClaimAtom # noqa: TC001 10 | from 
aijournal.domain.journal import Section 11 | 12 | from .base import AijournalModel 13 | 14 | JsonScalar = str | int | float | bool | None 15 | JsonValue = JsonScalar | list[Any] | dict[str, Any] 16 | 17 | 18 | class ManifestEntry(AijournalModel): 19 | """Manifest row describing an ingested Markdown source.""" 20 | 21 | model_config = ConfigDict( 22 | extra="allow", 23 | populate_by_name=True, 24 | arbitrary_types_allowed=True, 25 | ) 26 | 27 | hash: str 28 | path: str 29 | normalized: str 30 | source_type: str | None = None 31 | ingested_at: str 32 | created_at: str 33 | id: str 34 | tags: list[str] = Field(default_factory=list) 35 | model: str | None = None 36 | canonical_journal_path: str | None = None 37 | snapshot_path: str | None = None 38 | aliases: list[str] = Field(default_factory=list) 39 | 40 | 41 | class JournalEntry(AijournalModel): 42 | """Human-authored Markdown entry metadata.""" 43 | 44 | id: str 45 | created_at: str 46 | title: str 47 | tags: list[str] = Field(default_factory=list) 48 | mood: str | None = None 49 | projects: list[str] = Field(default_factory=list) 50 | summary: str | None = None 51 | 52 | 53 | JournalSection = Section 54 | 55 | 56 | class ClaimsFile(AijournalModel): 57 | claims: list[ClaimAtom] = Field(default_factory=list) 58 | 59 | 60 | class SelfProfile(AijournalModel): 61 | traits: dict[str, Any] = Field(default_factory=dict) 62 | values_motivations: dict[str, Any] = Field(default_factory=dict) 63 | goals: dict[str, Any] = Field(default_factory=dict) 64 | decision_style: dict[str, Any] = Field(default_factory=dict) 65 | affect_energy: dict[str, Any] = Field(default_factory=dict) 66 | planning: dict[str, Any] = Field(default_factory=dict) 67 | dashboard: dict[str, Any] = Field(default_factory=dict) 68 | habits: dict[str, Any] = Field(default_factory=dict) 69 | social: dict[str, Any] = Field(default_factory=dict) 70 | boundaries_ethics: dict[str, Any] = Field(default_factory=dict) 71 | coaching_prefs: dict[str, Any] = Field(default_factory=dict) 72 | -------------------------------------------------------------------------------- /src/aijournal/domain/events.py: -------------------------------------------------------------------------------- 1 | """Domain models describing claim change events and feedback adjustments.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import Annotated 6 | 7 | from pydantic import Field 8 | 9 | from aijournal.common.base import StrictModel 10 | from aijournal.domain.claims import ClaimSource # noqa: TC001 11 | from aijournal.domain.enums import ClaimEventAction, FeedbackDirection # noqa: TC001 12 | 13 | 14 | class ClaimSignaturePayload(StrictModel): 15 | """Serialized signature describing the target slot for a claim.""" 16 | 17 | claim_type: str 18 | subject: str 19 | predicate: str 20 | domain: str | None = None 21 | context: list[str] = Field(default_factory=list) 22 | 23 | 24 | class ClaimConflictPayload(StrictModel): 25 | """Structured conflict emitted during consolidation previews.""" 26 | 27 | claim_id: str 28 | signature: ClaimSignaturePayload 29 | statement: str 30 | existing_statement: str 31 | incoming_statement: str 32 | incoming_sources: list[ClaimSource] = Field(default_factory=list) 33 | 34 | 35 | class ClaimPreviewEvent(StrictModel): 36 | """Outcome of attempting to merge a claim proposal into existing atoms.""" 37 | 38 | kind: str = "preview" 39 | action: ClaimEventAction 40 | claim_id: str 41 | delta_strength: float | None = None 42 | statement: str | None = None 43 | strength: float | None 
= None 44 | signature: ClaimSignaturePayload | None = None 45 | conflict: ClaimConflictPayload | None = None 46 | related_claim_id: str | None = None 47 | related_action: str | None = None 48 | related_signature: ClaimSignaturePayload | None = None 49 | 50 | 51 | class FeedbackAdjustmentEvent(StrictModel): 52 | """Record of a claim strength adjustment triggered by chat feedback.""" 53 | 54 | kind: str = "feedback" 55 | claim_id: str 56 | old_strength: float 57 | new_strength: float 58 | delta: float 59 | 60 | 61 | ClaimChangeEvent = Annotated[ 62 | ClaimPreviewEvent | FeedbackAdjustmentEvent, 63 | Field(discriminator="kind"), 64 | ] 65 | 66 | 67 | class FeedbackBatch(StrictModel): 68 | """Batch of feedback adjustments queued for claim strength updates.""" 69 | 70 | batch_id: str 71 | created_at: str 72 | session_id: str 73 | question: str 74 | feedback: FeedbackDirection 75 | events: list[FeedbackAdjustmentEvent] = Field(default_factory=list) 76 | -------------------------------------------------------------------------------- /src/aijournal/schema.py: -------------------------------------------------------------------------------- 1 | """Pydantic-backed validation helpers for aijournal payloads.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import TYPE_CHECKING, Any 6 | 7 | from pydantic import BaseModel, ValidationError 8 | 9 | from aijournal.domain.facts import DailySummary, MicroFactsFile 10 | from aijournal.domain.journal import NormalizedEntry 11 | from aijournal.domain.persona import InterviewSet, PersonaCore 12 | from aijournal.models.authoritative import ClaimsFile, JournalEntry, SelfProfile 13 | from aijournal.models.derived import ( 14 | AdviceCard, 15 | ProfileUpdateBatch, 16 | ) 17 | 18 | if TYPE_CHECKING: 19 | from collections.abc import Iterable 20 | 21 | 22 | class SchemaValidationError(ValueError): 23 | """Raised when a payload does not conform to a named schema.""" 24 | 25 | def __init__(self, schema: str, errors: Iterable[str]) -> None: 26 | self.schema = schema 27 | self.errors = list(errors) 28 | message = f"Schema '{schema}' validation failed: {'; '.join(self.errors)}" 29 | super().__init__(message) 30 | 31 | 32 | _MODEL_REGISTRY: dict[str, type[BaseModel]] = { 33 | "advice": AdviceCard, 34 | "claims": ClaimsFile, 35 | "interviews": InterviewSet, 36 | "journal_entry": JournalEntry, 37 | "microfacts": MicroFactsFile, 38 | "normalized_entry": NormalizedEntry, 39 | "persona_core": PersonaCore, 40 | "profile_updates": ProfileUpdateBatch, 41 | "self_profile": SelfProfile, 42 | "summary": DailySummary, 43 | } 44 | 45 | 46 | def _resolve_model(schema_name: str) -> type[BaseModel]: 47 | try: 48 | return _MODEL_REGISTRY[schema_name] 49 | except KeyError as exc: # pragma: no cover - defensive guard 50 | msg = f"Unknown schema requested: {schema_name}" 51 | raise ValueError(msg) from exc 52 | 53 | 54 | def validate_schema(schema_name: str, payload: Any) -> None: 55 | """Validate payload against the named schema or raise SchemaValidationError.""" 56 | model = _resolve_model(schema_name) 57 | errors: list[str] = [] 58 | try: 59 | model.model_validate(payload) 60 | except ValidationError as exc: 61 | for err in exc.errors(): 62 | location = ".".join(str(part) for part in err.get("loc", ())) or "" 63 | errors.append(f"{location}: {err.get('msg', 'invalid value')}") 64 | if errors: 65 | raise SchemaValidationError(schema_name, errors) 66 | -------------------------------------------------------------------------------- /tests/simulator/test_human_simulator.py: 
-------------------------------------------------------------------------------- 1 | """Tests for the human-style simulator harness.""" 2 | 3 | from __future__ import annotations 4 | 5 | import shutil 6 | from typing import TYPE_CHECKING 7 | 8 | import pytest 9 | 10 | from aijournal.simulator.orchestrator import HumanSimulator 11 | from aijournal.simulator.validators import StageValidatorRegistry, ValidatorContext 12 | 13 | if TYPE_CHECKING: 14 | from pathlib import Path 15 | 16 | 17 | @pytest.fixture 18 | def simulator_workspace(tmp_path: Path) -> Path: 19 | return tmp_path / "sim-workspace" 20 | 21 | 22 | def test_simulator_runs_end_to_end( 23 | monkeypatch: pytest.MonkeyPatch, 24 | simulator_workspace: Path, 25 | ) -> None: 26 | monkeypatch.setenv("AIJOURNAL_FAKE_OLLAMA", "1") 27 | simulator = HumanSimulator(max_stage=7) 28 | report = simulator.run(workspace=simulator_workspace, keep_workspace=True) 29 | 30 | assert report.validation.ok 31 | 32 | expected_stage_map = { 33 | 0: {"persist"}, 34 | 1: {"normalize"}, 35 | 2: {"derive.summarize"}, 36 | 3: {"derive.extract_facts"}, 37 | 4: {"derive.profile_update", "derive.review"}, 38 | 5: {"refresh.index"}, 39 | 6: {"refresh.persona"}, 40 | 7: {"derive.pack"}, 41 | } 42 | seen_stage_ids: set[int] = set() 43 | for stage_result in report.capture_result.stage_results: 44 | for stage_id, names in expected_stage_map.items(): 45 | if stage_result.stage in names: 46 | seen_stage_ids.add(stage_id) 47 | break 48 | 49 | assert seen_stage_ids == set(range(8)), "missing stage results for one or more stages" 50 | assert report.workspace.exists() 51 | 52 | changed_dates = { 53 | entry.date for entry in report.capture_result.entries if entry.changed and not entry.deduped 54 | } 55 | assert changed_dates # sanity guard 56 | 57 | # Tamper with one artifact to ensure validators catch regressions. 
58 | first_date = sorted(changed_dates)[0] 59 | microfacts_path = report.workspace / "derived" / "microfacts" / f"{first_date}.yaml" 60 | if microfacts_path.exists(): 61 | microfacts_path.unlink() 62 | tampered = StageValidatorRegistry().run( 63 | ValidatorContext(workspace=report.workspace, capture=report.capture_result), 64 | stages=[3], 65 | ) 66 | assert not tampered.ok 67 | 68 | shutil.rmtree(report.workspace, ignore_errors=True) 69 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.chat_sessions.ChatSessionSummary.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Aggregated summary metadata for a chat session.", 3 | "properties": { 4 | "created_at": { 5 | "title": "Created At", 6 | "type": "string" 7 | }, 8 | "intent_counts": { 9 | "additionalProperties": { 10 | "type": "integer" 11 | }, 12 | "title": "Intent Counts", 13 | "type": "object" 14 | }, 15 | "last_answer_preview": { 16 | "anyOf": [ 17 | { 18 | "type": "string" 19 | }, 20 | { 21 | "type": "null" 22 | } 23 | ], 24 | "default": null, 25 | "title": "Last Answer Preview" 26 | }, 27 | "last_citations": { 28 | "items": { 29 | "type": "string" 30 | }, 31 | "title": "Last Citations", 32 | "type": "array" 33 | }, 34 | "last_clarifying_question": { 35 | "anyOf": [ 36 | { 37 | "type": "string" 38 | }, 39 | { 40 | "type": "null" 41 | } 42 | ], 43 | "default": null, 44 | "title": "Last Clarifying Question" 45 | }, 46 | "last_feedback": { 47 | "anyOf": [ 48 | { 49 | "type": "string" 50 | }, 51 | { 52 | "type": "null" 53 | } 54 | ], 55 | "default": null, 56 | "title": "Last Feedback" 57 | }, 58 | "last_question": { 59 | "anyOf": [ 60 | { 61 | "type": "string" 62 | }, 63 | { 64 | "type": "null" 65 | } 66 | ], 67 | "default": null, 68 | "title": "Last Question" 69 | }, 70 | "last_retrieval_ms": { 71 | "anyOf": [ 72 | { 73 | "type": "number" 74 | }, 75 | { 76 | "type": "null" 77 | } 78 | ], 79 | "default": null, 80 | "title": "Last Retrieval Ms" 81 | }, 82 | "session_id": { 83 | "title": "Session Id", 84 | "type": "string" 85 | }, 86 | "turn_count": { 87 | "default": 0, 88 | "title": "Turn Count", 89 | "type": "integer" 90 | }, 91 | "updated_at": { 92 | "title": "Updated At", 93 | "type": "string" 94 | } 95 | }, 96 | "required": [ 97 | "session_id", 98 | "created_at", 99 | "updated_at" 100 | ], 101 | "title": "ChatSessionSummary", 102 | "type": "object" 103 | } 104 | -------------------------------------------------------------------------------- /src/aijournal/api/chat.py: -------------------------------------------------------------------------------- 1 | """Strict chat API models shared by CLI and services.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import TYPE_CHECKING, Any, Literal 6 | 7 | from pydantic import Field 8 | 9 | from aijournal.common.base import StrictModel 10 | 11 | if TYPE_CHECKING: 12 | from aijournal.domain.index import RetrievedChunk 13 | 14 | 15 | class ChatCitation(StrictModel): 16 | """Reference to a retrieved chunk included in a chat response.""" 17 | 18 | chunk_id: str 19 | code: str 20 | normalized_id: str 21 | chunk_index: int 22 | source_path: str 23 | date: str 24 | tags: list[str] = Field(default_factory=list) 25 | score: float 26 | chunk_type: str 27 | 28 | @property 29 | def marker(self) -> str: 30 | label = self.chunk_type or "entry" 31 | return f"[{label}:{self.code}]" 32 | 33 | @classmethod 34 | def from_chunk(cls, chunk: RetrievedChunk) -> ChatCitation: 35 | 
code = f"{chunk.normalized_id}#p{chunk.chunk_index}" 36 | return cls( 37 | chunk_id=chunk.chunk_id, 38 | code=code, 39 | normalized_id=chunk.normalized_id, 40 | chunk_index=chunk.chunk_index, 41 | source_path=chunk.source_path, 42 | date=chunk.date, 43 | tags=list(chunk.tags), 44 | score=chunk.score, 45 | chunk_type=chunk.chunk_type or "entry", 46 | ) 47 | 48 | 49 | class ChatCitationRef(StrictModel): 50 | """Reference emitted by the LLM; resolved against retrieved chunks.""" 51 | 52 | code: str = Field(min_length=1) 53 | 54 | 55 | class ChatResponse(StrictModel): 56 | """Structured response returned by the chat LLM.""" 57 | 58 | answer: str = Field(..., max_length=4000) 59 | citations: list[ChatCitationRef] = Field(default_factory=list) 60 | clarifying_question: str | None = None 61 | telemetry: dict[str, Any] = Field(default_factory=dict) 62 | timestamp: str | None = None 63 | 64 | 65 | class ChatRequest(StrictModel): 66 | """Incoming chat payload for both CLI and FastAPI surfaces.""" 67 | 68 | question: str = Field(min_length=1) 69 | top: int | None = Field(default=None, ge=1) 70 | tags: list[str] | None = None 71 | source: list[str] | None = None 72 | date_from: str | None = None 73 | date_to: str | None = None 74 | session_id: str | None = Field(default=None, pattern=r"^[A-Za-z0-9_.\-]+$") 75 | save: bool = True 76 | feedback: Literal["up", "down"] | None = None 77 | -------------------------------------------------------------------------------- /src/aijournal/io/yaml_io.py: -------------------------------------------------------------------------------- 1 | """Typed YAML serialization helpers for Pydantic models.""" 2 | 3 | from __future__ import annotations 4 | 5 | from enum import Enum 6 | from typing import TYPE_CHECKING, Any, TypeVar 7 | 8 | import yaml 9 | from pydantic import BaseModel 10 | 11 | if TYPE_CHECKING: 12 | from pathlib import Path 13 | 14 | T = TypeVar("T", bound=BaseModel) 15 | 16 | 17 | class _EnumSafeDumper(yaml.SafeDumper): 18 | """YAML dumper that serializes enum instances as their values.""" 19 | 20 | 21 | def _enum_representer(dumper: _EnumSafeDumper, value: Enum) -> yaml.Node: 22 | payload = value.value if hasattr(value, "value") else value 23 | return dumper.represent_scalar("tag:yaml.org,2002:str", str(payload)) 24 | 25 | 26 | _EnumSafeDumper.add_multi_representer(Enum, _enum_representer) 27 | 28 | 29 | def _str_representer(dumper: _EnumSafeDumper, value: str) -> yaml.Node: 30 | """Render unicode directly and pretty-print multiline scalars.""" 31 | style = "|" if "\n" in value else None 32 | return dumper.represent_scalar("tag:yaml.org,2002:str", value, style=style) 33 | 34 | 35 | _EnumSafeDumper.add_representer(str, _str_representer) 36 | 37 | 38 | def _read_yaml(path: Path) -> Any: 39 | data = yaml.safe_load(path.read_text(encoding="utf-8")) 40 | return data if data is not None else {} 41 | 42 | 43 | def load_yaml_model(path: Path, cls: type[T], *, default: T | None = None) -> T: 44 | """Load a YAML document into the requested Pydantic model.""" 45 | if not path.exists(): 46 | if default is not None: 47 | return default 48 | raise FileNotFoundError(path) 49 | data = _read_yaml(path) 50 | return cls.model_validate(data) 51 | 52 | 53 | def dump_yaml(data: Any, *, sort_keys: bool = False) -> str: 54 | """Serialize arbitrary data to YAML using the enum-safe dumper.""" 55 | return yaml.dump( 56 | data, 57 | Dumper=_EnumSafeDumper, 58 | sort_keys=sort_keys, 59 | allow_unicode=True, 60 | ) 61 | 62 | 63 | def write_yaml_model(path: Path, instance: T) -> None: 
64 | """Persist a Pydantic model instance to YAML on disk.""" 65 | payload = instance.model_dump(mode="python", exclude_none=False) 66 | path.parent.mkdir(parents=True, exist_ok=True) 67 | serialized = dump_yaml(payload, sort_keys=False) 68 | if path.exists(): 69 | existing = path.read_text(encoding="utf-8") 70 | if existing == serialized: 71 | return 72 | path.write_text(serialized, encoding="utf-8") 73 | -------------------------------------------------------------------------------- /src/aijournal/services/capture/stages/stage8_pack.py: -------------------------------------------------------------------------------- 1 | """Stage 8: assemble shareable persona/context packs.""" 2 | 3 | from __future__ import annotations 4 | 5 | from time import perf_counter 6 | from typing import TYPE_CHECKING 7 | 8 | import typer 9 | 10 | if TYPE_CHECKING: 11 | from pathlib import Path 12 | 13 | from aijournal.api.capture import CaptureInput 14 | from aijournal.services.capture import PackStage8Outputs 15 | 16 | 17 | def run_pack_stage_8( 18 | inputs: CaptureInput, 19 | root: Path, 20 | run_id: str, 21 | persona_changed: bool, 22 | ) -> PackStage8Outputs: 23 | from aijournal.commands.pack import run_pack 24 | from aijournal.services.capture import PackStage8Outputs 25 | from aijournal.services.capture.results import OperationResult 26 | from aijournal.services.capture.utils import relative_path 27 | 28 | if not inputs.pack: 29 | return PackStage8Outputs(OperationResult.noop("no pack requested"), 0.0) 30 | if not persona_changed: 31 | return PackStage8Outputs( 32 | OperationResult.noop("persona unchanged, pack not regenerated"), 33 | 0.0, 34 | ) 35 | 36 | stage_start = perf_counter() 37 | level = inputs.pack.upper() 38 | history_days = 1 if level == "L4" else 0 39 | pack_output = root / "derived" / "packs" / f"{level.lower()}_{run_id}.yaml" 40 | pack_error: str | None = None 41 | try: 42 | run_pack( 43 | level, 44 | None, 45 | output=pack_output, 46 | max_tokens=None, 47 | fmt="yaml", 48 | history_days=history_days, 49 | dry_run=False, 50 | ) 51 | except typer.Exit as exc: 52 | if exc.exit_code not in (0,): 53 | pack_error = str(exc) 54 | except Exception as exc: # pragma: no cover - defensive 55 | pack_error = str(exc) 56 | duration_ms = (perf_counter() - stage_start) * 1000.0 57 | pack_details: dict[str, object] = {"level": level, "history_days": history_days} 58 | if pack_error is not None: 59 | op_result = OperationResult.fail( 60 | f"pack generation failed: {pack_error}", 61 | details=pack_details, 62 | ) 63 | else: 64 | rel_output = relative_path(pack_output, root) 65 | op_result = OperationResult.wrote( 66 | [rel_output], 67 | message="pack generated", 68 | details=pack_details, 69 | ) 70 | return PackStage8Outputs(op_result, duration_ms) 71 | -------------------------------------------------------------------------------- /src/aijournal/services/capture/stages/stage2_summarize.py: -------------------------------------------------------------------------------- 1 | """Stage 2: generate and persist daily summaries.""" 2 | 3 | from __future__ import annotations 4 | 5 | from time import perf_counter 6 | from typing import TYPE_CHECKING 7 | 8 | if TYPE_CHECKING: 9 | from pathlib import Path 10 | 11 | from aijournal.api.capture import CaptureInput 12 | from aijournal.common.app_config import AppConfig 13 | from aijournal.services.capture import SummarizeStage2Outputs 14 | 15 | 16 | def run_summarize_stage_2( 17 | changed_dates: list[str], 18 | inputs: CaptureInput, 19 | root: Path, 20 | config: 
AppConfig, 21 | ) -> SummarizeStage2Outputs: 22 | from aijournal.services.capture import SummarizeStage2Outputs 23 | from aijournal.services.capture.graceful import graceful_summarize 24 | from aijournal.services.capture.results import OperationResult 25 | from aijournal.services.capture.utils import relative_path 26 | 27 | stage_start = perf_counter() 28 | summary_paths: list[str] = [] 29 | summary_errors: list[str] = [] 30 | for date in changed_dates: 31 | summary_path, error = graceful_summarize( 32 | date, 33 | progress=inputs.progress, 34 | workspace=root, 35 | config=config, 36 | ) 37 | if error: 38 | summary_errors.append(f"{date}: {error}") 39 | elif summary_path: 40 | summary_paths.append(relative_path(summary_path, root)) 41 | duration_ms = (perf_counter() - stage_start) * 1000.0 42 | summary_details: dict[str, object] = {"dates": changed_dates} 43 | if summary_errors: 44 | message = "summaries completed with errors" if summary_paths else "summaries failed" 45 | op_result = OperationResult( 46 | ok=bool(summary_paths), 47 | changed=bool(summary_paths), 48 | message=message, 49 | artifacts=summary_paths, 50 | warnings=summary_errors, 51 | details=summary_details, 52 | ) 53 | elif summary_paths: 54 | message = f"generated summaries for {len(summary_paths)} entries" 55 | op_result = OperationResult.wrote( 56 | summary_paths, 57 | message=message, 58 | details=summary_details, 59 | ) 60 | else: 61 | op_result = OperationResult.noop( 62 | "summaries already up to date", 63 | details=summary_details, 64 | ) 65 | return SummarizeStage2Outputs(op_result, duration_ms, summary_paths) 66 | -------------------------------------------------------------------------------- /schemas/core/aijournal.api.chat.ChatRequest.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Incoming chat payload for both CLI and FastAPI surfaces.", 3 | "properties": { 4 | "date_from": { 5 | "anyOf": [ 6 | { 7 | "type": "string" 8 | }, 9 | { 10 | "type": "null" 11 | } 12 | ], 13 | "default": null, 14 | "title": "Date From" 15 | }, 16 | "date_to": { 17 | "anyOf": [ 18 | { 19 | "type": "string" 20 | }, 21 | { 22 | "type": "null" 23 | } 24 | ], 25 | "default": null, 26 | "title": "Date To" 27 | }, 28 | "feedback": { 29 | "anyOf": [ 30 | { 31 | "enum": [ 32 | "up", 33 | "down" 34 | ], 35 | "type": "string" 36 | }, 37 | { 38 | "type": "null" 39 | } 40 | ], 41 | "default": null, 42 | "title": "Feedback" 43 | }, 44 | "question": { 45 | "minLength": 1, 46 | "title": "Question", 47 | "type": "string" 48 | }, 49 | "save": { 50 | "default": true, 51 | "title": "Save", 52 | "type": "boolean" 53 | }, 54 | "session_id": { 55 | "anyOf": [ 56 | { 57 | "pattern": "^[A-Za-z0-9_.\\-]+$", 58 | "type": "string" 59 | }, 60 | { 61 | "type": "null" 62 | } 63 | ], 64 | "default": null, 65 | "title": "Session Id" 66 | }, 67 | "source": { 68 | "anyOf": [ 69 | { 70 | "items": { 71 | "type": "string" 72 | }, 73 | "type": "array" 74 | }, 75 | { 76 | "type": "null" 77 | } 78 | ], 79 | "default": null, 80 | "title": "Source" 81 | }, 82 | "tags": { 83 | "anyOf": [ 84 | { 85 | "items": { 86 | "type": "string" 87 | }, 88 | "type": "array" 89 | }, 90 | { 91 | "type": "null" 92 | } 93 | ], 94 | "default": null, 95 | "title": "Tags" 96 | }, 97 | "top": { 98 | "anyOf": [ 99 | { 100 | "minimum": 1, 101 | "type": "integer" 102 | }, 103 | { 104 | "type": "null" 105 | } 106 | ], 107 | "default": null, 108 | "title": "Top" 109 | } 110 | }, 111 | "required": [ 112 | "question" 113 | ], 114 | "title": 
"ChatRequest", 115 | "type": "object" 116 | } 117 | -------------------------------------------------------------------------------- /tests/services/test_claim_id_generation.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from aijournal.commands import profile as profile_cmd 4 | from aijournal.domain.changes import ClaimAtomInput, ClaimProposal 5 | from aijournal.domain.claims import Scope 6 | from aijournal.domain.enums import ClaimStatus, ClaimType 7 | from aijournal.domain.evidence import SourceRef 8 | from aijournal.utils import time as time_utils 9 | 10 | 11 | def _make_proposal( 12 | statement: str, 13 | *, 14 | normalized_id: str = "entry-2006-12-01", 15 | predicate: str = "prefers", 16 | ) -> ClaimProposal: 17 | claim_input = ClaimAtomInput( 18 | type=ClaimType.PREFERENCE, 19 | subject="work", 20 | predicate=predicate, 21 | statement=statement, 22 | scope=Scope(), 23 | strength=0.6, 24 | status=ClaimStatus.ACCEPTED, 25 | review_after_days=120, 26 | ) 27 | return ClaimProposal( 28 | type=claim_input.type, 29 | subject=claim_input.subject, 30 | predicate=claim_input.predicate, 31 | statement=claim_input.statement, 32 | scope=claim_input.scope, 33 | strength=claim_input.strength, 34 | status=claim_input.status, 35 | review_after_days=claim_input.review_after_days, 36 | normalized_ids=[normalized_id], 37 | evidence=[SourceRef(entry_id=normalized_id, spans=[])], 38 | ) 39 | 40 | 41 | def test_claim_ids_include_hash_suffix_for_uniqueness() -> None: 42 | proposal_a = _make_proposal("Prefers morning planning sessions") 43 | proposal_b = _make_proposal("Prefers evening reflection rituals") 44 | 45 | id_a = profile_cmd._proposal_claim_id( 46 | proposal_a, 47 | proposal_a.statement, 48 | set(), 49 | ) 50 | id_b = profile_cmd._proposal_claim_id( 51 | proposal_b, 52 | proposal_b.statement, 53 | {id_a}, 54 | ) 55 | 56 | assert id_a != id_b 57 | assert id_a.startswith("entry-2006-12-01-") 58 | assert id_b.startswith("entry-2006-12-01-") 59 | 60 | 61 | def test_apply_claim_proposal_keeps_every_statement() -> None: 62 | timestamp = time_utils.format_timestamp(time_utils.now()) 63 | claims: list = [] 64 | proposals = [ 65 | _make_proposal("Blocks mornings for planning", predicate="plans_mornings"), 66 | _make_proposal("Schedules evening retros", predicate="retros_evenings"), 67 | ] 68 | 69 | for proposal in proposals: 70 | profile_cmd._apply_claim_proposal(claims, proposal, timestamp) 71 | 72 | assert len(claims) == len(proposals) 73 | assert len({claim.id for claim in claims}) == len(proposals) 74 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.chat_sessions.ChatTranscriptTurn.json: -------------------------------------------------------------------------------- 1 | { 2 | "$defs": { 3 | "ChatTelemetry": { 4 | "description": "Telemetry captured during a chat turn.", 5 | "properties": { 6 | "chunk_count": { 7 | "title": "Chunk Count", 8 | "type": "integer" 9 | }, 10 | "model": { 11 | "title": "Model", 12 | "type": "string" 13 | }, 14 | "retrieval_ms": { 15 | "title": "Retrieval Ms", 16 | "type": "number" 17 | }, 18 | "retriever_source": { 19 | "title": "Retriever Source", 20 | "type": "string" 21 | } 22 | }, 23 | "required": [ 24 | "retrieval_ms", 25 | "chunk_count", 26 | "retriever_source", 27 | "model" 28 | ], 29 | "title": "ChatTelemetry", 30 | "type": "object" 31 | } 32 | }, 33 | "description": "Captured question/answer pair within a chat 
transcript.", 34 | "properties": { 35 | "answer": { 36 | "title": "Answer", 37 | "type": "string" 38 | }, 39 | "citations": { 40 | "items": { 41 | "type": "string" 42 | }, 43 | "title": "Citations", 44 | "type": "array" 45 | }, 46 | "clarifying_question": { 47 | "anyOf": [ 48 | { 49 | "type": "string" 50 | }, 51 | { 52 | "type": "null" 53 | } 54 | ], 55 | "default": null, 56 | "title": "Clarifying Question" 57 | }, 58 | "fake_mode": { 59 | "title": "Fake Mode", 60 | "type": "boolean" 61 | }, 62 | "feedback": { 63 | "anyOf": [ 64 | { 65 | "type": "string" 66 | }, 67 | { 68 | "type": "null" 69 | } 70 | ], 71 | "default": null, 72 | "title": "Feedback" 73 | }, 74 | "intent": { 75 | "title": "Intent", 76 | "type": "string" 77 | }, 78 | "question": { 79 | "title": "Question", 80 | "type": "string" 81 | }, 82 | "telemetry": { 83 | "$ref": "#/$defs/ChatTelemetry" 84 | }, 85 | "timestamp": { 86 | "title": "Timestamp", 87 | "type": "string" 88 | }, 89 | "turn_index": { 90 | "title": "Turn Index", 91 | "type": "integer" 92 | } 93 | }, 94 | "required": [ 95 | "turn_index", 96 | "timestamp", 97 | "question", 98 | "answer", 99 | "intent", 100 | "telemetry", 101 | "fake_mode" 102 | ], 103 | "title": "ChatTranscriptTurn", 104 | "type": "object" 105 | } 106 | -------------------------------------------------------------------------------- /src/aijournal/services/capture/stages/stage3_facts.py: -------------------------------------------------------------------------------- 1 | """Stage 3: extract micro-facts and store consolidated records.""" 2 | 3 | from __future__ import annotations 4 | 5 | from time import perf_counter 6 | from typing import TYPE_CHECKING 7 | 8 | if TYPE_CHECKING: 9 | from pathlib import Path 10 | 11 | from aijournal.api.capture import CaptureInput 12 | from aijournal.common.app_config import AppConfig 13 | from aijournal.services.capture import FactsStage3Outputs 14 | 15 | 16 | def run_facts_stage_3( 17 | changed_dates: list[str], 18 | inputs: CaptureInput, 19 | root: Path, 20 | config: AppConfig, 21 | ) -> FactsStage3Outputs: 22 | from aijournal.commands.profile import load_profile_components 23 | from aijournal.services.capture import FactsStage3Outputs 24 | from aijournal.services.capture.graceful import graceful_facts 25 | from aijournal.services.capture.results import OperationResult 26 | from aijournal.services.capture.utils import relative_path 27 | 28 | stage_start = perf_counter() 29 | facts_paths: list[str] = [] 30 | facts_errors: list[str] = [] 31 | _, claim_models = load_profile_components(root, config=config) 32 | for date in changed_dates: 33 | facts_path, error = graceful_facts( 34 | date, 35 | progress=inputs.progress, 36 | claim_models=claim_models, 37 | generate_preview=False, 38 | workspace=root, 39 | config=config, 40 | ) 41 | if error: 42 | facts_errors.append(f"{date}: {error}") 43 | elif facts_path: 44 | facts_paths.append(relative_path(facts_path, root)) 45 | duration_ms = (perf_counter() - stage_start) * 1000.0 46 | facts_details: dict[str, object] = {"dates": changed_dates} 47 | if facts_errors: 48 | message = "facts completed with errors" if facts_paths else "facts failed" 49 | op_result = OperationResult( 50 | ok=bool(facts_paths), 51 | changed=bool(facts_paths), 52 | message=message, 53 | artifacts=facts_paths, 54 | warnings=facts_errors, 55 | details=facts_details, 56 | ) 57 | elif facts_paths: 58 | message = f"extracted micro-facts for {len(facts_paths)} entries" 59 | op_result = OperationResult.wrote( 60 | facts_paths, 61 | message=message, 62 | 
details=facts_details, 63 | ) 64 | else: 65 | op_result = OperationResult.noop( 66 | "micro-facts already up to date", 67 | details=facts_details, 68 | ) 69 | return FactsStage3Outputs(op_result, duration_ms, facts_paths) 70 | -------------------------------------------------------------------------------- /tests/prompts/test_metadata_filtering.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import pytest 4 | from pydantic import ValidationError 5 | 6 | from aijournal.domain.prompts import ( 7 | PromptMicroFact, 8 | PromptMicroFacts, 9 | convert_prompt_microfacts, 10 | is_metadata_only_fact, 11 | ) 12 | 13 | 14 | def _fact( 15 | *, 16 | fact_id: str = "fact-1", 17 | statement: str = "Completed deep work block", 18 | evidence_entry: str | None = "entry-1", 19 | ) -> PromptMicroFact: 20 | return PromptMicroFact( 21 | id=fact_id, 22 | statement=statement, 23 | confidence=0.9, 24 | evidence_entry=evidence_entry, 25 | ) 26 | 27 | 28 | def test_is_metadata_only_fact_detects_common_patterns() -> None: 29 | assert is_metadata_only_fact(_fact(fact_id="entry-created-foo")) 30 | assert is_metadata_only_fact(_fact(statement="Entry created on 2025-11-14")) 31 | assert is_metadata_only_fact(_fact(statement="Title is Focus Sprint")) 32 | assert is_metadata_only_fact(_fact(evidence_entry=None, statement="Any content")) 33 | 34 | 35 | def test_is_metadata_only_fact_allows_grounded_content() -> None: 36 | assert not is_metadata_only_fact( 37 | _fact(statement="Completed 2h focus block on auth plan", fact_id="focus-block"), 38 | ) 39 | 40 | 41 | def test_convert_prompt_microfacts_filters_metadata_only_entries() -> None: 42 | prompt = PromptMicroFacts( 43 | facts=[ 44 | _fact(fact_id="entry-created-foo", statement="Entry created on 2025-11-14"), 45 | _fact(fact_id="focus-block", statement="Completed 2h focus block"), 46 | ], 47 | ) 48 | 49 | result = convert_prompt_microfacts(prompt, entry_dates={"entry-1": "2025-11-14"}) 50 | 51 | statements = [fact.statement for fact in result.facts] 52 | assert statements == ["Completed 2h focus block"] 53 | 54 | 55 | def test_prompt_microfacts_validator_rejects_unknown_entry_ids() -> None: 56 | payload = { 57 | "facts": [ 58 | { 59 | "id": "focus-block", 60 | "statement": "Completed 2h focus block", 61 | "confidence": 0.8, 62 | "evidence_entry": "entry-1", 63 | }, 64 | ], 65 | "claim_proposals": [], 66 | } 67 | 68 | # Valid when context lists entry-1. 69 | instance = PromptMicroFacts.model_validate(payload, context={"entry_ids": ["entry-1"]}) 70 | assert instance.facts[0].evidence_entry == "entry-1" 71 | 72 | # Invalid when entry ID missing from context. 
73 | with pytest.raises(ValidationError): 74 | PromptMicroFacts.model_validate(payload, context={"entry_ids": ["other-entry"]}) 75 | -------------------------------------------------------------------------------- /src/aijournal/common/command_runner.py: -------------------------------------------------------------------------------- 1 | """Utility to run command pipelines with standardized logging.""" 2 | 3 | from __future__ import annotations 4 | 5 | from pathlib import Path 6 | from typing import TYPE_CHECKING, Any, TypeVar 7 | 8 | from pydantic import BaseModel 9 | from pydantic_core import PydanticSerializationError 10 | 11 | if TYPE_CHECKING: 12 | from collections.abc import Callable 13 | 14 | from aijournal.common.context import RunContext 15 | 16 | OptionsT = TypeVar("OptionsT", bound=BaseModel) 17 | PreparedT = TypeVar("PreparedT") 18 | ResultT = TypeVar("ResultT") 19 | OutputT = TypeVar("OutputT") 20 | 21 | 22 | def run_command_pipeline( 23 | ctx: RunContext, 24 | options: OptionsT, 25 | *, 26 | prepare_inputs: Callable[[RunContext, OptionsT], PreparedT], 27 | invoke_pipeline: Callable[[RunContext, PreparedT], ResultT], 28 | persist_output: Callable[[RunContext, ResultT], OutputT], 29 | ) -> OutputT: 30 | ctx.emit(event="command_start", options=_summarize(options)) 31 | with ctx.span("prepare_inputs"): 32 | prepared = prepare_inputs(ctx, options) 33 | with ctx.span("invoke_pipeline"): 34 | result = invoke_pipeline(ctx, prepared) 35 | with ctx.span("persist_output"): 36 | output = persist_output(ctx, result) 37 | ctx.emit(event="command_complete", output=_summarize(output)) 38 | return output 39 | 40 | 41 | def _summarize(value: Any) -> Any: 42 | result: Any 43 | if isinstance(value, BaseModel): 44 | try: 45 | result = value.model_dump(exclude_none=True, mode="json") 46 | except PydanticSerializationError: 47 | raw = value.model_dump(exclude_none=True, mode="python") 48 | result = _convert(raw) 49 | elif isinstance(value, (str, int, float, bool)) or value is None: 50 | result = value 51 | elif isinstance(value, Path): 52 | result = str(value) 53 | elif isinstance(value, (list, tuple, set)): 54 | result = [_summarize(item) for item in value] 55 | elif hasattr(value, "model_dump"): 56 | try: 57 | result = value.model_dump() 58 | except Exception: # pragma: no cover - defensive 59 | result = str(value) 60 | else: 61 | result = str(value) 62 | return result 63 | 64 | 65 | def _convert(obj: Any) -> Any: 66 | if isinstance(obj, dict): 67 | return {key: _convert(val) for key, val in obj.items()} 68 | if isinstance(obj, (list, tuple, set)): 69 | return [_convert(item) for item in obj] 70 | if callable(obj): 71 | return getattr(obj, "__name__", "callable") 72 | if isinstance(obj, Path): 73 | return str(obj) 74 | return obj 75 | -------------------------------------------------------------------------------- /schemas/core/aijournal.services.consolidator.ClaimConflict.json: -------------------------------------------------------------------------------- 1 | { 2 | "$defs": { 3 | "ClaimSignature": { 4 | "properties": { 5 | "claim_type": { 6 | "title": "Claim Type", 7 | "type": "string" 8 | }, 9 | "predicate": { 10 | "title": "Predicate", 11 | "type": "string" 12 | }, 13 | "scope": { 14 | "maxItems": 2, 15 | "minItems": 2, 16 | "prefixItems": [ 17 | { 18 | "anyOf": [ 19 | { 20 | "type": "string" 21 | }, 22 | { 23 | "type": "null" 24 | } 25 | ] 26 | }, 27 | { 28 | "items": { 29 | "type": "string" 30 | }, 31 | "type": "array" 32 | } 33 | ], 34 | "title": "Scope", 35 | "type": "array" 36 | }, 37 | 
"subject": { 38 | "title": "Subject", 39 | "type": "string" 40 | } 41 | }, 42 | "required": [ 43 | "claim_type", 44 | "subject", 45 | "predicate", 46 | "scope" 47 | ], 48 | "title": "ClaimSignature", 49 | "type": "object" 50 | }, 51 | "SourceRef": { 52 | "description": "Reference to a normalized entry that supports a claim or fact.", 53 | "properties": { 54 | "entry_id": { 55 | "title": "Entry Id", 56 | "type": "string" 57 | } 58 | }, 59 | "required": [ 60 | "entry_id" 61 | ], 62 | "title": "SourceRef", 63 | "type": "object" 64 | } 65 | }, 66 | "properties": { 67 | "claim_id": { 68 | "title": "Claim Id", 69 | "type": "string" 70 | }, 71 | "existing_statement": { 72 | "title": "Existing Statement", 73 | "type": "string" 74 | }, 75 | "incoming_sources": { 76 | "items": { 77 | "$ref": "#/$defs/SourceRef" 78 | }, 79 | "title": "Incoming Sources", 80 | "type": "array" 81 | }, 82 | "incoming_statement": { 83 | "title": "Incoming Statement", 84 | "type": "string" 85 | }, 86 | "signature": { 87 | "$ref": "#/$defs/ClaimSignature" 88 | }, 89 | "statement": { 90 | "title": "Statement", 91 | "type": "string" 92 | } 93 | }, 94 | "required": [ 95 | "claim_id", 96 | "signature", 97 | "statement", 98 | "existing_statement", 99 | "incoming_statement" 100 | ], 101 | "title": "ClaimConflict", 102 | "type": "object" 103 | } 104 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.events.ClaimConflictPayload.json: -------------------------------------------------------------------------------- 1 | { 2 | "$defs": { 3 | "ClaimSignaturePayload": { 4 | "description": "Serialized signature describing the target slot for a claim.", 5 | "properties": { 6 | "claim_type": { 7 | "title": "Claim Type", 8 | "type": "string" 9 | }, 10 | "context": { 11 | "items": { 12 | "type": "string" 13 | }, 14 | "title": "Context", 15 | "type": "array" 16 | }, 17 | "domain": { 18 | "anyOf": [ 19 | { 20 | "type": "string" 21 | }, 22 | { 23 | "type": "null" 24 | } 25 | ], 26 | "default": null, 27 | "title": "Domain" 28 | }, 29 | "predicate": { 30 | "title": "Predicate", 31 | "type": "string" 32 | }, 33 | "subject": { 34 | "title": "Subject", 35 | "type": "string" 36 | } 37 | }, 38 | "required": [ 39 | "claim_type", 40 | "subject", 41 | "predicate" 42 | ], 43 | "title": "ClaimSignaturePayload", 44 | "type": "object" 45 | }, 46 | "SourceRef": { 47 | "description": "Reference to a normalized entry that supports a claim or fact.", 48 | "properties": { 49 | "entry_id": { 50 | "title": "Entry Id", 51 | "type": "string" 52 | } 53 | }, 54 | "required": [ 55 | "entry_id" 56 | ], 57 | "title": "SourceRef", 58 | "type": "object" 59 | } 60 | }, 61 | "description": "Structured conflict emitted during consolidation previews.", 62 | "properties": { 63 | "claim_id": { 64 | "title": "Claim Id", 65 | "type": "string" 66 | }, 67 | "existing_statement": { 68 | "title": "Existing Statement", 69 | "type": "string" 70 | }, 71 | "incoming_sources": { 72 | "items": { 73 | "$ref": "#/$defs/SourceRef" 74 | }, 75 | "title": "Incoming Sources", 76 | "type": "array" 77 | }, 78 | "incoming_statement": { 79 | "title": "Incoming Statement", 80 | "type": "string" 81 | }, 82 | "signature": { 83 | "$ref": "#/$defs/ClaimSignaturePayload" 84 | }, 85 | "statement": { 86 | "title": "Statement", 87 | "type": "string" 88 | } 89 | }, 90 | "required": [ 91 | "claim_id", 92 | "signature", 93 | "statement", 94 | "existing_statement", 95 | "incoming_statement" 96 | ], 97 | "title": "ClaimConflictPayload", 98 | "type": 
"object" 99 | } 100 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.changes.FacetChange.json: -------------------------------------------------------------------------------- 1 | { 2 | "$defs": { 3 | "FacetOperation": { 4 | "enum": [ 5 | "set", 6 | "remove", 7 | "merge" 8 | ], 9 | "title": "FacetOperation", 10 | "type": "string" 11 | }, 12 | "SourceRef": { 13 | "description": "Reference to a normalized entry that supports a claim or fact.", 14 | "properties": { 15 | "entry_id": { 16 | "title": "Entry Id", 17 | "type": "string" 18 | } 19 | }, 20 | "required": [ 21 | "entry_id" 22 | ], 23 | "title": "SourceRef", 24 | "type": "object" 25 | } 26 | }, 27 | "description": "Facet modification proposed by characterization pipelines.", 28 | "properties": { 29 | "confidence": { 30 | "anyOf": [ 31 | { 32 | "type": "number" 33 | }, 34 | { 35 | "type": "null" 36 | } 37 | ], 38 | "default": null, 39 | "title": "Confidence" 40 | }, 41 | "evidence": { 42 | "items": { 43 | "$ref": "#/$defs/SourceRef" 44 | }, 45 | "title": "Evidence", 46 | "type": "array" 47 | }, 48 | "method": { 49 | "anyOf": [ 50 | { 51 | "type": "string" 52 | }, 53 | { 54 | "type": "null" 55 | } 56 | ], 57 | "default": null, 58 | "title": "Method" 59 | }, 60 | "operation": { 61 | "$ref": "#/$defs/FacetOperation" 62 | }, 63 | "path": { 64 | "title": "Path", 65 | "type": "string" 66 | }, 67 | "rationale": { 68 | "anyOf": [ 69 | { 70 | "type": "string" 71 | }, 72 | { 73 | "type": "null" 74 | } 75 | ], 76 | "default": null, 77 | "title": "Rationale" 78 | }, 79 | "review_after_days": { 80 | "anyOf": [ 81 | { 82 | "type": "integer" 83 | }, 84 | { 85 | "type": "null" 86 | } 87 | ], 88 | "default": null, 89 | "title": "Review After Days" 90 | }, 91 | "user_verified": { 92 | "anyOf": [ 93 | { 94 | "type": "boolean" 95 | }, 96 | { 97 | "type": "null" 98 | } 99 | ], 100 | "default": null, 101 | "title": "User Verified" 102 | }, 103 | "value": { 104 | "anyOf": [ 105 | {}, 106 | { 107 | "type": "null" 108 | } 109 | ], 110 | "default": null, 111 | "title": "Value" 112 | } 113 | }, 114 | "required": [ 115 | "path", 116 | "operation" 117 | ], 118 | "title": "FacetChange", 119 | "type": "object" 120 | } 121 | -------------------------------------------------------------------------------- /tests/services/capture/test_stage_persona.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING 4 | 5 | from aijournal.common.app_config import AppConfig 6 | from aijournal.services.capture import CaptureInput 7 | from aijournal.services.capture.stages import stage7_persona 8 | 9 | if TYPE_CHECKING: 10 | from pathlib import Path 11 | 12 | 13 | def _make_inputs() -> CaptureInput: 14 | return CaptureInput(source="stdin", text="Persona test") 15 | 16 | 17 | def test_stage7_persona_triggers_build(tmp_path: Path, monkeypatch) -> None: 18 | persona_dir = tmp_path / "derived" / "persona" 19 | persona_dir.mkdir(parents=True, exist_ok=True) 20 | 21 | states = [("stale", []), ("fresh", [])] 22 | 23 | monkeypatch.setattr( 24 | "aijournal.commands.persona.persona_state", 25 | lambda root, workspace, config: states.pop(0), 26 | ) 27 | 28 | monkeypatch.setattr( 29 | "aijournal.commands.profile.load_profile_components", 30 | lambda *_, **__: (object(), []), 31 | ) 32 | 33 | monkeypatch.setattr( 34 | "aijournal.commands.profile.profile_to_dict", 35 | lambda profile: {"name": "profile"}, 36 | ) 37 | 38 | 
monkeypatch.setattr( 39 | "aijournal.common.config_loader.load_config", 40 | lambda root: {}, 41 | ) 42 | 43 | persona_path = persona_dir / "persona_core.yaml" 44 | 45 | def fake_build(profile, claim_models, *, config, root): 46 | persona_path.write_text("persona", encoding="utf-8") 47 | return persona_path, True 48 | 49 | monkeypatch.setattr("aijournal.commands.persona.run_persona_build", fake_build) 50 | 51 | config = AppConfig() 52 | outputs = stage7_persona.run_persona_stage_7( 53 | _make_inputs(), 54 | tmp_path, 55 | config, 56 | {"profile": 1}, 57 | ) 58 | 59 | assert outputs.result.changed is True 60 | assert outputs.persona_changed is True 61 | assert outputs.persona_stale_before is True 62 | assert outputs.persona_stale_after is False 63 | 64 | 65 | def test_stage7_persona_noop_when_fresh(tmp_path: Path, monkeypatch) -> None: 66 | monkeypatch.setattr( 67 | "aijournal.commands.persona.persona_state", 68 | lambda root, workspace, config: ("fresh", []), 69 | ) 70 | monkeypatch.setattr( 71 | "aijournal.commands.profile.load_profile_components", 72 | lambda *_, **__: (None, []), 73 | ) 74 | 75 | config = AppConfig() 76 | outputs = stage7_persona.run_persona_stage_7( 77 | _make_inputs(), 78 | tmp_path, 79 | config, 80 | {}, 81 | ) 82 | 83 | assert outputs.result.changed is False 84 | assert outputs.result.ok is True 85 | assert outputs.persona_changed is False 86 | assert outputs.persona_stale_before is False 87 | -------------------------------------------------------------------------------- /tests/commands/test_microfact_prompts.py: -------------------------------------------------------------------------------- 1 | """Tests ensuring consolidated microfacts flow into profile update prompts.""" 2 | 3 | from __future__ import annotations 4 | 5 | import json 6 | from typing import TYPE_CHECKING 7 | 8 | from aijournal.commands import profile_update as profile_update_module 9 | from aijournal.common.app_config import AppConfig 10 | from aijournal.domain.facts import ConsolidatedMicroFact, ConsolidatedMicrofactsFile 11 | 12 | if TYPE_CHECKING: 13 | from pathlib import Path 14 | 15 | 16 | def _sample_consolidated() -> ConsolidatedMicrofactsFile: 17 | return ConsolidatedMicrofactsFile( 18 | generated_at="2025-01-05T00:00:00Z", 19 | embedding_model="fake-model", 20 | facts=[ 21 | ConsolidatedMicroFact( 22 | id="recurring.focus", 23 | statement="Blocks 8-10am for deep work", 24 | canonical_statement="blocks 8-10am for deep work", 25 | confidence=0.82, 26 | first_seen="2025-01-01", 27 | last_seen="2025-01-05", 28 | observation_count=3, 29 | domain="journal", 30 | contexts=["focus"], 31 | evidence_entries=["entry-1", "entry-2"], 32 | source_fact_ids=["2025-01-01:focus"], 33 | ), 34 | ], 35 | ) 36 | 37 | 38 | def test_profile_update_consolidated_payload(monkeypatch, tmp_path: Path) -> None: 39 | monkeypatch.setattr( 40 | profile_update_module, 41 | "load_consolidated_microfacts", 42 | lambda workspace, config: _sample_consolidated(), 43 | ) 44 | monkeypatch.setattr( 45 | profile_update_module, 46 | "select_recurring_facts", 47 | lambda snapshot, **_: [ 48 | { 49 | "statement": fact.statement, 50 | "observation_count": fact.observation_count, 51 | "first_seen": fact.first_seen, 52 | "last_seen": fact.last_seen, 53 | "contexts": fact.contexts, 54 | "evidence_entries": fact.evidence_entries, 55 | } 56 | for fact in snapshot.facts 57 | ], 58 | ) 59 | 60 | payload = profile_update_module._load_consolidated_facts_json(tmp_path, AppConfig()) 61 | consolidated_payload = json.loads(payload) 62 | assert 
consolidated_payload["facts"][0]["observation_count"] == 3 63 | 64 | 65 | def test_profile_update_consolidated_payload_missing_snapshot( 66 | monkeypatch, 67 | tmp_path: Path, 68 | ) -> None: 69 | monkeypatch.setattr( 70 | profile_update_module, 71 | "load_consolidated_microfacts", 72 | lambda workspace, config: None, 73 | ) 74 | 75 | payload = profile_update_module._load_consolidated_facts_json(tmp_path, AppConfig()) 76 | assert payload == "{}" 77 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.packs.PackBundle.json: -------------------------------------------------------------------------------- 1 | { 2 | "$defs": { 3 | "PackEntry": { 4 | "description": "Single file included in an export pack.", 5 | "properties": { 6 | "content": { 7 | "title": "Content", 8 | "type": "string" 9 | }, 10 | "path": { 11 | "title": "Path", 12 | "type": "string" 13 | }, 14 | "role": { 15 | "title": "Role", 16 | "type": "string" 17 | }, 18 | "tokens": { 19 | "title": "Tokens", 20 | "type": "integer" 21 | } 22 | }, 23 | "required": [ 24 | "role", 25 | "path", 26 | "tokens", 27 | "content" 28 | ], 29 | "title": "PackEntry", 30 | "type": "object" 31 | }, 32 | "PackMeta": { 33 | "description": "Metadata describing the assembled pack.", 34 | "properties": { 35 | "generated_at": { 36 | "title": "Generated At", 37 | "type": "string" 38 | }, 39 | "max_tokens": { 40 | "title": "Max Tokens", 41 | "type": "integer" 42 | }, 43 | "total_tokens": { 44 | "title": "Total Tokens", 45 | "type": "integer" 46 | }, 47 | "trimmed": { 48 | "items": { 49 | "$ref": "#/$defs/TrimmedFile" 50 | }, 51 | "title": "Trimmed", 52 | "type": "array" 53 | } 54 | }, 55 | "required": [ 56 | "total_tokens", 57 | "max_tokens", 58 | "generated_at" 59 | ], 60 | "title": "PackMeta", 61 | "type": "object" 62 | }, 63 | "TrimmedFile": { 64 | "description": "Record of a file trimmed due to token budget limits.", 65 | "properties": { 66 | "path": { 67 | "title": "Path", 68 | "type": "string" 69 | }, 70 | "role": { 71 | "title": "Role", 72 | "type": "string" 73 | } 74 | }, 75 | "required": [ 76 | "role", 77 | "path" 78 | ], 79 | "title": "TrimmedFile", 80 | "type": "object" 81 | } 82 | }, 83 | "description": "Structured representation of a pack export.", 84 | "properties": { 85 | "date": { 86 | "title": "Date", 87 | "type": "string" 88 | }, 89 | "files": { 90 | "items": { 91 | "$ref": "#/$defs/PackEntry" 92 | }, 93 | "title": "Files", 94 | "type": "array" 95 | }, 96 | "level": { 97 | "title": "Level", 98 | "type": "string" 99 | }, 100 | "meta": { 101 | "$ref": "#/$defs/PackMeta" 102 | } 103 | }, 104 | "required": [ 105 | "level", 106 | "date", 107 | "meta" 108 | ], 109 | "title": "PackBundle", 110 | "type": "object" 111 | } 112 | -------------------------------------------------------------------------------- /tests/services/capture/test_stage_facts.py: -------------------------------------------------------------------------------- 1 | """Tests for stage3_facts graceful error handling.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import TYPE_CHECKING, Never 6 | 7 | import typer 8 | 9 | from aijournal.common.app_config import AppConfig 10 | from aijournal.services.capture import CaptureInput 11 | from aijournal.services.capture.stages import stage3_facts 12 | 13 | if TYPE_CHECKING: 14 | from pathlib import Path 15 | 16 | 17 | def _make_inputs() -> CaptureInput: 18 | return CaptureInput(source="stdin", text="Sample entry") 19 | 20 | 21 | def _make_config() -> AppConfig: 22 
| return AppConfig( 23 | paths={ 24 | "data": "data", 25 | "derived": "derived", 26 | "profile": "profile", 27 | "prompts": "prompts", 28 | }, 29 | ) 30 | 31 | 32 | def test_stage3_facts_success(tmp_path: Path, monkeypatch) -> None: 33 | facts_path = tmp_path / "derived" / "microfacts" / "2025-10-27.yaml" 34 | facts_path.parent.mkdir(parents=True, exist_ok=True) 35 | 36 | called: list[str] = [] 37 | 38 | def fake_run( 39 | date: str, 40 | *, 41 | progress: bool, 42 | claim_models, 43 | generate_preview: bool, 44 | workspace: Path | None = None, 45 | config: AppConfig | None = None, 46 | ) -> tuple[None, Path]: 47 | del generate_preview 48 | called.append(date) 49 | facts_path.write_text("facts", encoding="utf-8") 50 | return None, facts_path 51 | 52 | def fake_load_profile(*args, **kwargs): 53 | return None, [] # profile, claims 54 | 55 | monkeypatch.setattr("aijournal.commands.facts.run_facts", fake_run) 56 | monkeypatch.setattr("aijournal.commands.profile.load_profile_components", fake_load_profile) 57 | 58 | outputs = stage3_facts.run_facts_stage_3( 59 | ["2025-10-27"], 60 | _make_inputs(), 61 | tmp_path, 62 | _make_config(), 63 | ) 64 | 65 | assert called == ["2025-10-27"] 66 | assert outputs.result.ok is True 67 | assert outputs.result.changed is True 68 | assert outputs.paths == ["derived/microfacts/2025-10-27.yaml"] 69 | 70 | 71 | def test_stage3_facts_handles_failure(tmp_path: Path, monkeypatch) -> None: 72 | def failing_run(*args, **kwargs) -> Never: 73 | raise typer.Exit(1) 74 | 75 | def fake_load_profile(*args, **kwargs): 76 | return None, [] 77 | 78 | monkeypatch.setattr("aijournal.commands.facts.run_facts", failing_run) 79 | monkeypatch.setattr("aijournal.commands.profile.load_profile_components", fake_load_profile) 80 | 81 | outputs = stage3_facts.run_facts_stage_3( 82 | ["2025-10-27"], 83 | _make_inputs(), 84 | tmp_path, 85 | _make_config(), 86 | ) 87 | 88 | assert outputs.result.ok is False 89 | assert outputs.result.changed is False 90 | assert outputs.result.warnings 91 | assert outputs.paths == [] 92 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.index.ChunkBatch.json: -------------------------------------------------------------------------------- 1 | { 2 | "$defs": { 3 | "Chunk": { 4 | "description": "Normalized chunk persisted in the retrieval index.", 5 | "properties": { 6 | "chunk_id": { 7 | "title": "Chunk Id", 8 | "type": "string" 9 | }, 10 | "chunk_index": { 11 | "title": "Chunk Index", 12 | "type": "integer" 13 | }, 14 | "chunk_type": { 15 | "default": "entry", 16 | "title": "Chunk Type", 17 | "type": "string" 18 | }, 19 | "date": { 20 | "title": "Date", 21 | "type": "string" 22 | }, 23 | "manifest_hash": { 24 | "anyOf": [ 25 | { 26 | "type": "string" 27 | }, 28 | { 29 | "type": "null" 30 | } 31 | ], 32 | "default": null, 33 | "title": "Manifest Hash" 34 | }, 35 | "normalized_id": { 36 | "title": "Normalized Id", 37 | "type": "string" 38 | }, 39 | "source_hash": { 40 | "anyOf": [ 41 | { 42 | "type": "string" 43 | }, 44 | { 45 | "type": "null" 46 | } 47 | ], 48 | "default": null, 49 | "title": "Source Hash" 50 | }, 51 | "source_path": { 52 | "title": "Source Path", 53 | "type": "string" 54 | }, 55 | "source_type": { 56 | "anyOf": [ 57 | { 58 | "type": "string" 59 | }, 60 | { 61 | "type": "null" 62 | } 63 | ], 64 | "default": null, 65 | "title": "Source Type" 66 | }, 67 | "tags": { 68 | "items": { 69 | "type": "string" 70 | }, 71 | "title": "Tags", 72 | "type": "array" 73 | }, 74 | "text": { 75 | 
"title": "Text", 76 | "type": "string" 77 | }, 78 | "tokens": { 79 | "title": "Tokens", 80 | "type": "integer" 81 | } 82 | }, 83 | "required": [ 84 | "chunk_id", 85 | "normalized_id", 86 | "chunk_index", 87 | "text", 88 | "date", 89 | "source_path", 90 | "tokens" 91 | ], 92 | "title": "Chunk", 93 | "type": "object" 94 | } 95 | }, 96 | "description": "Exported chunk set for a given journal day.", 97 | "properties": { 98 | "chunks": { 99 | "items": { 100 | "$ref": "#/$defs/Chunk" 101 | }, 102 | "title": "Chunks", 103 | "type": "array" 104 | }, 105 | "day": { 106 | "title": "Day", 107 | "type": "string" 108 | } 109 | }, 110 | "required": [ 111 | "day" 112 | ], 113 | "title": "ChunkBatch", 114 | "type": "object" 115 | } 116 | -------------------------------------------------------------------------------- /src/aijournal/services/embedding.py: -------------------------------------------------------------------------------- 1 | """Embedding helpers shared across indexing and retrieval.""" 2 | 3 | from __future__ import annotations 4 | 5 | import random 6 | from dataclasses import dataclass, field 7 | from hashlib import sha256 8 | from typing import TYPE_CHECKING 9 | 10 | import httpx 11 | 12 | from aijournal.common.constants import DEFAULT_EMBED_DIM, EMBED_TIMEOUT 13 | from aijournal.services.ollama import resolve_ollama_host 14 | 15 | if TYPE_CHECKING: 16 | from collections.abc import Sequence 17 | 18 | 19 | @dataclass 20 | class EmbeddingBackend: 21 | """Thin wrapper that returns deterministic vectors in fake mode.""" 22 | 23 | model: str 24 | host: str | None = None 25 | fake_mode: bool = False 26 | dimension: int | None = None 27 | _base_host: str = field(init=False) 28 | 29 | def __post_init__(self) -> None: 30 | """Normalize and cache the base Ollama host for repeated calls.""" 31 | self._base_host = resolve_ollama_host(self.host) 32 | 33 | def embed(self, texts: Sequence[str]) -> list[list[float]]: 34 | vectors: list[list[float]] = [] 35 | if not texts: 36 | return vectors 37 | if self.fake_mode: 38 | return [self._fake_embed(text) for text in texts] 39 | 40 | endpoint = f"{self._base_host}/api/embeddings" 41 | try: 42 | with httpx.Client(timeout=EMBED_TIMEOUT) as session: 43 | for text in texts: 44 | response = session.post( 45 | endpoint, 46 | json={ 47 | "model": self.model, 48 | "prompt": text, 49 | }, 50 | ) 51 | response.raise_for_status() 52 | payload = response.json() 53 | vector = payload.get("embedding") 54 | if not isinstance(vector, list): 55 | msg = "Ollama embedding response missing vector payload" 56 | raise TypeError(msg) 57 | if self.dimension is None: 58 | self.dimension = len(vector) 59 | vectors.append([float(value) for value in vector]) 60 | except httpx.HTTPError as exc: 61 | msg = f"Ollama embedding request failed: {exc}" 62 | raise RuntimeError(msg) from exc 63 | return vectors 64 | 65 | def embed_one(self, text: str) -> list[float]: 66 | return self.embed([text])[0] if text else [0.0] * self.dim 67 | 68 | @property 69 | def dim(self) -> int: 70 | return self.dimension or DEFAULT_EMBED_DIM 71 | 72 | def _fake_embed(self, text: str) -> list[float]: 73 | seed = int.from_bytes(sha256(text.encode("utf-8")).digest()[:8], "big") 74 | rng = random.Random(seed) # noqa: S311 - deterministic fake embeddings for tests 75 | dim = self.dimension or DEFAULT_EMBED_DIM 76 | self.dimension = dim 77 | return [rng.uniform(-1.0, 1.0) for _ in range(dim)] 78 | -------------------------------------------------------------------------------- /docs/design/profile_update_inventory.md: 
-------------------------------------------------------------------------------- 1 | # Profile Update Surface Inventory 2 | 3 | _Updated: 2025-11-14_ 4 | 5 | > Status: Prompt 3 cleanup is complete. The list below is preserved for historical/audit context so future agents know which legacy surfaces were removed or renamed when `profile_update` replaced the old flows. 6 | 7 | This note originally captured every code surface that referenced the legacy 8 | `profile_suggest` / `characterize` flows. Capture now routes exclusively 9 | through the unified `profile_update` stage. 10 | 11 | ## Command / CLI entry points 12 | 13 | - `src/aijournal/cli.py` 14 | - `profile.suggest` command → `run_profile_suggest` 15 | - `profile.apply` consumers expect artifacts from stage 4 16 | - `characterize` command and `ops pipeline characterize` wrapper 17 | - `src/aijournal/commands/profile.py` 18 | - Houses `run_profile_suggest`, apply/status plumbing, summarizer helpers, 19 | and derived artifact writers. 20 | - `src/aijournal/commands/characterize.py` 21 | - Orchestrates LLM calls for stage 5, produces 22 | `derived/pending/profile_updates/*.yaml` batches. 23 | 24 | ## Capture pipeline integration 25 | 26 | - ✅ `src/aijournal/services/capture/__init__.py` now invokes the unified 27 | `derive.profile_update` stage (wrapping `stage4_profile_update.py`). 28 | - ✅ `src/aijournal/services/capture/stages/stage4_profile_update.py` replaces 29 | the old stage4/stage5 pair. 30 | - `src/aijournal/services/capture/graceful.py` still exposes the legacy 31 | wrappers, though capture no longer consumes them. 32 | 33 | ## Prompt + DTO definitions 34 | 35 | - `prompts/profile_suggest.md` and `prompts/characterize.md` define the old 36 | LLM contracts. 37 | - `prompts/examples/profile_suggest.json` and `prompts/examples/characterize.json` 38 | back the example validation tests. 39 | - `src/aijournal/domain/prompts.py` 40 | - DTO containers (`PromptProfileUpdates`, `PromptClaimItem`, etc.) used by 41 | both prompts, plus converters into `ProfileUpdateProposals`. 42 | 43 | ## Pipeline helpers 44 | 45 | - `src/aijournal/pipelines/characterize.py` 46 | - Shared normalization and fake-mode logic for stage 5. 47 | - `src/aijournal/pipelines/facts.py` 48 | - Provides normalization helpers that stage 4 and 5 import when converting 49 | LLM-emitted claim DTOs. 50 | 51 | ## Tests and fixtures 52 | 53 | - `tests/prompts/test_prompt_examples.py` exercises 54 | `profile_suggest.json` + `characterize.json` payloads. 55 | - `tests/cli/test_cli_profile_suggest.py`, `tests/test_cli_characterize.py`, 56 | and capture-service tests (`tests/services/test_capture.py`, 57 | `tests/services/capture/test_stage_profile.py`, etc.) assert both stages run. 58 | - Simulator validators (`tests/simulator/validators.py`) expect stage 4 and 5 59 | artifacts when replaying capture runs. 60 | 61 | This inventory guided the Prompt 3 workstreams: 62 | 63 | 1. Introduce the new unified prompt (`prompts/profile_update.md`) and pipeline. 64 | 2. Wire a single `profile_update` stage into capture + CLI (a hedged sketch of this wiring appears below). 65 | 3. Delete/retire every surface listed above once parity tests pass.
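
The snippet below is a minimal, hypothetical sketch of workstream 2, assuming the unified stage follows the same conventions as the sibling runners in this repo (`run_summarize_stage_2`, `run_facts_stage_3`): the `stage4_profile_update` module is listed above, but the `run_profile_update_stage_4` name, its argument order, and the `outputs.result.artifacts` shape are assumptions, not the confirmed API.

```python
# Hypothetical sketch only: run_profile_update_stage_4 and its signature are
# assumed by analogy with run_summarize_stage_2 / run_facts_stage_3.
from pathlib import Path

from aijournal.common.app_config import AppConfig
from aijournal.services.capture import CaptureInput
from aijournal.services.capture.stages import stage4_profile_update


def derive_profile_updates(changed_dates: list[str], workspace: Path) -> list[str]:
    """Run the unified profile_update stage for the dates touched by capture."""
    inputs = CaptureInput(source="stdin", text="Captured entry text")
    config = AppConfig()
    outputs = stage4_profile_update.run_profile_update_stage_4(  # assumed entry point
        changed_dates,
        inputs,
        workspace,
        config,
    )
    # Like the other stages, the outputs carry an OperationResult with ok/changed
    # flags plus the relative paths of any artifacts that were written.
    return list(outputs.result.artifacts)
```

If the stage mirrors its siblings, callers can treat a falsy `changed` flag as a no-op and surface `warnings` when the LLM step degrades gracefully.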
66 | -------------------------------------------------------------------------------- /schemas/core/aijournal.domain.facts.ConsolidatedMicrofactsFile.json: -------------------------------------------------------------------------------- 1 | { 2 | "$defs": { 3 | "ConsolidatedMicroFact": { 4 | "description": "Global microfact entry that survives consolidation runs.", 5 | "properties": { 6 | "canonical_statement": { 7 | "title": "Canonical Statement", 8 | "type": "string" 9 | }, 10 | "confidence": { 11 | "title": "Confidence", 12 | "type": "number" 13 | }, 14 | "contexts": { 15 | "items": { 16 | "type": "string" 17 | }, 18 | "title": "Contexts", 19 | "type": "array" 20 | }, 21 | "domain": { 22 | "anyOf": [ 23 | { 24 | "type": "string" 25 | }, 26 | { 27 | "type": "null" 28 | } 29 | ], 30 | "default": null, 31 | "title": "Domain" 32 | }, 33 | "evidence_entries": { 34 | "items": { 35 | "type": "string" 36 | }, 37 | "title": "Evidence Entries", 38 | "type": "array" 39 | }, 40 | "first_seen": { 41 | "title": "First Seen", 42 | "type": "string" 43 | }, 44 | "id": { 45 | "title": "Id", 46 | "type": "string" 47 | }, 48 | "last_seen": { 49 | "title": "Last Seen", 50 | "type": "string" 51 | }, 52 | "observation_count": { 53 | "title": "Observation Count", 54 | "type": "integer" 55 | }, 56 | "source_fact_ids": { 57 | "items": { 58 | "type": "string" 59 | }, 60 | "title": "Source Fact Ids", 61 | "type": "array" 62 | }, 63 | "statement": { 64 | "title": "Statement", 65 | "type": "string" 66 | } 67 | }, 68 | "required": [ 69 | "id", 70 | "statement", 71 | "canonical_statement", 72 | "confidence", 73 | "first_seen", 74 | "last_seen", 75 | "observation_count" 76 | ], 77 | "title": "ConsolidatedMicroFact", 78 | "type": "object" 79 | } 80 | }, 81 | "description": "Artifact capturing the global consolidated microfact snapshot.", 82 | "properties": { 83 | "embedding_model": { 84 | "anyOf": [ 85 | { 86 | "type": "string" 87 | }, 88 | { 89 | "type": "null" 90 | } 91 | ], 92 | "default": null, 93 | "title": "Embedding Model" 94 | }, 95 | "facts": { 96 | "items": { 97 | "$ref": "#/$defs/ConsolidatedMicroFact" 98 | }, 99 | "title": "Facts", 100 | "type": "array" 101 | }, 102 | "generated_at": { 103 | "title": "Generated At", 104 | "type": "string" 105 | } 106 | }, 107 | "required": [ 108 | "generated_at" 109 | ], 110 | "title": "ConsolidatedMicrofactsFile", 111 | "type": "object" 112 | } 113 | -------------------------------------------------------------------------------- /src/aijournal/services/capture/stages/stage6_index.py: -------------------------------------------------------------------------------- 1 | """Stage 6: refresh the retrieval index artifacts.""" 2 | 3 | from __future__ import annotations 4 | 5 | from time import perf_counter 6 | from typing import TYPE_CHECKING, Literal 7 | 8 | import typer 9 | 10 | if TYPE_CHECKING: 11 | from collections.abc import Sequence 12 | from pathlib import Path 13 | 14 | from aijournal.services.capture import IndexStage6Outputs 15 | 16 | 17 | def run_index_stage_6( 18 | changed_dates: Sequence[str], 19 | root: Path, 20 | rebuild_mode: Literal["auto", "always", "skip"] = "auto", 21 | ) -> IndexStage6Outputs: 22 | from aijournal.commands.index import run_index_rebuild, run_index_tail 23 | from aijournal.services.capture import IndexStage6Outputs 24 | from aijournal.services.capture.results import OperationResult 25 | from aijournal.services.capture.utils import relative_path 26 | 27 | stage_start = perf_counter() 28 | index_message = "" 29 | index_error: str | None = None 
30 | index_updated = False 31 | rebuilt = False 32 | force_rebuild = rebuild_mode == "always" 33 | changed_dates_list = list(changed_dates) 34 | try: 35 | chroma_dir = root / "derived" / "index" / "chroma" 36 | if force_rebuild or not chroma_dir.exists(): 37 | index_message = run_index_rebuild(since=None, limit=None) 38 | rebuilt = True 39 | index_updated = True 40 | elif changed_dates_list: 41 | since = min(changed_dates_list) 42 | index_message = run_index_tail(since=since, days=7, limit=None) 43 | if not index_message or "already up to date" not in index_message.lower(): 44 | index_updated = True 45 | else: 46 | index_message = "no capture changes detected" 47 | except typer.Exit as exc: 48 | if exc.exit_code not in (0,): 49 | index_error = str(exc) 50 | except Exception as exc: # pragma: no cover - defensive 51 | index_error = str(exc) 52 | duration_ms = (perf_counter() - stage_start) * 1000.0 53 | index_details: dict[str, object] = { 54 | "message": index_message, 55 | "rebuild": rebuilt, 56 | "mode": rebuild_mode, 57 | } 58 | if index_error is not None: 59 | op_result = OperationResult.fail( 60 | f"index update failed: {index_error}", 61 | details=index_details, 62 | ) 63 | elif index_updated: 64 | index_artifacts = [ 65 | relative_path(root / "derived" / "index" / "chroma", root), 66 | relative_path(root / "derived" / "index" / "meta.json", root), 67 | ] 68 | op_result = OperationResult.wrote( 69 | index_artifacts, 70 | message=index_message or "index refreshed", 71 | details=index_details, 72 | ) 73 | else: 74 | op_result = OperationResult.noop( 75 | index_message or "index already up to date", 76 | details=index_details, 77 | ) 78 | return IndexStage6Outputs(op_result, duration_ms, index_updated, rebuilt) 79 | --------------------------------------------------------------------------------