From 8fbd3dbc709e5356cddd1645fa8e7bff2770d48f Mon Sep 17 00:00:00 2001
From: Yvette Carlisle <y@acg.box>
Date: Fri, 19 Jun 2026 14:44:58 +0800
Subject: [PATCH] {"schema":"decodex/commit/1","summary":"Materialize
 service-native Dreaming readback","authority":"XY-986"}

---
 Makefile.toml                                 |    9 +
 README.md                                     |   16 +-
 ...ive-dreaming-readback-materialization.json |  324 +
 ...rvice-native-dreaming-readback-report.json | 5231 +++++++++++++++++
 .../src/bin/real_world_live_adapter.rs        |  607 +-
 .../tests/real_world_job_benchmark.rs         |  183 +
 ...service-native-dreaming-readback-report.md |  128 +
 docs/evidence/benchmarking/index.md           |    1 +
 docs/log.md                                   |    4 +
 scripts/real-world-docker.sh                  |    6 +
 scripts/real-world-dreaming-service-native.sh |   88 +
 11 files changed, 6549 insertions(+), 48 deletions(-)
 create mode 100644 apps/elf-eval/fixtures/report_snapshots/2026-06-19-service-native-dreaming-readback-materialization.json
 create mode 100644 apps/elf-eval/fixtures/report_snapshots/2026-06-19-service-native-dreaming-readback-report.json
 create mode 100644 docs/evidence/benchmarking/2026-06-19-service-native-dreaming-readback-report.md
 create mode 100755 scripts/real-world-dreaming-service-native.sh

diff --git a/Makefile.toml b/Makefile.toml
index 4505b75e..59c8ed47 100644
--- a/Makefile.toml
+++ b/Makefile.toml
@@ -59,6 +59,7 @@
 # | real-world-memory-scheduled                | composite |     |
 # | real-world-memory-scheduled-json           | command   |     |
 # | real-world-memory-scheduled-report         | command   |     |
+# | real-world-memory-service-native-dreaming  | command   |     |
 # | real-world-memory-summary                  | composite |     |
 # | real-world-memory-summary-json             | command   |     |
 # | real-world-memory-summary-report           | command   |     |
@@ -865,6 +866,14 @@ args = [
 	"tmp/real-world-memory/scheduled/report.md",
 ]
 
+[tasks.real-world-memory-service-native-dreaming]
+workspace = false
+command = "bash"
+args = [
+	"scripts/real-world-docker.sh",
+	"memory-service-native-dreaming",
+]
+
 [tasks.real-world-memory-summary]
 workspace = false
 dependencies = [
diff --git a/README.md b/README.md
index 0c5eb979..7f143161 100644
--- a/README.md
+++ b/README.md
@@ -199,6 +199,14 @@ provider-backed ELF evidence was required.
   competitive status unchanged: no ELF-over-Letta win, tie, or loss is allowed until
   exported Letta core block JSON, archival readback/search JSON, and fixture source ids
   are present.
+- Service-native Dreaming readback after XY-986: the June 19 follow-up adds
+  `cargo make real-world-memory-service-native-dreaming`, a Docker-contained ELF
+  service readback command for `memory_summary`, `proactive_brief`, and
+  `scheduled_memory`. The slice scores 9 pass, 0 wrong_result, and 2 typed XY-930
+  private/provider blockers with 22/22 evidence, source-ref, and quote coverage.
+  This improves local Dreaming runtime authority and auditability, but it does not
+  prove Pulse, ChatGPT Tasks, Claude Dreams, hosted managed-memory, or private-corpus
+  parity.
 - Full-suite live real-world adapter sweep after XY-926: ELF and qmd emit
   Docker-isolated `live_real_world` records for all 55 checked-in jobs across 13 suites
   through `cargo make real-world-memory-live-adapters`. Both keep the original
@@ -309,6 +317,7 @@ Detailed evidence and interpretation:
 - [Dreaming Competitor-Strength Retest Report - June 17, 2026](docs/evidence/benchmarking/2026-06-17-dreaming-competitor-strength-retest-report.md)
 - [qmd Debug-Ergonomics Dreaming Retest Report - June 19, 2026](docs/evidence/benchmarking/2026-06-19-qmd-debug-ergonomics-dreaming-retest-report.md)
 - [OpenViking Trajectory Materialization Report - June 19, 2026](docs/evidence/benchmarking/2026-06-19-openviking-trajectory-materialization-report.md)
+- [Service-Native Dreaming Readback Report - June 19, 2026](docs/evidence/benchmarking/2026-06-19-service-native-dreaming-readback-report.md)
 - [Live Baseline Benchmark Runbook](docs/runbook/benchmarking/live_baseline_benchmark.md)
 - [Single-User Production Runbook](docs/runbook/single_user_production.md)
 - Benchmark contract:
@@ -406,9 +415,10 @@ Detailed comparison, mechanism-level analysis, and source map:
 - [Dreaming Product Surface Follow-Up Research](docs/research/dreaming_product_surface_followup.md)
 
 Latest real-world benchmark report: June 19, 2026. Latest external research refresh:
-June 11, 2026; June 19 adds the qmd debug-ergonomics Dreaming retest after the June
-17 competitor-strength closeout and the June 16 temporal reconciliation, live
-consolidation self-check, proactive-brief, and scheduled-memory scoring evidence.
+June 11, 2026; June 19 adds service-native Dreaming readback after the qmd
+debug-ergonomics Dreaming retest, the June 17 competitor-strength closeout, and the
+June 16 temporal reconciliation, live consolidation self-check, proactive-brief, and
+scheduled-memory scoring evidence.
 
 ## Documentation
 
diff --git a/apps/elf-eval/fixtures/report_snapshots/2026-06-19-service-native-dreaming-readback-materialization.json b/apps/elf-eval/fixtures/report_snapshots/2026-06-19-service-native-dreaming-readback-materialization.json
new file mode 100644
index 00000000..0d5d99ae
--- /dev/null
+++ b/apps/elf-eval/fixtures/report_snapshots/2026-06-19-service-native-dreaming-readback-materialization.json
@@ -0,0 +1,324 @@
+{
+  "schema": "elf.real_world_live_adapter_materialization/v1",
+  "adapter_id": "elf_service_native_dreaming",
+  "adapter_kind": "elf_service_runtime",
+  "status": "blocked",
+  "fixtures": "/workspace/tmp/real-world-memory/service-native-dreaming/input-fixtures",
+  "generated_fixtures": "/workspace/tmp/real-world-memory/service-native-dreaming/elf-fixtures",
+  "command_evidence": [
+    {
+      "label": "elf_service_runtime",
+      "status": "blocked",
+      "command": "cargo run -p elf-eval --bin real_world_live_adapter -- elf",
+      "artifact": "/workspace/tmp/real-world-memory/service-native-dreaming/elf-materialization.json",
+      "reason": "ELF live adapter used ElfService, worker indexing, and search_raw."
+    }
+  ],
+  "jobs": [
+    {
+      "job_id": "memory-summary-source-trace-001",
+      "suite": "memory_summary",
+      "title": "Read back a reviewable current memory summary with source trace",
+      "status": "pass",
+      "query": "Show the current memory summary surface and explain why stale, tombstoned, and unsupported derived memories are not top-of-mind current facts.",
+      "evidence_ids": [
+        "summary-contract-current",
+        "xy952-summary-contract",
+        "summary-ttl-tombstone",
+        "summary-contract-non-parity-boundary"
+      ],
+      "returned_count": 5,
+      "latency_ms": 51.676775,
+      "trace_id": "2e80669d-2bcf-4238-b780-9b42aa72d2a2",
+      "failure": null,
+      "dreaming_readback": {
+        "artifact_kind": "elf.memory_summary/v1",
+        "runtime_path": "ElfService::add_note -> ElfService::list -> derived readback artifact",
+        "service_list_count": 9,
+        "trace_id": "2e80669d-2bcf-4238-b780-9b42aa72d2a2",
+        "generated_artifact_count": 1,
+        "selected_source_refs": [
+          "stale-summary-gap",
+          "summary-background-sot",
+          "summary-contract-current",
+          "summary-contract-non-parity-boundary",
+          "summary-temporary-claim",
+          "summary-ttl-tombstone",
+          "superseded-live-evolution-loss",
+          "xy952-summary-contract"
+        ],
+        "missing_source_refs": [],
+        "source_mutation_count": 0,
+        "no_source_mutation_checked": true
+      }
+    },
+    {
+      "job_id": "proactive-daily-project-brief-001",
+      "suite": "proactive_brief",
+      "title": "Generate a daily project brief from current project memory",
+      "status": "pass",
+      "query": "Generate a daily project brief with only source-linked current recommendations.",
+      "evidence_ids": [
+        "daily-current-validation-gate",
+        "daily-current-ledger-update"
+      ],
+      "returned_count": 3,
+      "latency_ms": 6.884306,
+      "trace_id": "fc854889-2ac4-436b-a885-b43053922cb9",
+      "failure": null,
+      "dreaming_readback": {
+        "artifact_kind": "elf.proactive_project_brief/v1",
+        "runtime_path": "ElfService::add_note -> ElfService::list -> derived readback artifact",
+        "service_list_count": 3,
+        "trace_id": "fc854889-2ac4-436b-a885-b43053922cb9",
+        "generated_artifact_count": 1,
+        "selected_source_refs": [
+          "daily-current-ledger-update",
+          "daily-current-validation-gate",
+          "daily-old-parity-trap"
+        ],
+        "missing_source_refs": [],
+        "source_mutation_count": 0,
+        "no_source_mutation_checked": true
+      }
+    },
+    {
+      "job_id": "proactive-private-corpus-refresh-blocked-001",
+      "suite": "proactive_brief",
+      "title": "Block private-corpus refresh suggestions when no operator manifest exists",
+      "status": "blocked",
+      "query": "Suggest a private-corpus refresh when private inputs exist.",
+      "evidence_ids": [],
+      "returned_count": 0,
+      "latency_ms": 0.0,
+      "trace_id": null,
+      "failure": "No operator-owned private production corpus manifest is available; private-corpus refresh suggestions stay blocked under XY-930."
+    },
+    {
+      "job_id": "proactive-resume-work-brief-001",
+      "suite": "proactive_brief",
+      "title": "Generate a resume-work brief from current handoff memory",
+      "status": "pass",
+      "query": "Generate a resume-work brief that identifies the current next action and validation command.",
+      "evidence_ids": [
+        "resume-current-handoff",
+        "resume-current-validation"
+      ],
+      "returned_count": 3,
+      "latency_ms": 7.336724,
+      "trace_id": "c77d3ddb-d0c0-4168-a528-a585adfc8a7f",
+      "failure": null,
+      "dreaming_readback": {
+        "artifact_kind": "elf.proactive_project_brief/v1",
+        "runtime_path": "ElfService::add_note -> ElfService::list -> derived readback artifact",
+        "service_list_count": 3,
+        "trace_id": "c77d3ddb-d0c0-4168-a528-a585adfc8a7f",
+        "generated_artifact_count": 1,
+        "selected_source_refs": [
+          "resume-current-handoff",
+          "resume-current-validation",
+          "resume-stale-validation"
+        ],
+        "missing_source_refs": [],
+        "source_mutation_count": 0,
+        "no_source_mutation_checked": true
+      }
+    },
+    {
+      "job_id": "proactive-stale-decision-audit-001",
+      "suite": "proactive_brief",
+      "title": "Warn about a stale project decision before suggesting work",
+      "status": "pass",
+      "query": "Audit stale project decisions before generating proactive suggestions.",
+      "evidence_ids": [
+        "stale-decision-old-gate",
+        "stale-decision-new-gate"
+      ],
+      "returned_count": 2,
+      "latency_ms": 9.269810999999999,
+      "trace_id": "d7decd9a-d635-41b5-9dcc-c6e3c5c44fb7",
+      "failure": null,
+      "dreaming_readback": {
+        "artifact_kind": "elf.proactive_project_brief/v1",
+        "runtime_path": "ElfService::add_note -> ElfService::list -> derived readback artifact",
+        "service_list_count": 2,
+        "trace_id": "d7decd9a-d635-41b5-9dcc-c6e3c5c44fb7",
+        "generated_artifact_count": 1,
+        "selected_source_refs": [
+          "stale-decision-new-gate",
+          "stale-decision-old-gate"
+        ],
+        "missing_source_refs": [],
+        "source_mutation_count": 0,
+        "no_source_mutation_checked": true
+      }
+    },
+    {
+      "job_id": "proactive-stale-plan-preference-warning-001",
+      "suite": "proactive_brief",
+      "title": "Reject stale plan and preference suggestions after TTL invalidation",
+      "status": "pass",
+      "query": "Warn me about stale plans or preferences before making proactive suggestions.",
+      "evidence_ids": [
+        "stale-plan-ttl",
+        "current-preference-concise-brief"
+      ],
+      "returned_count": 5,
+      "latency_ms": 7.991892,
+      "trace_id": "f2e795b5-7ac4-4f7d-ab49-75392f6ba8a8",
+      "failure": null,
+      "dreaming_readback": {
+        "artifact_kind": "elf.proactive_project_brief/v1",
+        "runtime_path": "ElfService::add_note -> ElfService::list -> derived readback artifact",
+        "service_list_count": 5,
+        "trace_id": "f2e795b5-7ac4-4f7d-ab49-75392f6ba8a8",
+        "generated_artifact_count": 1,
+        "selected_source_refs": [
+          "current-plan-run-gate",
+          "current-preference-concise-brief",
+          "old-preference-long-brief",
+          "stale-plan-old",
+          "stale-plan-ttl"
+        ],
+        "missing_source_refs": [],
+        "source_mutation_count": 0,
+        "no_source_mutation_checked": true
+      }
+    },
+    {
+      "job_id": "scheduled-knowledge-page-refresh-suggestion-001",
+      "suite": "scheduled_memory",
+      "title": "Suggest a knowledge-page refresh from scheduled memory",
+      "status": "pass",
+      "query": "Run the scheduled knowledge-page refresh suggestion task.",
+      "evidence_ids": [
+        "scheduled-knowledge-page-stale-finding",
+        "scheduled-knowledge-reviewable-refresh"
+      ],
+      "returned_count": 3,
+      "latency_ms": 6.31843,
+      "trace_id": "df5b34bc-b8bd-427c-a531-7c37ff2444c8",
+      "failure": null,
+      "dreaming_readback": {
+        "artifact_kind": "elf.scheduled_memory_task/v1",
+        "runtime_path": "ElfService::add_note -> ElfService::list -> derived readback artifact",
+        "service_list_count": 3,
+        "trace_id": "df5b34bc-b8bd-427c-a531-7c37ff2444c8",
+        "generated_artifact_count": 1,
+        "selected_source_refs": [
+          "scheduled-knowledge-page-stale-finding",
+          "scheduled-knowledge-reviewable-refresh",
+          "scheduled-knowledge-silent-rewrite-trap"
+        ],
+        "missing_source_refs": [],
+        "source_mutation_count": 0,
+        "no_source_mutation_checked": true
+      }
+    },
+    {
+      "job_id": "scheduled-private-provider-scheduler-blocked-001",
+      "suite": "scheduled_memory",
+      "title": "Block private/provider scheduled tasks without operator inputs",
+      "status": "blocked",
+      "query": "Run private/provider scheduled memory tasks when operator inputs exist.",
+      "evidence_ids": [],
+      "returned_count": 0,
+      "latency_ms": 0.0,
+      "trace_id": null,
+      "failure": "No operator-owned private production corpus manifest, provider credentials, or hosted scheduler configuration is available; private/provider scheduled tasks stay blocked under XY-930."
+    },
+    {
+      "job_id": "scheduled-stale-decision-audit-001",
+      "suite": "scheduled_memory",
+      "title": "Audit a stale project decision during a scheduled task",
+      "status": "pass",
+      "query": "Run the scheduled stale decision audit.",
+      "evidence_ids": [
+        "scheduled-old-consolidation-only-decision",
+        "scheduled-current-direct-suite-decision"
+      ],
+      "returned_count": 2,
+      "latency_ms": 5.7482619999999995,
+      "trace_id": "3ca5cf35-007e-4c15-9dce-3983a7053e9a",
+      "failure": null,
+      "dreaming_readback": {
+        "artifact_kind": "elf.scheduled_memory_task/v1",
+        "runtime_path": "ElfService::add_note -> ElfService::list -> derived readback artifact",
+        "service_list_count": 2,
+        "trace_id": "3ca5cf35-007e-4c15-9dce-3983a7053e9a",
+        "generated_artifact_count": 1,
+        "selected_source_refs": [
+          "scheduled-current-direct-suite-decision",
+          "scheduled-old-consolidation-only-decision"
+        ],
+        "missing_source_refs": [],
+        "source_mutation_count": 0,
+        "no_source_mutation_checked": true
+      }
+    },
+    {
+      "job_id": "scheduled-stale-preference-plan-audit-001",
+      "suite": "scheduled_memory",
+      "title": "Audit stale preferences and plans during a scheduled task",
+      "status": "pass",
+      "query": "Run the scheduled stale preference and plan audit.",
+      "evidence_ids": [
+        "scheduled-stale-old-plan",
+        "scheduled-stale-plan-expired",
+        "scheduled-current-trace-plan",
+        "scheduled-current-reviewable-preference"
+      ],
+      "returned_count": 5,
+      "latency_ms": 7.603808,
+      "trace_id": "8e5741df-c5d5-4e82-a32d-dc8606e8b876",
+      "failure": null,
+      "dreaming_readback": {
+        "artifact_kind": "elf.scheduled_memory_task/v1",
+        "runtime_path": "ElfService::add_note -> ElfService::list -> derived readback artifact",
+        "service_list_count": 5,
+        "trace_id": "8e5741df-c5d5-4e82-a32d-dc8606e8b876",
+        "generated_artifact_count": 1,
+        "selected_source_refs": [
+          "scheduled-current-reviewable-preference",
+          "scheduled-current-trace-plan",
+          "scheduled-old-silent-mutation-preference",
+          "scheduled-stale-old-plan",
+          "scheduled-stale-plan-expired"
+        ],
+        "missing_source_refs": [],
+        "source_mutation_count": 0,
+        "no_source_mutation_checked": true
+      }
+    },
+    {
+      "job_id": "scheduled-weekly-project-status-summary-001",
+      "suite": "scheduled_memory",
+      "title": "Run a weekly project status summary from current memory",
+      "status": "pass",
+      "query": "Run the weekly project status summary scheduled task.",
+      "evidence_ids": [
+        "scheduled-weekly-current-gate",
+        "scheduled-weekly-ledger-update"
+      ],
+      "returned_count": 3,
+      "latency_ms": 5.362345,
+      "trace_id": "12bcc69c-4971-4cd5-9f58-16ae45772e7f",
+      "failure": null,
+      "dreaming_readback": {
+        "artifact_kind": "elf.scheduled_memory_task/v1",
+        "runtime_path": "ElfService::add_note -> ElfService::list -> derived readback artifact",
+        "service_list_count": 3,
+        "trace_id": "12bcc69c-4971-4cd5-9f58-16ae45772e7f",
+        "generated_artifact_count": 1,
+        "selected_source_refs": [
+          "scheduled-weekly-current-gate",
+          "scheduled-weekly-hosted-parity-trap",
+          "scheduled-weekly-ledger-update"
+        ],
+        "missing_source_refs": [],
+        "source_mutation_count": 0,
+        "no_source_mutation_checked": true
+      }
+    }
+  ]
+}
\ No newline at end of file
diff --git a/apps/elf-eval/fixtures/report_snapshots/2026-06-19-service-native-dreaming-readback-report.json b/apps/elf-eval/fixtures/report_snapshots/2026-06-19-service-native-dreaming-readback-report.json
new file mode 100644
index 00000000..6513f53c
--- /dev/null
+++ b/apps/elf-eval/fixtures/report_snapshots/2026-06-19-service-native-dreaming-readback-report.json
@@ -0,0 +1,5231 @@
+{
+  "schema": "elf.real_world_job_report/v1",
+  "run_id": "real-world-memory-service-native-dreaming",
+  "generated_at": "2026-06-19T06:42:28.482226741Z",
+  "runner_version": "0.2.0-unknown-aarch64-unknown-linux-gnu",
+  "corpus_profile": "mixed",
+  "adapter": {
+    "adapter_id": "elf_service_native_dreaming",
+    "name": "ELF service-native Dreaming readback adapter",
+    "behavior": "service_native_dreaming_readback",
+    "storage": "pass",
+    "runtime": "pass",
+    "notes": "Materialized through ElfService add_note/list/search readback for memory_summary, proactive_brief, and scheduled_memory fixtures. Private/provider blockers remain typed non-pass records under XY-930."
+  },
+  "external_adapters": {
+    "schema": "elf.real_world_external_adapter_report/v1",
+    "manifest_id": "real-world-memory-project-adapters-2026-06-11-first-generation-continuity-source-store",
+    "docker_isolation": {
+      "default": true,
+      "compose_file": "docker-compose.baseline.yml",
+      "runner": "scripts/live-baseline-benchmark.sh",
+      "artifact_dir": "tmp/live-baseline/",
+      "host_global_installs_required": false,
+      "notes": [
+        "External project runs default to Docker Compose and Docker-managed caches.",
+        "Real-world job fixture reports and live baseline reports use separate schemas and claim boundaries."
+      ]
+    },
+    "summary": {
+      "adapter_count": 23,
+      "external_project_count": 16,
+      "docker_default_count": 23,
+      "host_global_install_required_count": 0,
+      "fixture_backed_count": 1,
+      "live_baseline_only_count": 6,
+      "live_real_world_count": 5,
+      "research_gate_count": 11,
+      "overall_status_counts": {
+        "real": 0,
+        "mocked": 0,
+        "unsupported": 0,
+        "blocked": 7,
+        "incomplete": 0,
+        "wrong_result": 6,
+        "lifecycle_fail": 1,
+        "pass": 4,
+        "not_encoded": 5
+      },
+      "capability_status_counts": {
+        "real": 8,
+        "mocked": 1,
+        "unsupported": 6,
+        "blocked": 23,
+        "incomplete": 0,
+        "wrong_result": 10,
+        "lifecycle_fail": 0,
+        "pass": 30,
+        "not_encoded": 26
+      },
+      "suite_status_counts": {
+        "real": 0,
+        "mocked": 0,
+        "unsupported": 0,
+        "blocked": 24,
+        "incomplete": 0,
+        "wrong_result": 7,
+        "lifecycle_fail": 0,
+        "pass": 27,
+        "not_encoded": 37
+      },
+      "scenario_status_counts": {
+        "real": 0,
+        "mocked": 0,
+        "unsupported": 3,
+        "blocked": 16,
+        "incomplete": 1,
+        "wrong_result": 6,
+        "lifecycle_fail": 1,
+        "pass": 23,
+        "not_encoded": 7
+      },
+      "scenario_position_counts": {
+        "wins": 10,
+        "ties": 11,
+        "loses": 1,
+        "untested": 35
+      },
+      "scenario_outcome_counts": {
+        "win": 10,
+        "tie": 11,
+        "loss": 1,
+        "not_tested": 13,
+        "blocked": 17,
+        "non_goal": 5
+      }
+    },
+    "adapters": [
+      {
+        "adapter_id": "elf_real_world_memory_fixture",
+        "project": "ELF",
+        "adapter_kind": "offline_fixture_response",
+        "evidence_class": "fixture_backed",
+        "docker_default": true,
+        "host_global_installs_required": false,
+        "overall_status": "blocked",
+        "setup": {
+          "status": "pass",
+          "evidence": "The checked-in real_world_memory fixtures parse and score through the ELF fixture runner.",
+          "command": "cargo make real-world-memory",
+          "artifact": "tmp/real-world-memory/real-world-memory-report.json"
+        },
+        "run": {
+          "status": "blocked",
+          "evidence": "The current fixture set reports 60 jobs across 16 suites: 53 pass, 0 incomplete, 7 blocked, 0 wrong_result, 0 not_encoded, and 0 unsupported_claim. The six core_archival_memory jobs pass as ELF fixture evidence, not as live Letta comparison evidence; the one memory_summary job passes as fixture-backed source-trace evidence, not as managed-memory parity evidence; the proactive_brief suite scores 4 passing evidence-linked suggestions plus one blocked private-corpus refresh case tied to XY-930, not Pulse or hosted managed-memory parity; the scheduled_memory suite scores 4 passing scheduled readback tasks plus one blocked private/provider scheduler case tied to XY-930, not hosted scheduler, ChatGPT Tasks, Pulse, or provider-backed private-corpus parity; context_trajectory remains blocked behind OpenViking staged-artifact materialization.",
+          "command": "cargo make real-world-memory",
+          "artifact": "tmp/real-world-memory/real-world-memory-report.json"
+        },
+        "result": {
+          "status": "blocked",
+          "evidence": "This is fixture-backed ELF scoring, not a live external adapter result.",
+          "artifact": "tmp/real-world-memory/real-world-memory-report.md"
+        },
+        "capabilities": [
+          {
+            "capability": "real_world_job_fixture_scoring",
+            "status": "real",
+            "evidence": "The runner scores checked-in real_world_job records with expected evidence, traps, and typed status output."
+          },
+          {
+            "capability": "live_external_adapter_execution",
+            "status": "not_encoded",
+            "evidence": "The ELF fixture response path does not exercise an external memory project runtime."
+          },
+          {
+            "capability": "docker_isolated_baseline",
+            "status": "pass",
+            "evidence": "ELF live baseline runs execute through docker-compose.baseline.yml for retrieval and lifecycle evidence."
+          }
+        ],
+        "suites": [
+          {
+            "suite_id": "trust_source_of_truth",
+            "status": "pass",
+            "evidence": "Checked-in source-of-truth rebuild fixture is encoded and passing."
+          },
+          {
+            "suite_id": "work_resume",
+            "status": "pass",
+            "evidence": "Checked-in work-resume fixtures are encoded and passing."
+          },
+          {
+            "suite_id": "project_decisions",
+            "status": "pass",
+            "evidence": "Checked-in project-decision fixtures cover accepted decisions, reversals, current validation gates, rationale, and bounded caveats."
+          },
+          {
+            "suite_id": "retrieval",
+            "status": "pass",
+            "evidence": "Checked-in retrieval fixtures cover alternate phrasing, distractors, multi-hop routing, current-versus-obsolete selection, and minimal context."
+          },
+          {
+            "suite_id": "memory_evolution",
+            "status": "pass",
+            "evidence": "Checked-in memory-evolution fixtures cover current-versus-historical facts and the relation temporal-validity case is encoded."
+          },
+          {
+            "suite_id": "consolidation",
+            "status": "pass",
+            "evidence": "Proposal-only consolidation fixtures are encoded and passing without source mutation."
+          },
+          {
+            "suite_id": "memory_summary",
+            "status": "pass",
+            "evidence": "The source-trace memory summary fixture is encoded and passing with freshness, rationale, tombstone, and unsupported-claim guards."
+          },
+          {
+            "suite_id": "proactive_brief",
+            "status": "blocked",
+            "evidence": "The proactive brief suite scores 4 passing source-linked suggestions and 1 typed private-corpus refresh blocker tied to XY-930."
+          },
+          {
+            "suite_id": "scheduled_memory",
+            "status": "blocked",
+            "evidence": "The scheduled memory suite scores 4 passing source-linked task readbacks with execution trace coverage and 1 typed private/provider scheduler blocker tied to XY-930."
+          },
+          {
+            "suite_id": "knowledge_compilation",
+            "status": "pass",
+            "evidence": "Knowledge page fixtures are encoded and passing with citation and rebuild metrics."
+          },
+          {
+            "suite_id": "operator_debugging_ux",
+            "status": "pass",
+            "evidence": "Operator-debugging fixtures now expose stage attribution and dropped-candidate evidence without raw SQL."
+          },
+          {
+            "suite_id": "capture_integration",
+            "status": "pass",
+            "evidence": "Four redaction, exclusion, source-id, evidence-binding, and capture-boundary fixtures are encoded and passing."
+          },
+          {
+            "suite_id": "core_archival_memory",
+            "status": "pass",
+            "evidence": "Six fixture jobs score core block attachment, scope, provenance, stale-core detection, archival fallback, and project-decision recovery separately from archival note search."
+          },
+          {
+            "suite_id": "production_ops",
+            "status": "blocked",
+            "evidence": "Production-ops fixtures encode restore, Qdrant rebuild, backfill resume, resource-envelope interpretation, OpenViking wrong-result classification, plus typed blocked operator boundaries."
+          },
+          {
+            "suite_id": "personalization",
+            "status": "pass",
+            "evidence": "The scoped preference fixture is encoded and passing."
+          },
+          {
+            "suite_id": "context_trajectory",
+            "status": "blocked",
+            "evidence": "OpenViking staged retrieval, hierarchy selection, and recursive/context expansion fixtures are encoded as blocked until same-corpus evidence ids and staged artifacts are materialized."
+          }
+        ],
+        "scenarios": [],
+        "evidence": [
+          {
+            "kind": "fixture_dir",
+            "ref": "apps/elf-eval/fixtures/real_world_memory/",
+            "status": "real"
+          },
+          {
+            "kind": "command",
+            "ref": "cargo make real-world-memory",
+            "status": "pass"
+          }
+        ],
+        "notes": [
+          "This adapter record exists to keep ELF fixture results separate from live external adapter results.",
+          "The remaining non-pass ELF fixture states are production-ops operator boundaries plus OpenViking context-trajectory measurement gates.",
+          "Use elf_live_real_world for service-runtime real_world_job evidence; this fixture-backed record must not imply live-service behavior."
+        ]
+      },
+      {
+        "adapter_id": "elf_live_real_world",
+        "project": "ELF",
+        "adapter_kind": "docker_service_real_world_job",
+        "evidence_class": "live_real_world",
+        "docker_default": true,
+        "host_global_installs_required": false,
+        "overall_status": "wrong_result",
+        "setup": {
+          "status": "pass",
+          "evidence": "The live adapter task runs inside docker-compose.baseline.yml with Docker-owned Postgres, Qdrant, Cargo, npm, qmd, and cache volumes.",
+          "command": "cargo make real-world-memory-live-adapters",
+          "artifact": "tmp/real-world-memory/live-adapters/elf-materialization.json"
+        },
+        "run": {
+          "status": "wrong_result",
+          "evidence": "ELF materializes 55 real_world_job adapter_response objects through ElfService, worker indexing, search_raw, live capture/write-policy ingestion, live consolidation proposal review, live knowledge-page rebuild/lint, and operator-debug trace metadata before scoring; the full sweep includes typed wrong_result, blocked, and not_encoded job records.",
+          "command": "cargo make real-world-memory-live-adapters",
+          "artifact": "tmp/real-world-memory/live-adapters/elf-report.json"
+        },
+        "result": {
+          "status": "wrong_result",
+          "evidence": "The fresh full live sweep scores 55 jobs across all 13 checked-in suites, including live-scored consolidation, knowledge-page, capture/write-policy, and operator-debug suites. This is not a full-suite live pass because memory-evolution, production-ops, core-archival, and context-trajectory gaps remain typed non-pass records.",
+          "command": "cargo make real-world-memory-live-adapters",
+          "artifact": "tmp/real-world-memory/live-adapters/elf-report.md"
+        },
+        "capabilities": [
+          {
+            "capability": "real_world_job_adapter",
+            "status": "pass",
+            "evidence": "The adapter executes real_world_job prompts after runtime ingestion and writes generated answer artifacts before scoring."
+          },
+          {
+            "capability": "service_runtime_execution",
+            "status": "real",
+            "evidence": "The materializer uses ElfService, Postgres, Qdrant, deterministic providers, worker indexing, and search_raw in Docker."
+          },
+          {
+            "capability": "targeted_live_pass",
+            "status": "pass",
+            "evidence": "The answer-retrieval suites from the original representative slice still pass: work_resume, retrieval, and project_decisions."
+          },
+          {
+            "capability": "full_suite_live_sweep",
+            "status": "wrong_result",
+            "evidence": "The runner now emits per-job and per-suite live records for all 55 checked-in jobs, including the operator-debug fixture tree, but memory_evolution is wrong_result and production/core/context boundaries remain typed non-pass."
+          },
+          {
+            "capability": "full_suite_live_pass",
+            "status": "wrong_result",
+            "evidence": "No full-suite live pass is claimed; generated reports preserve wrong_result, blocked, and not_encoded job outcomes."
+          },
+          {
+            "capability": "typed_failure_reporting",
+            "status": "pass",
+            "evidence": "Adapter setup/runtime limitations are materialized as typed jobs with evidence JSON instead of silent claim upgrades."
+          }
+        ],
+        "suites": [
+          {
+            "suite_id": "trust_source_of_truth",
+            "status": "pass",
+            "evidence": "The live adapter retrieved the restore/Qdrant rebuild proof evidence through the service runtime."
+          },
+          {
+            "suite_id": "work_resume",
+            "status": "pass",
+            "evidence": "The live adapter passed 5/5 work_resume jobs through service-runtime evidence retrieval."
+          },
+          {
+            "suite_id": "retrieval",
+            "status": "pass",
+            "evidence": "The live adapter passed 5/5 retrieval jobs through service-runtime evidence retrieval."
+          },
+          {
+            "suite_id": "project_decisions",
+            "status": "pass",
+            "evidence": "The live adapter passed 5/5 project_decisions jobs through service-runtime evidence retrieval."
+          },
+          {
+            "suite_id": "memory_evolution",
+            "status": "wrong_result",
+            "evidence": "The live adapter passed the delete/TTL case but failed five current-versus-historical conflict jobs because retrieval-backed answers did not provide the required historical conflict evidence links."
+          },
+          {
+            "suite_id": "consolidation",
+            "status": "pass",
+            "evidence": "The live adapter creates consolidation runs, materializes proposal jobs through the worker, preserves source lineage and unsupported-claim flags, and applies/defer/discards proposals through review audit transitions."
+          },
+          {
+            "suite_id": "knowledge_compilation",
+            "status": "pass",
+            "evidence": "The live adapter rebuilds derived knowledge pages through ElfService, searches page sections, lints stale source refs after runtime source updates, and emits citation/backlink/unsupported-section page artifacts."
+          },
+          {
+            "suite_id": "operator_debugging_ux",
+            "status": "pass",
+            "evidence": "The full live sweep includes operator_debugging_ux fixtures and emits trace ids, viewer/admin trace-bundle links, replay commands, dropped-candidate visibility, repair-action clarity, and raw_sql_needed=false."
+          },
+          {
+            "suite_id": "capture_integration",
+            "status": "pass",
+            "evidence": "The live adapter passes 4/4 capture_integration jobs through Docker-local ELF ingestion, including capture-boundary classification, excluded evidence ids, source ids in source_ref, write_policy redaction audit counts, evidence binding, and zero secret leakage."
+          },
+          {
+            "suite_id": "production_ops",
+            "status": "blocked",
+            "evidence": "The live adapter sweep does not run backup/restore, private corpus, provider credential, or backfill operations; existing production-ops credential and private-manifest boundaries remain blocked."
+          },
+          {
+            "suite_id": "personalization",
+            "status": "pass",
+            "evidence": "The live adapter retrieved the scoped preference evidence and passed the personalization job."
+          },
+          {
+            "suite_id": "core_archival_memory",
+            "status": "not_encoded",
+            "evidence": "The full live adapter sweep preserves the core/archival fixture gap as typed not_encoded; this issue does not add live core-block attachment/readback materialization."
+          },
+          {
+            "suite_id": "context_trajectory",
+            "status": "blocked",
+            "evidence": "The OpenViking-style context trajectory fixtures remain blocked by live staged-trajectory and recursive-expansion measurement gaps."
+          }
+        ],
+        "scenarios": [
+          {
+            "scenario_id": "live_capture_write_policy",
+            "suite_id": "capture_integration",
+            "status": "pass",
+            "elf_position": "ties",
+            "comparison_outcome": "tie",
+            "evidence": "ELF live capture/write-policy jobs pass for redaction, exclusions, source ids, evidence binding, and no secret leakage. This is an ELF self-check, not a win over external hook systems.",
+            "command": "cargo make real-world-memory-live-adapters",
+            "artifact": "tmp/real-world-memory/live-adapters/elf-materialization.json"
+          },
+          {
+            "scenario_id": "live_consolidation_proposal_review",
+            "suite_id": "consolidation",
+            "status": "pass",
+            "elf_position": "ties",
+            "comparison_outcome": "tie",
+            "evidence": "ELF live consolidation jobs now exercise source lineage, unsupported-claim flags, and apply/defer/discard review audit transitions. This is an ELF service self-check, not a broad competitor win.",
+            "command": "cargo make real-world-memory-live-adapters",
+            "artifact": "tmp/real-world-memory/live-adapters/elf-materialization.json"
+          },
+          {
+            "scenario_id": "live_knowledge_page_rebuild_lint",
+            "suite_id": "knowledge_compilation",
+            "status": "pass",
+            "elf_position": "ties",
+            "comparison_outcome": "tie",
+            "evidence": "ELF live knowledge jobs now exercise page rebuild, search, stale-source lint, citations, backlinks, and unsupported-section handling. This is an ELF service self-check, not a broad knowledge-product win.",
+            "command": "cargo make real-world-memory-live-adapters",
+            "artifact": "tmp/real-world-memory/live-adapters/elf-materialization.json"
+          },
+          {
+            "scenario_id": "full_sweep_operator_debug",
+            "suite_id": "operator_debugging_ux",
+            "status": "pass",
+            "elf_position": "wins",
+            "comparison_outcome": "win",
+            "evidence": "ELF full live sweep now includes the operator-debug fixture tree with hydrated trace ids, trace-bundle replay commands, dropped-candidate visibility, repair guidance, and no raw SQL requirement.",
+            "command": "cargo make real-world-memory-live-adapters",
+            "artifact": "tmp/real-world-memory/live-adapters/elf-materialization.json"
+          }
+        ],
+        "evidence": [
+          {
+            "kind": "fixture_dir",
+            "ref": "apps/elf-eval/fixtures/real_world_memory/",
+            "status": "real"
+          },
+          {
+            "kind": "fixture_dir",
+            "ref": "apps/elf-eval/fixtures/real_world_job/operator_debugging_ux/",
+            "status": "real"
+          },
+          {
+            "kind": "command",
+            "ref": "cargo make real-world-memory-live-adapters",
+            "status": "pass"
+          },
+          {
+            "kind": "artifact",
+            "ref": "tmp/real-world-memory/live-adapters/elf-report.json",
+            "status": "pass"
+          }
+        ],
+        "notes": [
+          "This Docker-isolated live real_world_job record now covers the full encoded fixture corpus, not only the original three-suite representative slice.",
+          "The record is a full-suite sweep, not a full-suite pass; wrong_result, blocked, and not_encoded states remain visible.",
+          "This record does not prove private-corpus production quality or provider-backed production operations."
+        ]
+      },
+      {
+        "adapter_id": "qmd_live_baseline",
+        "project": "qmd",
+        "adapter_kind": "docker_cli_same_corpus",
+        "evidence_class": "live_baseline_only",
+        "docker_default": true,
+        "host_global_installs_required": false,
+        "overall_status": "pass",
+        "setup": {
+          "status": "pass",
+          "evidence": "The live-baseline Docker runner installs qmd inside the baseline container.",
+          "command": "ELF_BASELINE_PROJECTS=qmd cargo make baseline-live-docker",
+          "artifact": "tmp/live-baseline/qmd.log"
+        },
+        "run": {
+          "status": "pass",
+          "evidence": "qmd same-corpus retrieval, update, delete, and cold-start checks are encoded in the live baseline runner.",
+          "command": "ELF_BASELINE_PROJECTS=qmd cargo make baseline-live-docker",
+          "artifact": "tmp/live-baseline/live-baseline-report.json"
+        },
+        "result": {
+          "status": "pass",
+          "evidence": "This live_baseline_only record is same-corpus evidence only; cite qmd_live_real_world for the full live real-world sweep.",
+          "artifact": "docs/runbook/benchmarking/live_baseline_benchmark.md"
+        },
+        "capabilities": [
+          {
+            "capability": "same_corpus_retrieval",
+            "status": "pass",
+            "evidence": "qmd has an encoded Docker same-corpus retrieval adapter."
+          },
+          {
+            "capability": "update_delete_cold_start",
+            "status": "pass",
+            "evidence": "qmd lifecycle smoke checks are encoded in the live-baseline runner."
+          },
+          {
+            "capability": "real_world_job_adapter",
+            "status": "not_encoded",
+            "evidence": "This live_baseline_only record does not execute real_world_job prompts; cite qmd_live_real_world for the full live real-world sweep."
+          }
+        ],
+        "suites": [
+          {
+            "suite_id": "retrieval",
+            "status": "not_encoded",
+            "evidence": "This live_baseline_only record does not execute real_world_job retrieval prompts; cite qmd_live_real_world for the live retrieval adapter run."
+          },
+          {
+            "suite_id": "memory_evolution",
+            "status": "not_encoded",
+            "evidence": "Live-baseline lifecycle checks exist, but no real_world_job memory_evolution run is encoded."
+          },
+          {
+            "suite_id": "operator_debugging_ux",
+            "status": "not_encoded",
+            "evidence": "qmd debug ergonomics are a reference dimension; no operator_debugging_ux fixture is executed against qmd."
+          }
+        ],
+        "scenarios": [],
+        "evidence": [
+          {
+            "kind": "runner",
+            "ref": "scripts/live-baseline-benchmark.sh",
+            "status": "real"
+          },
+          {
+            "kind": "compose",
+            "ref": "docker-compose.baseline.yml",
+            "status": "real"
+          }
+        ],
+        "notes": [
+          "This same-corpus record remains separate from qmd_live_real_world, which records real_world_job prompt execution and scoring evidence."
+        ]
+      },
+      {
+        "adapter_id": "qmd_live_real_world",
+        "project": "qmd",
+        "adapter_kind": "docker_cli_real_world_job",
+        "evidence_class": "live_real_world",
+        "docker_default": true,
+        "host_global_installs_required": false,
+        "overall_status": "wrong_result",
+        "setup": {
+          "status": "pass",
+          "evidence": "The live adapter task clones and installs qmd inside the baseline Docker container when the checkout is absent.",
+          "command": "cargo make real-world-memory-live-adapters",
+          "artifact": "tmp/real-world-memory/live-adapters/qmd-materialization.json"
+        },
+        "run": {
+          "status": "wrong_result",
+          "evidence": "qmd materializes 55 real_world_job adapter_response objects through collection add, update, embed, and query --json before scoring; the full sweep includes typed wrong_result, blocked, and not_encoded job records, with operator-debug fixtures scored through qmd replay metadata rather than ELF trace hydration.",
+          "command": "cargo make real-world-memory-live-adapters",
+          "artifact": "tmp/real-world-memory/live-adapters/qmd-report.json"
+        },
+        "result": {
+          "status": "wrong_result",
+          "evidence": "The fresh full qmd live sweep scores 55 jobs across all 13 checked-in suites, preserving consolidation, knowledge-page, capture, production-ops, core-archival, and context-trajectory gaps as typed non-pass records. This is not a full-suite live pass.",
+          "command": "cargo make real-world-memory-live-adapters",
+          "artifact": "tmp/real-world-memory/live-adapters/qmd-report.md"
+        },
+        "capabilities": [
+          {
+            "capability": "real_world_job_adapter",
+            "status": "pass",
+            "evidence": "qmd executes real_world_job prompts through its local CLI retrieval/query workflow and records generated answer artifacts."
+          },
+          {
+            "capability": "local_cli_retrieval",
+            "status": "real",
+            "evidence": "The adapter uses qmd collection add, update, embed -f, and query --json inside Docker."
+          },
+          {
+            "capability": "targeted_live_pass",
+            "status": "pass",
+            "evidence": "The answer-retrieval suites from the original representative slice still pass: work_resume, retrieval, and project_decisions."
+          },
+          {
+            "capability": "full_suite_live_sweep",
+            "status": "wrong_result",
+            "evidence": "The runner now emits per-job and per-suite live records for all 55 checked-in jobs, including the operator-debug fixture tree, but memory_evolution and operator_debugging_ux are wrong_result while non-qmd product surfaces remain typed not_encoded or blocked."
+          },
+          {
+            "capability": "full_suite_live_pass",
+            "status": "wrong_result",
+            "evidence": "No full-suite live pass is claimed; generated reports preserve wrong_result, blocked, and not_encoded job outcomes."
+          },
+          {
+            "capability": "typed_failure_reporting",
+            "status": "pass",
+            "evidence": "qmd setup/runtime limitations are materialized as typed jobs with command evidence and retry artifacts."
+          }
+        ],
+        "suites": [
+          {
+            "suite_id": "trust_source_of_truth",
+            "status": "pass",
+            "evidence": "qmd retrieved the restore/Qdrant rebuild proof evidence through the local CLI workflow."
+          },
+          {
+            "suite_id": "work_resume",
+            "status": "pass",
+            "evidence": "qmd passed 5/5 work_resume jobs through CLI evidence retrieval."
+          },
+          {
+            "suite_id": "retrieval",
+            "status": "pass",
+            "evidence": "qmd passed 5/5 retrieval jobs through CLI evidence retrieval."
+          },
+          {
+            "suite_id": "project_decisions",
+            "status": "pass",
+            "evidence": "qmd passed 5/5 project_decisions jobs through CLI evidence retrieval."
+          },
+          {
+            "suite_id": "memory_evolution",
+            "status": "wrong_result",
+            "evidence": "qmd failed all six memory-evolution jobs in the fresh June 11 diagnostic, including the delete/TTL tombstone job where qmd retrieved only the current plan and missed the tombstone evidence."
+          },
+          {
+            "suite_id": "consolidation",
+            "status": "not_encoded",
+            "evidence": "The qmd live adapter sweep retrieves evidence-linked answers but does not generate or review consolidation proposals."
+          },
+          {
+            "suite_id": "knowledge_compilation",
+            "status": "not_encoded",
+            "evidence": "The qmd live adapter sweep retrieves evidence-linked answers but does not generate derived knowledge pages."
+          },
+          {
+            "suite_id": "operator_debugging_ux",
+            "status": "wrong_result",
+            "evidence": "The full qmd live sweep includes operator_debugging_ux fixtures and records replay-command metadata, but it lacks ELF trace hydration, viewer links, and intermediate candidate-drop stages, so the suite remains wrong_result."
+          },
+          {
+            "suite_id": "capture_integration",
+            "status": "not_encoded",
+            "evidence": "The qmd live adapter sweep does not exercise capture integrations or write-policy redaction boundaries; all capture_integration jobs remain typed not_encoded for qmd."
+          },
+          {
+            "suite_id": "production_ops",
+            "status": "blocked",
+            "evidence": "The qmd live adapter sweep does not run backup/restore, private corpus, provider credential, or backfill operations; existing production-ops credential and private-manifest boundaries remain blocked."
+          },
+          {
+            "suite_id": "personalization",
+            "status": "pass",
+            "evidence": "qmd retrieved the scoped preference evidence and passed the personalization job."
+          },
+          {
+            "suite_id": "core_archival_memory",
+            "status": "not_encoded",
+            "evidence": "The qmd live adapter sweep preserves the core/archival fixture gap as typed not_encoded; qmd does not expose ELF core-block attachment/readback materialization."
+          },
+          {
+            "suite_id": "context_trajectory",
+            "status": "blocked",
+            "evidence": "The OpenViking-style context trajectory fixtures remain blocked by live staged-trajectory and recursive-expansion measurement gaps."
+          }
+        ],
+        "scenarios": [],
+        "evidence": [
+          {
+            "kind": "fixture_dir",
+            "ref": "apps/elf-eval/fixtures/real_world_memory/",
+            "status": "real"
+          },
+          {
+            "kind": "fixture_dir",
+            "ref": "apps/elf-eval/fixtures/real_world_job/operator_debugging_ux/",
+            "status": "real"
+          },
+          {
+            "kind": "command",
+            "ref": "cargo make real-world-memory-live-adapters",
+            "status": "pass"
+          },
+          {
+            "kind": "artifact",
+            "ref": "tmp/real-world-memory/live-adapters/qmd-report.json",
+            "status": "pass"
+          }
+        ],
+        "notes": [
+          "This qmd record is real-world job evidence and must not be conflated with the same-corpus qmd_live_baseline record.",
+          "The record is a full-suite sweep, not a full-suite pass; wrong_result, blocked, and not_encoded states remain visible.",
+          "This record does not prove broad RAG/graph adapter parity or private-corpus production quality."
+        ]
+      },
+      {
+        "adapter_id": "elf_operator_debug_live",
+        "project": "ELF",
+        "adapter_kind": "docker_service_operator_debug_real_world_job",
+        "evidence_class": "live_real_world",
+        "docker_default": true,
+        "host_global_installs_required": false,
+        "overall_status": "pass",
+        "setup": {
+          "status": "pass",
+          "evidence": "The narrow operator-debug live task runs inside docker-compose.baseline.yml with Docker-owned Postgres, Qdrant, Cargo, npm, qmd, and cache volumes.",
+          "command": "cargo make real-world-job-operator-ux-live-adapters",
+          "artifact": "tmp/real-world-job/operator-ux-live-adapters/elf-materialization.json"
+        },
+        "run": {
+          "status": "pass",
+          "evidence": "ELF materializes operator_debugging_ux adapter_response objects through ElfService, worker indexing, search_raw trace ids, and generated operator_debug metadata.",
+          "command": "cargo make real-world-job-operator-ux-live-adapters",
+          "artifact": "tmp/real-world-job/operator-ux-live-adapters/elf-report.json"
+        },
+        "result": {
+          "status": "pass",
+          "evidence": "The narrow live slice scores operator-debugging jobs with trace availability, replay command availability, candidate-drop visibility, repair-action clarity, and raw-SQL avoidance separated in job-level evidence.",
+          "command": "cargo make real-world-job-operator-ux-live-adapters",
+          "artifact": "tmp/real-world-job/operator-ux-live-adapters/elf-report.md"
+        },
+        "capabilities": [
+          {
+            "capability": "operator_debug_real_world_job_adapter",
+            "status": "pass",
+            "evidence": "The adapter executes the checked-in operator_debugging_ux jobs through the live service materializer and generated scoring fixtures."
+          },
+          {
+            "capability": "trace_hydration_metadata",
+            "status": "pass",
+            "evidence": "Generated operator_debug records include service trace ids, viewer links, admin trace-bundle URLs, and trace_available=true."
+          },
+          {
+            "capability": "replay_command_metadata",
+            "status": "pass",
+            "evidence": "Generated operator_debug records include admin trace-bundle curl replay commands; no raw SQL path is required."
+          },
+          {
+            "capability": "candidate_drop_visibility",
+            "status": "pass",
+            "evidence": "The operator-debug jobs keep dropped-candidate visibility as explicit job-level evidence instead of relying on direct database inspection."
+          },
+          {
+            "capability": "openmemory_or_claude_mem_ui_runner",
+            "status": "not_encoded",
+            "evidence": "This ELF live slice does not launch OpenMemory or claude-mem UI flows."
+          }
+        ],
+        "suites": [
+          {
+            "suite_id": "operator_debugging_ux",
+            "status": "pass",
+            "evidence": "The narrow live operator-debug slice scores trace hydration, stage attribution, candidate-drop visibility, selected-but-not-narrated diagnosis, and repair-action clarity through generated ELF live artifacts."
+          }
+        ],
+        "scenarios": [
+          {
+            "scenario_id": "operator_debug_trace_hydration",
+            "suite_id": "operator_debugging_ux",
+            "status": "pass",
+            "elf_position": "wins",
+            "comparison_outcome": "win",
+            "evidence": "ELF generated trace_available=true, service trace ids, viewer URLs, and admin trace-bundle replay URLs for the operator-debug jobs; qmd has replay rows but no ELF trace hydration surface.",
+            "command": "cargo make real-world-job-operator-ux-live-adapters",
+            "artifact": "tmp/real-world-job/operator-ux-live-adapters/elf-report.json"
+          },
+          {
+            "scenario_id": "operator_debug_replay_command",
+            "suite_id": "operator_debugging_ux",
+            "status": "pass",
+            "elf_position": "ties",
+            "comparison_outcome": "tie",
+            "evidence": "ELF generated admin trace-bundle replay commands; qmd generated local CLI query replay commands. These are comparable replay-command availability artifacts, not equivalent UI quality claims.",
+            "command": "cargo make real-world-job-operator-ux-live-adapters",
+            "artifact": "tmp/real-world-job/operator-ux-live-adapters/summary.json"
+          },
+          {
+            "scenario_id": "operator_debug_candidate_drop_visibility",
+            "suite_id": "operator_debugging_ux",
+            "status": "pass",
+            "elf_position": "wins",
+            "comparison_outcome": "win",
+            "evidence": "ELF generated operator_debug candidate-drop visibility from trace and replay-candidate metadata without direct SQL assumptions; qmd keeps only top-k replay rows and lacks intermediate candidate-drop stages.",
+            "command": "cargo make real-world-job-operator-ux-live-adapters",
+            "artifact": "tmp/real-world-job/operator-ux-live-adapters/elf-materialization.json"
+          },
+          {
+            "scenario_id": "operator_debug_repair_action_clarity",
+            "suite_id": "operator_debugging_ux",
+            "status": "pass",
+            "elf_position": "ties",
+            "comparison_outcome": "tie",
+            "evidence": "ELF and qmd generated clear repair/replay steps for the narrow operator-debug jobs; OpenMemory UI/export remains blocked, and claude-mem UI repair paths remain blocked until Docker-contained hook/viewer evidence exists.",
+            "command": "cargo make real-world-job-operator-ux-live-adapters",
+            "artifact": "tmp/real-world-job/operator-ux-live-adapters/summary.json"
+          },
+          {
+            "scenario_id": "operator_debug_selected_but_not_narrated",
+            "suite_id": "operator_debugging_ux",
+            "status": "pass",
+            "elf_position": "wins",
+            "comparison_outcome": "win",
+            "evidence": "The new selected-but-not-narrated job scores whether selected trace evidence is available for answer-composition repair without direct database inspection.",
+            "command": "cargo make real-world-job-operator-ux-live-adapters",
+            "artifact": "tmp/real-world-job/operator-ux-live-adapters/elf-report.json"
+          }
+        ],
+        "evidence": [
+          {
+            "kind": "fixture_dir",
+            "ref": "apps/elf-eval/fixtures/real_world_job/operator_debugging_ux/",
+            "status": "real"
+          },
+          {
+            "kind": "command",
+            "ref": "cargo make real-world-job-operator-ux-live-adapters",
+            "status": "pass"
+          },
+          {
+            "kind": "artifact",
+            "ref": "tmp/real-world-job/operator-ux-live-adapters/elf-report.json",
+            "status": "pass"
+          }
+        ],
+        "notes": [
+          "This is a narrow operator-debug live slice, not a full-suite live pass.",
+          "The record does not implement product UI improvements and does not claim broad qmd/OpenMemory/claude-mem superiority."
+        ]
+      },
+      {
+        "adapter_id": "qmd_operator_debug_live",
+        "project": "qmd",
+        "adapter_kind": "docker_cli_operator_debug_real_world_job",
+        "evidence_class": "live_real_world",
+        "docker_default": true,
+        "host_global_installs_required": false,
+        "overall_status": "wrong_result",
+        "setup": {
+          "status": "pass",
+          "evidence": "The narrow operator-debug live task clones and installs qmd inside the baseline Docker container when the checkout is absent.",
+          "command": "cargo make real-world-job-operator-ux-live-adapters",
+          "artifact": "tmp/real-world-job/operator-ux-live-adapters/qmd-materialization.json"
+        },
+        "run": {
+          "status": "wrong_result",
+          "evidence": "qmd materializes operator_debugging_ux adapter_response objects through collection add, update, embed, and query --json, then records local replay-command metadata but no service trace hydration.",
+          "command": "cargo make real-world-job-operator-ux-live-adapters",
+          "artifact": "tmp/real-world-job/operator-ux-live-adapters/qmd-report.json"
+        },
+        "result": {
+          "status": "wrong_result",
+          "evidence": "The narrow live slice gives qmd explicit replay-command evidence, but operator-debug jobs remain wrong_result where trace availability, trace completeness, or candidate-drop stage visibility is required.",
+          "command": "cargo make real-world-job-operator-ux-live-adapters",
+          "artifact": "tmp/real-world-job/operator-ux-live-adapters/qmd-report.md"
+        },
+        "capabilities": [
+          {
+            "capability": "operator_debug_real_world_job_adapter",
+            "status": "pass",
+            "evidence": "The adapter executes the checked-in operator_debugging_ux jobs through qmd local CLI materialization and generated scoring fixtures."
+          },
+          {
+            "capability": "local_replay_command_metadata",
+            "status": "pass",
+            "evidence": "Generated operator_debug records include qmd query replay commands tied to per-job collections."
+          },
+          {
+            "capability": "trace_hydration_metadata",
+            "status": "wrong_result",
+            "evidence": "Generated qmd operator_debug records have trace_available=false and no ELF viewer/admin trace bundle because qmd exposes local replay rows rather than service trace hydration."
+          },
+          {
+            "capability": "candidate_drop_visibility",
+            "status": "wrong_result",
+            "evidence": "qmd top-k replay output is available, but intermediate candidate-drop stages are not exposed in the generated artifact."
+          },
+          {
+            "capability": "openmemory_or_claude_mem_ui_runner",
+            "status": "not_encoded",
+            "evidence": "This qmd live slice does not launch OpenMemory or claude-mem UI flows."
+          }
+        ],
+        "suites": [
+          {
+            "suite_id": "operator_debugging_ux",
+            "status": "wrong_result",
+            "evidence": "The narrow qmd operator-debug slice scores local replay commands but remains wrong_result for trace hydration and candidate-drop stage visibility."
+          }
+        ],
+        "scenarios": [
+          {
+            "scenario_id": "operator_debug_trace_hydration",
+            "suite_id": "operator_debugging_ux",
+            "status": "wrong_result",
+            "elf_position": "wins",
+            "comparison_outcome": "win",
+            "evidence": "qmd generated replay-command metadata but trace_available=false, so ELF wins only this trace-hydration dimension; this is not a broad qmd loss.",
+            "command": "cargo make real-world-job-operator-ux-live-adapters",
+            "artifact": "tmp/real-world-job/operator-ux-live-adapters/qmd-report.json"
+          },
+          {
+            "scenario_id": "operator_debug_replay_command",
+            "suite_id": "operator_debugging_ux",
+            "status": "pass",
+            "elf_position": "ties",
+            "comparison_outcome": "tie",
+            "evidence": "qmd generated local CLI query replay commands for the same operator-debugging scenarios; ELF generated admin trace-bundle curl commands.",
+            "command": "cargo make real-world-job-operator-ux-live-adapters",
+            "artifact": "tmp/real-world-job/operator-ux-live-adapters/summary.json"
+          },
+          {
+            "scenario_id": "operator_debug_candidate_drop_visibility",
+            "suite_id": "operator_debugging_ux",
+            "status": "wrong_result",
+            "elf_position": "wins",
+            "comparison_outcome": "win",
+            "evidence": "qmd generated top-k replay output but not intermediate retrieved-but-dropped stage visibility, so candidate-drop diagnosis remains a qmd wrong_result in this narrow slice.",
+            "command": "cargo make real-world-job-operator-ux-live-adapters",
+            "artifact": "tmp/real-world-job/operator-ux-live-adapters/qmd-materialization.json"
+          },
+          {
+            "scenario_id": "operator_debug_repair_action_clarity",
+            "suite_id": "operator_debugging_ux",
+            "status": "pass",
+            "elf_position": "ties",
+            "comparison_outcome": "tie",
+            "evidence": "qmd generated clear local replay steps for repair investigation, matching ELF on repair-action clarity while differing on trace hydration.",
+            "command": "cargo make real-world-job-operator-ux-live-adapters",
+            "artifact": "tmp/real-world-job/operator-ux-live-adapters/qmd-report.json"
+          },
+          {
+            "scenario_id": "operator_debug_selected_but_not_narrated",
+            "suite_id": "operator_debugging_ux",
+            "status": "wrong_result",
+            "elf_position": "wins",
+            "comparison_outcome": "win",
+            "evidence": "qmd can replay top-k rows, but the generated artifact does not expose service trace narration stages for the selected-but-not-narrated diagnosis.",
+            "command": "cargo make real-world-job-operator-ux-live-adapters",
+            "artifact": "tmp/real-world-job/operator-ux-live-adapters/qmd-report.json"
+          }
+        ],
+        "evidence": [
+          {
+            "kind": "fixture_dir",
+            "ref": "apps/elf-eval/fixtures/real_world_job/operator_debugging_ux/",
+            "status": "real"
+          },
+          {
+            "kind": "command",
+            "ref": "cargo make real-world-job-operator-ux-live-adapters",
+            "status": "wrong_result"
+          },
+          {
+            "kind": "artifact",
+            "ref": "tmp/real-world-job/operator-ux-live-adapters/qmd-report.json",
+            "status": "wrong_result"
+          }
+        ],
+        "notes": [
+          "This is a narrow operator-debug live slice, not a full-suite live pass.",
+          "qmd's replay-command availability remains useful; the wrong_result status is limited to trace hydration and candidate-drop stage visibility."
+        ]
+      },
+      {
+        "adapter_id": "agentmemory_live_baseline",
+        "project": "agentmemory",
+        "adapter_kind": "docker_sdk_mock_same_corpus",
+        "evidence_class": "live_baseline_only",
+        "docker_default": true,
+        "host_global_installs_required": false,
+        "overall_status": "lifecycle_fail",
+        "setup": {
+          "status": "pass",
+          "evidence": "The live-baseline Docker runner installs and exercises agentmemory package APIs.",
+          "command": "ELF_BASELINE_PROJECTS=agentmemory cargo make baseline-live-docker",
+          "artifact": "tmp/live-baseline/agentmemory.log"
+        },
+        "run": {
+          "status": "lifecycle_fail",
+          "evidence": "Same-corpus retrieval can run, but durable lifecycle behavior is not proven because the adapter uses an in-memory SDK/KV mock.",
+          "artifact": "tmp/live-baseline/live-baseline-report.json"
+        },
+        "result": {
+          "status": "lifecycle_fail",
+          "evidence": "agentmemory remains a reference for capture and continuity UX, but current Docker evidence is not a durable lifecycle pass.",
+          "artifact": "tmp/live-baseline/live-baseline-report.json"
+        },
+        "capabilities": [
+          {
+            "capability": "same_corpus_retrieval",
+            "status": "pass",
+            "evidence": "The current adapter can run mem::remember and mem::search against the shared corpus."
+          },
+          {
+            "capability": "adapter_storage",
+            "status": "mocked",
+            "evidence": "The current adapter uses a process-local StateKV Map and in-memory index."
+          },
+          {
+            "capability": "durable_cold_start",
+            "status": "blocked",
+            "evidence": "A persistent upstream KV/index path or hosted runtime is needed before cold-start recovery can be fairly scored."
+          },
+          {
+            "capability": "durable_work_resume_capture_path",
+            "status": "blocked",
+            "evidence": "XY-925 selects the next local path as a Docker-contained agentmemory session directory with persisted SDK KV store, observation log, and searchable index across a fresh process; the current StateKV Map and in-memory index still block scoring."
+          },
+          {
+            "capability": "write_policy_hook_capture",
+            "status": "blocked",
+            "evidence": "Capture/write-policy jobs require live agentmemory hook observations plus persisted write-policy audit evidence. The current adapter does not execute those hooks."
+          },
+          {
+            "capability": "real_world_job_adapter",
+            "status": "blocked",
+            "evidence": "XY-925 adds fixture-backed blocked prompt coverage for the required durable path, but no live agentmemory real_world_job adapter executes prompts until the persistent local store exists."
+          }
+        ],
+        "suites": [
+          {
+            "suite_id": "work_resume",
+            "status": "blocked",
+            "evidence": "A durable upstream agentmemory session/capture path is required before work-resume jobs can be compared fairly."
+          },
+          {
+            "suite_id": "capture_integration",
+            "status": "blocked",
+            "evidence": "The current fixture import boundary is offline and does not run live agentmemory hooks."
+          },
+          {
+            "suite_id": "memory_evolution",
+            "status": "blocked",
+            "evidence": "Durable update/supersede/delete history is not proven by the in-memory adapter."
+          }
+        ],
+        "scenarios": [
+          {
+            "scenario_id": "basic_same_corpus_retrieval",
+            "suite_id": "retrieval",
+            "status": "pass",
+            "elf_position": "untested",
+            "comparison_outcome": "not_tested",
+            "evidence": "Fresh comparable baseline run live-baseline-20260611061612 reports agentmemory retrieval_pass with 3/3 same-corpus retrieval checks through mem::remember and mem::search. This is live-baseline-only evidence through an in-memory mock, not a real_world_job suite pass.",
+            "command": "ELF_BASELINE_PROJECTS=ELF,agentmemory,mem0,memsearch,claude-mem cargo make baseline-live-docker",
+            "artifact": "tmp/live-baseline/live-baseline-report.json"
+          },
+          {
+            "scenario_id": "durable_update_reload_lifecycle",
+            "suite_id": "memory_evolution",
+            "status": "lifecycle_fail",
+            "elf_position": "wins",
+            "comparison_outcome": "win",
+            "evidence": "Fresh comparable baseline run live-baseline-20260611061612 reports ELF passing 8/8 local lifecycle checks, while agentmemory update_replaces_note_text is lifecycle_fail and cold_start_recovery_search is blocked because the harness uses an in-memory SDK/KV mock. This is an ELF baseline win only at the local lifecycle-smoke evidence class.",
+            "command": "ELF_BASELINE_PROJECTS=ELF,agentmemory,mem0,memsearch,claude-mem cargo make baseline-live-docker",
+            "artifact": "tmp/live-baseline/live-baseline-report.json"
+          },
+          {
+            "scenario_id": "work_resume_capture_continuity",
+            "suite_id": "work_resume",
+            "status": "blocked",
+            "elf_position": "untested",
+            "comparison_outcome": "blocked",
+            "evidence": "agentmemory's relevant strength is durable coding-agent continuity and capture, but the Docker harness has not proven a persistent session/capture path. XY-925 selects the durable local path as a Docker-contained session directory that persists the SDK KV store and searchable index across a fresh process; keep work_resume and capture claims blocked until that path exists.",
+            "command": "cargo make real-world-first-generation-oss",
+            "artifact": "tmp/real-world-memory/first-generation-oss/report.json"
+          },
+          {
+            "scenario_id": "durable_work_resume_local_path",
+            "suite_id": "work_resume",
+            "status": "blocked",
+            "elf_position": "untested",
+            "comparison_outcome": "blocked",
+            "evidence": "The selected comparable path is explicit: capture into a Docker-local agentmemory session directory, persist the SDK KV/index and observation log, restart a fresh process, then score work_resume prompts. The checked-in fixture records this as blocked rather than scoring the current mock.",
+            "command": "cargo make real-world-first-generation-oss",
+            "artifact": "apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss/agentmemory_durable_capture_path_blocked.json"
+          },
+          {
+            "scenario_id": "capture_write_policy_hooks",
+            "suite_id": "capture_integration",
+            "status": "blocked",
+            "elf_position": "untested",
+            "comparison_outcome": "blocked",
+            "evidence": "agentmemory capture/write-policy comparison needs live hook observations and write-policy audit evidence persisted through the selected local store. The fixture preserves this as a typed blocker and does not convert the mem::remember smoke into capture proof.",
+            "command": "cargo make real-world-first-generation-oss",
+            "artifact": "apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss/agentmemory_durable_capture_path_blocked.json"
+          }
+        ],
+        "evidence": [
+          {
+            "kind": "evidence",
+            "ref": "docs/evidence/external_memory/agentmemory_adapter.md",
+            "status": "real"
+          },
+          {
+            "kind": "runner",
+            "ref": "scripts/live-baseline-benchmark.sh",
+            "status": "mocked"
+          }
+        ],
+        "notes": [
+          "The offline agentmemory fixture adapter is an import/comparison boundary and must not be treated as live benchmark proof."
+        ],
+        "follow_up": {
+          "title": "[ELF benchmark P0] Make agentmemory adapter lifecycle-durable and fail-typed",
+          "reason": "A durable upstream agentmemory storage path is required before lifecycle and real-world job suites can be fairly scored."
+        }
+      },
+      {
+        "adapter_id": "mem0_openmemory_live_baseline",
+        "project": "mem0/OpenMemory",
+        "adapter_kind": "docker_sdk_same_corpus",
+        "evidence_class": "live_baseline_only",
+        "docker_default": true,
+        "host_global_installs_required": false,
+        "overall_status": "pass",
+        "setup": {
+          "status": "pass",
+          "evidence": "The live-baseline Docker runner can install mem0 and configure local FastEmbed/Qdrant paths.",
+          "command": "ELF_BASELINE_PROJECTS=mem0 cargo make baseline-live-docker",
+          "artifact": "tmp/live-baseline/mem0.log"
+        },
+        "run": {
+          "status": "pass",
+          "evidence": "Fresh scoped baseline run live-baseline-20260611122416 exercises local OSS mem0 with FastEmbed, Qdrant path storage, Memory.update, Memory.delete, Memory.history, Memory.get_all, entity filters, and cold-start reload; mem0 passed 8/8 encoded SDK checks. XY-931 adds a separate OpenMemory export-helper setup probe artifact and keeps that blocked UI/export result out of the SDK check summary.",
+          "command": "cargo make openmemory-ui-export-readback",
+          "artifact": "tmp/live-baseline/live-baseline-report.json"
+        },
+        "result": {
+          "status": "pass",
+          "evidence": "The local OSS mem0 baseline now passes same-corpus retrieval, update/delete/reload, preference correction history, entity-scoped personalization, local get_all export-style readback, and deletion audit history. The separate OpenMemory export-helper setup probe is blocked because Docker is unavailable inside the baseline-runner container before any product app database readback can run. It still does not claim hosted Platform export, optional graph memory, or a real_world_job prompt adapter.",
+          "artifact": "tmp/live-baseline/live-baseline-report.json"
+        },
+        "capabilities": [
+          {
+            "capability": "local_storage",
+            "status": "real",
+            "evidence": "The adapter targets local FastEmbed, Qdrant path storage, and local history DB paths in Docker."
+          },
+          {
+            "capability": "same_corpus_retrieval",
+            "status": "pass",
+            "evidence": "Fresh scoped baseline run live-baseline-20260611122416 reports mem0 retrieval_pass with 3/3 same-corpus retrieval checks."
+          },
+          {
+            "capability": "local_lifecycle_update_delete_reload",
+            "status": "pass",
+            "evidence": "The Docker runner exercises public Memory.update, Memory.delete, and a new Memory.from_config over the same local Qdrant/history paths; the fresh scoped run reports those lifecycle checks passing."
+          },
+          {
+            "capability": "preference_correction_history",
+            "status": "pass",
+            "evidence": "The fresh scoped run reports preference_correction_history as pass: Memory.history preserved explicit ADD and UPDATE records with old and current preference text, and search returned only the current correction."
+          },
+          {
+            "capability": "entity_scoped_personalization",
+            "status": "pass",
+            "evidence": "The fresh scoped run reports entity_scoped_personalization as pass: user_id, agent_id, and run_id filters returned the ELF scoped preference and omitted a PubFi scoped preference."
+          },
+          {
+            "capability": "local_get_all_export_readback",
+            "status": "pass",
+            "evidence": "The fresh scoped run reports local_get_all_export_readback as pass: Memory.get_all returned the current scoped preference and omitted the other scope."
+          },
+          {
+            "capability": "deletion_audit_history",
+            "status": "pass",
+            "evidence": "The fresh scoped run reports delete_history_audit_readback as pass: Memory.history exposed a DELETE event and search suppressed the deleted memory."
+          },
+          {
+            "capability": "openmemory_ui_readback",
+            "status": "blocked",
+            "evidence": "XY-931 runs a bounded OpenMemory export-helper setup probe after the mem0 SDK corpus checks. The probe finds the OpenMemory tree, UI package, compose file, and export helper, then records a setup blocker because the export helper requires Docker access to a running OpenMemory container. Local SDK get_all readback is measured separately and must not be reused as UI evidence."
+          },
+          {
+            "capability": "hosted_managed_memory_claims",
+            "status": "unsupported",
+            "evidence": "Hosted mem0 Platform behavior and Platform UI export are outside the local OSS Docker adapter and are non-goals for this local evidence record."
+          },
+          {
+            "capability": "real_world_job_adapter",
+            "status": "not_encoded",
+            "evidence": "No mem0/OpenMemory adapter currently executes real_world_job prompts and answer scoring."
+          },
+          {
+            "capability": "optional_graph_memory",
+            "status": "not_encoded",
+            "evidence": "Optional graph memory is not enabled in the default local OSS path and remains an opt-in scenario gate rather than a default pass/fail claim."
+          }
+        ],
+        "suites": [
+          {
+            "suite_id": "memory_evolution",
+            "status": "not_encoded",
+            "evidence": "Scenario-level local OSS checks now measure preference correction history and deletion audit readback, but no mem0 real_world_job memory_evolution prompt adapter is encoded."
+          },
+          {
+            "suite_id": "personalization",
+            "status": "not_encoded",
+            "evidence": "Scenario-level local OSS checks now measure entity-scoped personalization, but no mem0 real_world_job personalization prompt adapter is encoded."
+          },
+          {
+            "suite_id": "operator_debugging_ux",
+            "status": "blocked",
+            "evidence": "Local SDK get_all inspection is measured, but OpenMemory UI/export readback is blocked by the XY-931 export-helper setup probe until a dedicated OpenMemory compose/import path can load the same corpus into the OpenMemory app database."
+          }
+        ],
+        "scenarios": [
+          {
+            "scenario_id": "basic_local_lifecycle",
+            "suite_id": "memory_evolution",
+            "status": "pass",
+            "elf_position": "ties",
+            "comparison_outcome": "tie",
+            "evidence": "Prior comparable baseline run live-baseline-20260611061612 reports ELF passing 8/8 local lifecycle checks and mem0 passing basic same-corpus retrieval, update, delete, and cold-start reload checks. This remains a basic local lifecycle tie at the encoded smoke surface and is not reused as history/UI evidence.",
+            "command": "ELF_BASELINE_PROJECTS=ELF,agentmemory,mem0,memsearch,claude-mem cargo make baseline-live-docker",
+            "artifact": "tmp/live-baseline/live-baseline-report.json"
+          },
+          {
+            "scenario_id": "preference_correction_history",
+            "suite_id": "personalization",
+            "status": "pass",
+            "elf_position": "loses",
+            "comparison_outcome": "loss",
+            "evidence": "Fresh scoped baseline run live-baseline-20260611122416 reports mem0 preference_correction_history as pass. ELF-side evidence comes from cargo make real-world-memory-live-adapters as summarized in docs/evidence/benchmarking/2026-06-11-temporal-history-competitor-gap-report.md, which records ELF live memory-evolution preference as wrong_result. The current measured comparison is therefore an ELF loss on this history dimension until ELF temporal reconciliation is fixed.",
+            "command": "mem0: ELF_BASELINE_PROJECTS=mem0 cargo make baseline-live-docker; ELF: cargo make real-world-memory-live-adapters",
+            "artifact": "mem0: tmp/live-baseline/mem0-checks.json; ELF: tmp/real-world-memory/live-adapters/ and docs/evidence/benchmarking/2026-06-11-temporal-history-competitor-gap-report.md"
+          },
+          {
+            "scenario_id": "entity_scoped_personalization",
+            "suite_id": "personalization",
+            "status": "pass",
+            "elf_position": "ties",
+            "comparison_outcome": "tie",
+            "evidence": "Fresh scoped baseline run live-baseline-20260611122416 reports mem0 entity_scoped_personalization as pass. ELF-side evidence comes from cargo make real-world-memory-live-adapters as summarized in docs/evidence/benchmarking/2026-06-11-competitor-strength-adoption-report.md, which records ELF and qmd passing the encoded personalization slice. This is a measured tie on the current scoped-preference surface.",
+            "command": "mem0: ELF_BASELINE_PROJECTS=mem0 cargo make baseline-live-docker; ELF: cargo make real-world-memory-live-adapters",
+            "artifact": "mem0: tmp/live-baseline/mem0-checks.json; ELF: tmp/real-world-memory/live-adapters/ and docs/evidence/benchmarking/2026-06-11-competitor-strength-adoption-report.md"
+          },
+          {
+            "scenario_id": "delete_audit_readback",
+            "suite_id": "memory_evolution",
+            "status": "pass",
+            "elf_position": "ties",
+            "comparison_outcome": "tie",
+            "evidence": "Fresh scoped baseline run live-baseline-20260611122416 reports mem0 delete_history_audit_readback as pass. ELF-side evidence comes from cargo make real-world-memory-live-adapters as summarized in docs/evidence/benchmarking/2026-06-11-temporal-history-competitor-gap-report.md, which records ELF passing the delete/TTL tombstone job. The current measured delete-audit comparison is a tie.",
+            "command": "mem0: ELF_BASELINE_PROJECTS=mem0 cargo make baseline-live-docker; ELF: cargo make real-world-memory-live-adapters",
+            "artifact": "mem0: tmp/live-baseline/mem0-checks.json; ELF: tmp/real-world-memory/live-adapters/ and docs/evidence/benchmarking/2026-06-11-temporal-history-competitor-gap-report.md"
+          },
+          {
+            "scenario_id": "local_get_all_export_readback",
+            "suite_id": "operator_debugging_ux",
+            "status": "pass",
+            "elf_position": "untested",
+            "comparison_outcome": "not_tested",
+            "evidence": "Fresh scoped baseline run live-baseline-20260611122416 reports mem0 local_get_all_export_readback as pass. This is local SDK inspection/export-style readback, not OpenMemory UI evidence; ELF has no directly comparable live UI/export scoring row in this run.",
+            "command": "ELF_BASELINE_PROJECTS=mem0 cargo make baseline-live-docker",
+            "artifact": "tmp/live-baseline/mem0-checks.json"
+          },
+          {
+            "scenario_id": "openmemory_ui_export_readback",
+            "suite_id": "operator_debugging_ux",
+            "status": "blocked",
+            "elf_position": "untested",
+            "comparison_outcome": "blocked",
+            "evidence": "The XY-931 OpenMemory export-helper setup probe is Docker-contained in the mem0 baseline run. It detects the OpenMemory product tree, UI package, compose file, and export helper, but Docker is unavailable inside the baseline-runner container before the helper can reach a running OpenMemory product container or app database. Basic lifecycle and local SDK get_all readback are not reused as UI/export proof.",
+            "command": "cargo make openmemory-ui-export-readback",
+            "artifact": "tmp/live-baseline/mem0-openmemory-ui-export.json"
+          },
+          {
+            "scenario_id": "hosted_platform_export",
+            "suite_id": "operator_debugging_ux",
+            "status": "unsupported",
+            "elf_position": "untested",
+            "comparison_outcome": "non_goal",
+            "evidence": "Hosted mem0 Platform export is explicitly outside the local OSS Docker comparison and is not counted as a local pass, loss, or blocker.",
+            "artifact": "apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json"
+          },
+          {
+            "scenario_id": "optional_graph_memory",
+            "suite_id": "memory_evolution",
+            "status": "not_encoded",
+            "elf_position": "untested",
+            "comparison_outcome": "non_goal",
+            "evidence": "Optional graph memory is kept as an opt-in scenario gate. It is not enabled in the default mem0 local OSS run and is not part of the default pass/fail comparison.",
+            "artifact": "apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json"
+          }
+        ],
+        "evidence": [
+          {
+            "kind": "runner",
+            "ref": "scripts/live-baseline-benchmark.sh",
+            "status": "real"
+          }
+        ],
+        "notes": [
+          "Separate local OSS mem0 SDK evidence from OpenMemory product UI/export claims.",
+          "A blocked OpenMemory export-helper setup probe is not an ELF win or loss until the product app can import and export the same local corpus."
+        ]
+      },
+      {
+        "adapter_id": "memsearch_live_baseline",
+        "project": "memsearch",
+        "adapter_kind": "docker_cli_same_corpus",
+        "evidence_class": "live_baseline_only",
+        "docker_default": true,
+        "host_global_installs_required": false,
+        "overall_status": "pass",
+        "setup": {
+          "status": "pass",
+          "evidence": "The live-baseline Docker runner can install memsearch and run its CLI path.",
+          "command": "ELF_BASELINE_PROJECTS=memsearch cargo make baseline-live-docker",
+          "artifact": "tmp/live-baseline/memsearch.log"
+        },
+        "run": {
+          "status": "pass",
+          "evidence": "Fresh comparable baseline run live-baseline-20260611061612 indexes a per-adapter corpus copy, rewrites and deletes files, reruns memsearch index, and reports memsearch 4/4 encoded checks passing.",
+          "artifact": "tmp/live-baseline/live-baseline-report.json"
+        },
+        "result": {
+          "status": "pass",
+          "evidence": "memsearch now passes the local same-corpus/reindex/update/delete/reload smoke. No real_world_job memsearch prompt adapter is encoded, so Markdown-first behavior remains baseline scenario evidence rather than suite pass evidence.",
+          "artifact": "tmp/live-baseline/live-baseline-report.json"
+        },
+        "capabilities": [
+          {
+            "capability": "canonical_markdown_store",
+            "status": "real",
+            "evidence": "memsearch is tracked as a Markdown-first source-of-truth reference."
+          },
+          {
+            "capability": "same_corpus_retrieval",
+            "status": "pass",
+            "evidence": "Fresh comparable baseline run live-baseline-20260611061612 reports memsearch retrieval_pass with 3/3 same-corpus retrieval checks."
+          },
+          {
+            "capability": "reindex_update_delete_reload",
+            "status": "pass",
+            "evidence": "The runner rewrites auth-memory.md, deletes a second corpus file, reruns memsearch index, and starts fresh memsearch search processes; the fresh scoped run reports update, delete, and cold-start reload passing."
+          },
+          {
+            "capability": "real_world_job_adapter",
+            "status": "not_encoded",
+            "evidence": "XY-925 adds fixture-backed prompt coverage for the Markdown source-store and retrieval-debug jobs, but no live memsearch runtime adapter executes real_world_job prompts and answer scoring."
+          },
+          {
+            "capability": "markdown_source_store_prompt_jobs",
+            "status": "pass",
+            "evidence": "The first-generation OSS fixture slice encodes source-of-truth rebuild/reload and retrieval-debug prompts over the canonical Markdown store while preserving the live-baseline-only evidence boundary."
+          }
+        ],
+        "suites": [
+          {
+            "suite_id": "trust_source_of_truth",
+            "status": "not_encoded",
+            "evidence": "The Markdown-first source model passed the local reindex/reload smoke, and XY-925 adds fixture-backed source-of-truth prompt coverage over the canonical Markdown store. No live memsearch runtime adapter executes prompt scoring yet, so this is not a suite pass."
+          },
+          {
+            "suite_id": "retrieval",
+            "status": "not_encoded",
+            "evidence": "The Docker same-corpus check passes, and XY-925 adds fixture-backed retrieval-debug prompt coverage over memsearch CLI replay and Markdown source inspection. No live memsearch runtime adapter executes retrieval prompt scoring yet, so this is not a suite pass."
+          },
+          {
+            "suite_id": "memory_evolution",
+            "status": "not_encoded",
+            "evidence": "Update/delete reindex semantics pass in Docker, but memory_evolution real_world_job prompts are not encoded for memsearch."
+          }
+        ],
+        "scenarios": [
+          {
+            "scenario_id": "canonical_markdown_reindex_reload",
+            "suite_id": "trust_source_of_truth",
+            "status": "pass",
+            "elf_position": "untested",
+            "comparison_outcome": "not_tested",
+            "evidence": "Fresh comparable baseline run live-baseline-20260611061612 reports memsearch passed same-corpus retrieval, update reindex, delete suppression, and cold-start reload over a canonical Markdown corpus. ELF has no directly comparable canonical Markdown source-store scenario in this baseline, so the ELF position remains untested.",
+            "command": "ELF_BASELINE_PROJECTS=ELF,agentmemory,mem0,memsearch,claude-mem cargo make baseline-live-docker",
+            "artifact": "tmp/live-baseline/live-baseline-report.json"
+          },
+          {
+            "scenario_id": "markdown_source_store_rebuild_reload_prompt",
+            "suite_id": "trust_source_of_truth",
+            "status": "pass",
+            "elf_position": "untested",
+            "comparison_outcome": "not_tested",
+            "evidence": "XY-925 adds a checked-in real_world_job prompt fixture that asks for the memsearch source-of-truth path and rebuild/reload boundary: canonical Markdown files are authoritative, while the index is derived by rerunning memsearch index. This is fixture-backed scenario coverage plus baseline artifact evidence, not a memsearch live real_world_job suite pass.",
+            "command": "cargo make real-world-first-generation-oss",
+            "artifact": "apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss/memsearch_markdown_rebuild_reload.json"
+          },
+          {
+            "scenario_id": "markdown_retrieval_debug_prompt",
+            "suite_id": "operator_debugging_ux",
+            "status": "pass",
+            "elf_position": "untested",
+            "comparison_outcome": "not_tested",
+            "evidence": "XY-925 adds a checked-in retrieval-debug prompt over memsearch's canonical Markdown store. The expected debug surface is CLI replay plus Markdown source inspection and reindexing; staged expansion/fusion/rerank/candidate-drop trace bundles remain not encoded for memsearch.",
+            "command": "cargo make real-world-first-generation-oss",
+            "artifact": "apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss/memsearch_retrieval_debug_prompt.json"
+          },
+          {
+            "scenario_id": "ttl_expiry_lifecycle",
+            "suite_id": "memory_evolution",
+            "status": "unsupported",
+            "elf_position": "untested",
+            "comparison_outcome": "non_goal",
+            "evidence": "The encoded memsearch CLI path supports reindex/delete but no TTL or expiry behavior. Unsupported TTL behavior is preserved as unsupported competitor evidence and does not create an ELF win/loss claim without a directly comparable scenario artifact.",
+            "artifact": "tmp/live-baseline/live-baseline-report.json"
+          },
+          {
+            "scenario_id": "real_world_prompt_adapter",
+            "suite_id": "retrieval",
+            "status": "not_encoded",
+            "elf_position": "untested",
+            "comparison_outcome": "not_tested",
+            "evidence": "No live memsearch runtime adapter currently executes real_world_job prompts and answer scoring. XY-925 fixture-backed prompt jobs document the source-store and retrieval-debug shape, while baseline retrieval/reindex evidence remains separate from suite pass claims.",
+            "artifact": "apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json"
+          }
+        ],
+        "evidence": [
+          {
+            "kind": "runner",
+            "ref": "scripts/live-baseline-benchmark.sh",
+            "status": "real"
+          }
+        ],
+        "notes": [
+          "Do not mark memsearch worse solely because setup or local indexing is heavier; preserve the typed incomplete/wrong-result boundary."
+        ]
+      },
+      {
+        "adapter_id": "openviking_live_baseline",
+        "project": "OpenViking",
+        "adapter_kind": "docker_local_embed_same_corpus",
+        "evidence_class": "live_baseline_only",
+        "docker_default": true,
+        "host_global_installs_required": false,
+        "overall_status": "wrong_result",
+        "setup": {
+          "status": "pass",
+          "evidence": "OpenViking local-embed setup installed and imported pinned llama-cpp-python==0.3.28 from the CPU wheel index in Docker.",
+          "command": "ELF_BASELINE_PROJECTS=OpenViking cargo make baseline-live-docker",
+          "artifact": "tmp/live-baseline/OpenViking.log"
+        },
+        "run": {
+          "status": "wrong_result",
+          "evidence": "The adapter reached same-corpus add_resource/find and now exposes expected/matched/missing evidence ids, but returned 0 of 3 expected evidence-term matches in the smoke run.",
+          "artifact": "tmp/live-baseline/live-baseline-report.json"
+        },
+        "result": {
+          "status": "wrong_result",
+          "evidence": "The current OpenViking Docker evidence is a behavioral wrong_result, not a local embedding setup blocker and not a real_world_job pass.",
+          "artifact": "docs/runbook/benchmarking/live_baseline_benchmark.md"
+        },
+        "capabilities": [
+          {
+            "capability": "local_embed_setup",
+            "status": "pass",
+            "evidence": "Docker local embedding dependency setup is pinned to llama-cpp-python==0.3.28 from https://abetlen.github.io/llama-cpp-python/whl/cpu and reached import/runtime in the smoke run."
+          },
+          {
+            "capability": "same_corpus_retrieval",
+            "status": "wrong_result",
+            "evidence": "OpenViking add_resource/find returned resources but missed expected evidence-term matches for every smoke query."
+          },
+          {
+            "capability": "context_trajectory",
+            "status": "blocked",
+            "evidence": "OpenViking staged/hierarchical retrieval is now encoded as blocked context_trajectory fixtures until same-corpus expected evidence ids match and staged artifacts are materialized."
+          },
+          {
+            "capability": "real_world_job_adapter",
+            "status": "not_encoded",
+            "evidence": "No OpenViking adapter currently executes real_world_job prompts and answer scoring."
+          }
+        ],
+        "suites": [
+          {
+            "suite_id": "retrieval",
+            "status": "wrong_result",
+            "evidence": "The Docker-local setup reached add_resource/find, but the retrieval check returned 0/3 expected evidence-term matches."
+          },
+          {
+            "suite_id": "work_resume",
+            "status": "not_encoded",
+            "evidence": "Hierarchical context resume scenarios are not encoded for OpenViking."
+          },
+          {
+            "suite_id": "context_trajectory",
+            "status": "blocked",
+            "evidence": "The staged retrieval, hierarchy selection, and recursive/context expansion fixtures are encoded as blocked behind same-corpus evidence output and staged artifact readback."
+          }
+        ],
+        "scenarios": [],
+        "evidence": [
+          {
+            "kind": "runner",
+            "ref": "scripts/live-baseline-benchmark.sh",
+            "status": "wrong_result"
+          }
+        ],
+        "execution_metadata": {
+          "sources": [
+            {
+              "label": "OpenViking repository",
+              "url": "https://github.com/volcengine/OpenViking/",
+              "evidence": "Official source for OpenViking local context database, resource, and retrieval APIs."
+            },
+            {
+              "label": "llama-cpp-python CPU wheel index",
+              "url": "https://abetlen.github.io/llama-cpp-python/whl/cpu",
+              "evidence": "Official prebuilt CPU wheel index used by the Docker-local embedding pin."
+            }
+          ],
+          "setup_path": "Run ELF_BASELINE_PROJECTS=OpenViking cargo make baseline-live-docker. The runner installs llama-cpp-python==0.3.28 with --only-binary llama-cpp-python from the CPU wheel index before OpenViking add_resource/find.",
+          "runtime_boundary": "docker-compose.baseline.yml baseline-runner container; no host-global OpenViking, llama-cpp-python, or model service install is required.",
+          "resource_expectation": "Local embedding setup may download a CPU wheel and model assets; record OpenViking.log, elapsed time, and cache size before claiming adapter quality.",
+          "retry_guidance": [
+            "Use the default pinned CPU wheel path first.",
+            "Override ELF_BASELINE_OPENVIKING_LLAMA_CPP_PYTHON_VERSION or ELF_BASELINE_OPENVIKING_LLAMA_CPP_PYTHON_INDEX only when the default wheel is unavailable for the Docker platform.",
+            "Treat install/import failure as incomplete, not wrong_result; treat add_resource/find evidence misses as wrong_result."
+          ]
+        },
+        "notes": [
+          "Record OpenViking as wrong_result now that the pinned Docker local embedding path reaches add_resource/find but misses expected evidence; keep context_trajectory as blocked until staged artifacts exist."
+        ],
+        "follow_up": {
+          "title": "Fix OpenViking evidence-bearing same-corpus retrieval output and materialize staged artifacts",
+          "reason": "The current adapter reaches add_resource/find and exposes expected evidence ids, but must match evidence ids and return stage/hierarchy/recursive artifacts before trajectory quality can be scored."
+        }
+      },
+      {
+        "adapter_id": "claude_mem_live_baseline",
+        "project": "claude-mem",
+        "adapter_kind": "docker_repository_same_corpus",
+        "evidence_class": "live_baseline_only",
+        "docker_default": true,
+        "host_global_installs_required": false,
+        "overall_status": "wrong_result",
+        "setup": {
+          "status": "pass",
+          "evidence": "The live-baseline Docker runner can install and build claude-mem.",
+          "command": "ELF_BASELINE_PROJECTS=claude-mem cargo make baseline-live-docker",
+          "artifact": "tmp/live-baseline/claude-mem.log"
+        },
+        "run": {
+          "status": "wrong_result",
+          "evidence": "The Docker runner now uses a durable SQLite file, exercises repository update/delete/reopen checks, and reports missed same-corpus or lifecycle evidence as typed non-pass.",
+          "artifact": "tmp/live-baseline/live-baseline-report.json"
+        },
+        "result": {
+          "status": "wrong_result",
+          "evidence": "No real_world_job claude-mem adapter is encoded; progressive disclosure remains a design reference.",
+          "artifact": "tmp/live-baseline/live-baseline-report.json"
+        },
+        "capabilities": [
+          {
+            "capability": "same_corpus_retrieval",
+            "status": "wrong_result",
+            "evidence": "The current Docker adapter did not prove correct same-corpus retrieval."
+          },
+          {
+            "capability": "durable_storage",
+            "status": "real",
+            "evidence": "The runner writes to a Docker-local SQLite file and constructs a new Database plus repository instances for cold-start recovery search."
+          },
+          {
+            "capability": "repository_lifecycle",
+            "status": "real",
+            "evidence": "The runner uses MemoryItemsRepository.update, deletes from the repository-owned memory_items table, and relies on repository FTS triggers for update/delete checks."
+          },
+          {
+            "capability": "repository_progressive_disclosure",
+            "status": "real",
+            "evidence": "The runner verifies search result to getById detail hydration and listSources source evidence on the durable repository path."
+          },
+          {
+            "capability": "progressive_disclosure_real_world_job",
+            "status": "pass",
+            "evidence": "XY-925 adds fixture-backed prompt coverage for the Docker-contained repository progressive-disclosure path: search result to getById detail hydration and listSources evidence on durable SQLite. Hook, timeline, and viewer workflows remain blocked separately."
+          },
+          {
+            "capability": "retrieval_repair_artifact",
+            "status": "wrong_result",
+            "evidence": "The same-corpus retrieval smoke remains wrong_result, and XY-925 records a repair prompt that tells operators to rerun ELF_BASELINE_PROJECTS=claude-mem cargo make baseline-live-docker before inspecting tmp/live-baseline/claude-mem.log and tmp/live-baseline/claude-mem-checks.json."
+          },
+          {
+            "capability": "hook_capture_viewer_workflow",
+            "status": "blocked",
+            "evidence": "The current Docker runner does not launch claude-mem hooks, timeline capture, local viewer readback, or an operator workflow over the same corpus."
+          }
+        ],
+        "suites": [
+          {
+            "suite_id": "work_resume",
+            "status": "not_encoded",
+            "evidence": "The durable repository run is encoded, but hook-driven capture and real_world_job work-resume prompts are not proven by that local repository check."
+          },
+          {
+            "suite_id": "operator_debugging_ux",
+            "status": "blocked",
+            "evidence": "XY-925 adds fixture-backed progressive-disclosure and retrieval-repair prompt coverage, but local viewer/operator workflow remains blocked until a Docker-contained viewer or equivalent readback runner exists."
+          },
+          {
+            "suite_id": "capture_integration",
+            "status": "blocked",
+            "evidence": "claude-mem hook capture remains blocked because hooks, timeline capture, and observation workflows are not executed by this runner."
+          }
+        ],
+        "scenarios": [
+          {
+            "scenario_id": "same_corpus_retrieval",
+            "suite_id": "retrieval",
+            "status": "wrong_result",
+            "elf_position": "wins",
+            "comparison_outcome": "win",
+            "evidence": "Fresh comparable baseline run live-baseline-20260611061612 reports ELF retrieval_pass and claude-mem same_corpus_retrieval as wrong_result with 0/3 expected query checks passing, while its durable repository setup completed. This is an ELF baseline win for the narrow retrieval smoke scenario.",
+            "command": "ELF_BASELINE_PROJECTS=ELF,agentmemory,mem0,memsearch,claude-mem cargo make baseline-live-docker",
+            "artifact": "tmp/live-baseline/live-baseline-report.json"
+          },
+          {
+            "scenario_id": "retrieval_repair_artifact_path",
+            "suite_id": "retrieval",
+            "status": "wrong_result",
+            "elf_position": "wins",
+            "comparison_outcome": "win",
+            "evidence": "XY-925 adds a checked-in repair prompt that preserves the claude-mem wrong_result and names rerun/inspection targets from the reproducible Docker baseline: tmp/live-baseline/claude-mem.log and tmp/live-baseline/claude-mem-checks.json. This is repair evidence for a miss, not a retrieval pass.",
+            "command": "cargo make real-world-first-generation-oss",
+            "artifact": "apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss/claude_mem_retrieval_repair.json"
+          },
+          {
+            "scenario_id": "repository_lifecycle_reload",
+            "suite_id": "memory_evolution",
+            "status": "pass",
+            "elf_position": "ties",
+            "comparison_outcome": "tie",
+            "evidence": "Fresh comparable baseline run live-baseline-20260611061612 reports ELF passing local lifecycle checks and claude-mem update, delete, and cold-start reload checks passing over a durable Docker-local SQLite repository. This is a local lifecycle-smoke tie, not a hook-driven work-resume or full progressive-disclosure job pass.",
+            "command": "ELF_BASELINE_PROJECTS=ELF,agentmemory,mem0,memsearch,claude-mem cargo make baseline-live-docker",
+            "artifact": "tmp/live-baseline/live-baseline-report.json"
+          },
+          {
+            "scenario_id": "progressive_disclosure_detail_hydration",
+            "suite_id": "operator_debugging_ux",
+            "status": "pass",
+            "elf_position": "untested",
+            "comparison_outcome": "not_tested",
+            "evidence": "claude-mem passed the repository-level search-to-detail/source hydration check, which is a useful progressive-disclosure signal. ELF does not have a directly comparable claude-mem-style progressive-disclosure scenario in this baseline, so the ELF position remains untested rather than a loss claim.",
+            "command": "ELF_BASELINE_PROJECTS=ELF,agentmemory,mem0,memsearch,claude-mem cargo make baseline-live-docker",
+            "artifact": "tmp/live-baseline/live-baseline-report.json"
+          },
+          {
+            "scenario_id": "progressive_disclosure_prompt",
+            "suite_id": "operator_debugging_ux",
+            "status": "pass",
+            "elf_position": "untested",
+            "comparison_outcome": "not_tested",
+            "evidence": "XY-925 adds fixture-backed prompt coverage that asks for the measured claude-mem progressive-disclosure boundary: repository search results hydrate through getById and listSources on durable SQLite, but hooks, timeline, viewer, and live prompt scoring are not executed.",
+            "command": "cargo make real-world-first-generation-oss",
+            "artifact": "apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss/claude_mem_progressive_disclosure.json"
+          },
+          {
+            "scenario_id": "hook_capture_viewer_workflow",
+            "suite_id": "capture_integration",
+            "status": "blocked",
+            "elf_position": "untested",
+            "comparison_outcome": "blocked",
+            "evidence": "The Docker baseline uses repository classes only. claude-mem hooks, viewer, timeline, and observation workflows are not executed by the runner, so XY-925 preserves this as a typed blocker rather than not_encoded prose.",
+            "command": "cargo make real-world-first-generation-oss",
+            "artifact": "apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss/claude_mem_hook_viewer_blocked.json"
+          },
+          {
+            "scenario_id": "viewer_operator_workflow",
+            "suite_id": "operator_debugging_ux",
+            "status": "blocked",
+            "elf_position": "untested",
+            "comparison_outcome": "blocked",
+            "evidence": "A fair claude-mem viewer/operator comparison needs a Docker-contained run that opens the local viewer or equivalent readback over the same durable SQLite corpus and emits timeline, detail hydration, and repair-command artifacts. That path is not available in the current runner.",
+            "command": "cargo make real-world-first-generation-oss",
+            "artifact": "apps/elf-eval/fixtures/real_world_external_adapters/first_generation_oss/claude_mem_hook_viewer_blocked.json"
+          }
+        ],
+        "evidence": [
+          {
+            "kind": "runner",
+            "ref": "scripts/live-baseline-benchmark.sh",
+            "status": "real"
+          }
+        ],
+        "notes": [
+          "claude-mem remains a UX reference; durable repository checks do not prove hook, viewer, or full real-world progressive-disclosure behavior."
+        ]
+      },
+      {
+        "adapter_id": "qmd_deep_profile_gate",
+        "project": "qmd",
+        "adapter_kind": "docker_cli_deep_profile_gate",
+        "evidence_class": "research_gate",
+        "docker_default": true,
+        "host_global_installs_required": false,
+        "overall_status": "not_encoded",
+        "setup": {
+          "status": "pass",
+          "evidence": "qmd already has a Docker CLI live-baseline adapter; this gate records the deeper profile extension before a separate scaled run is claimed.",
+          "command": "ELF_BASELINE_PROJECTS=qmd ELF_BASELINE_PROFILE=stress cargo make baseline-live-docker",
+          "artifact": "tmp/live-baseline/qmd.log"
+        },
+        "run": {
+          "status": "not_encoded",
+          "evidence": "The XY-899 strength-profile report is checked in, but no new live qmd deep-profile adapter artifact is claimed from it."
+        },
+        "result": {
+          "status": "not_encoded",
+          "evidence": "The XY-899 report records qmd scenario-level retrieval/debug/replay outcomes and wrong-result diagnosis taxonomy, while expansion/fusion/rerank scoring remains not_encoded.",
+          "artifact": "docs/evidence/benchmarking/2026-06-11-qmd-openviking-strength-profile-report.md"
+        },
+        "capabilities": [
+          {
+            "capability": "stress_profile_retrieval_debug",
+            "status": "not_encoded",
+            "evidence": "The stress command path exists, but this adapter-pack gate has not published a deep qmd profile result."
+          },
+          {
+            "capability": "real_world_job_adapter",
+            "status": "not_encoded",
+            "evidence": "The qmd live real-world sweep covers the current encoded fixture corpus; expanded retrieval-debug strength suites still need their own materialized adapter run."
+          },
+          {
+            "capability": "host_global_install_boundary",
+            "status": "unsupported",
+            "evidence": "Repository-supported qmd benchmark runs must stay inside docker-compose.baseline.yml and must not require host-global installs."
+          }
+        ],
+        "suites": [
+          {
+            "suite_id": "retrieval",
+            "status": "not_encoded",
+            "evidence": "A deeper stress retrieval-debug report is not checked in for this gate."
+          },
+          {
+            "suite_id": "operator_debugging_ux",
+            "status": "not_encoded",
+            "evidence": "qmd query planning and score readback are not yet scored as operator-debugging real_world_job outputs."
+          }
+        ],
+        "scenarios": [],
+        "evidence": [
+          {
+            "kind": "source",
+            "ref": "https://github.com/tobi/qmd",
+            "status": "real"
+          },
+          {
+            "kind": "runner",
+            "ref": "scripts/live-baseline-benchmark.sh",
+            "status": "real"
+          }
+        ],
+        "execution_metadata": {
+          "sources": [
+            {
+              "label": "qmd repository",
+              "url": "https://github.com/tobi/qmd",
+              "evidence": "Official qmd source for local hybrid search, CLI setup, and query behavior."
+            }
+          ],
+          "setup_path": "Use the existing Docker baseline qmd install, collection add, update, embed, and query flow with scale or stress profiles.",
+          "runtime_boundary": "docker-compose.baseline.yml baseline-runner container with project files and caches inside Docker volumes.",
+          "resource_expectation": "CPU local embedding and rerank cost scale with corpus size; record elapsed time and qmd log artifacts before claims.",
+          "retry_guidance": [
+            "Run qmd stress profile in Docker and publish the artifact path.",
+            "Map qmd JSON output to retrieval-debug real_world_job scoring before suite claims."
+          ],
+          "research_depth": "D2 reviewed; deep profile not encoded"
+        },
+        "notes": [
+          "This gate deepens qmd planning without changing the existing qmd pass evidence from the smoke live baseline."
+        ]
+      },
+      {
+        "adapter_id": "openviking_deep_profile_gate",
+        "project": "OpenViking",
+        "adapter_kind": "docker_local_embed_context_trajectory_gate",
+        "evidence_class": "research_gate",
+        "docker_default": true,
+        "host_global_installs_required": false,
+        "overall_status": "blocked",
+        "setup": {
+          "status": "pass",
+          "evidence": "The default pinned OpenViking local embedding dependency path reaches runtime in Docker.",
+          "command": "ELF_BASELINE_PROJECTS=OpenViking cargo make baseline-live-docker",
+          "artifact": "tmp/live-baseline/OpenViking.log"
+        },
+        "run": {
+          "status": "blocked",
+          "evidence": "The XY-928 context_trajectory fixtures encode staged retrieval, hierarchy selection, and recursive/context expansion as blocked; no live trajectory adapter artifact is claimed."
+        },
+        "result": {
+          "status": "blocked",
+          "evidence": "No OpenViking deep context-trajectory result is claimed from the current wrong-result smoke run; the XY-928 fixtures preserve trajectory surfaces as blocked/not_tested.",
+          "artifact": "docs/evidence/benchmarking/2026-06-11-qmd-openviking-strength-profile-report.md"
+        },
+        "capabilities": [
+          {
+            "capability": "docker_local_embed_setup",
+            "status": "pass",
+            "evidence": "The local embedding setup is pinned and reaches import/runtime in Docker."
+          },
+          {
+            "capability": "hierarchical_context_trajectory",
+            "status": "blocked",
+            "evidence": "Stage trajectory scoring is encoded as blocked until the smoke adapter returns evidence-bearing same-corpus output and selected hierarchy/expansion artifacts."
+          },
+          {
+            "capability": "host_global_install_boundary",
+            "status": "unsupported",
+            "evidence": "The adapter pack must not ask operators to install OpenViking dependencies globally on the host."
+          }
+        ],
+        "suites": [
+          {
+            "suite_id": "retrieval",
+            "status": "wrong_result",
+            "evidence": "Same-corpus retrieval is still the precondition and remains wrong_result in the live baseline."
+          },
+          {
+            "suite_id": "context_trajectory",
+            "status": "blocked",
+            "evidence": "OpenViking staged retrieval, hierarchy selection, and recursive/context expansion jobs are encoded as blocked fixtures."
+          },
+          {
+            "suite_id": "operator_debugging_ux",
+            "status": "not_encoded",
+            "evidence": "Trajectory readback is a reference feature but not a scored adapter output."
+          }
+        ],
+        "scenarios": [],
+        "evidence": [
+          {
+            "kind": "source",
+            "ref": "https://github.com/volcengine/OpenViking/",
+            "status": "real"
+          },
+          {
+            "kind": "runner",
+            "ref": "scripts/live-baseline-benchmark.sh",
+            "status": "wrong_result"
+          }
+        ],
+        "execution_metadata": {
+          "sources": [
+            {
+              "label": "OpenViking repository",
+              "url": "https://github.com/volcengine/OpenViking/",
+              "evidence": "Official source for OpenViking local context database, resource, and retrieval APIs."
+            }
+          ],
+          "setup_path": "Use the pinned Docker local embedding path from scripts/live-baseline-benchmark.sh, then run OpenViking add_resource/find before any deep profile scoring.",
+          "runtime_boundary": "docker-compose.baseline.yml baseline-runner container; no host model or compiler setup outside Docker.",
+          "resource_expectation": "Local embedding setup can download CPU wheels and model assets; record build/import logs, model cache size, and elapsed time.",
+          "retry_guidance": [
+            "Run the default pinned llama-cpp-python==0.3.28 CPU wheel path first.",
+            "Override the OpenViking llama-cpp-python version or index only when the default wheel is unavailable for the Docker platform.",
+            "Fix evidence-bearing same-corpus output and materialize selected hierarchy/expansion artifacts before converting blocked context_trajectory fixtures into scored jobs."
+          ],
+          "research_depth": "D2 reviewed; local embedding setup pinned; blocked fixtures encoded"
+        },
+        "notes": [
+          "OpenViking remains a context-trajectory reference, but this gate prevents a smoke wrong_result or blocked fixture from becoming a deep-profile win claim."
+        ]
+      },
+      {
+        "adapter_id": "ragflow_research_gate",
+        "project": "RAGFlow",
+        "adapter_kind": "research_gate",
+        "evidence_class": "research_gate",
+        "docker_default": true,
+        "host_global_installs_required": false,
+        "overall_status": "blocked",
+        "setup": {
+          "status": "blocked",
+          "evidence": "XY-900 promotes the Docker-safe tiny-corpus evidence smoke into a generated real_world_job report while the checked-in row remains smoke-only research_gate evidence.",
+          "command": "cargo make smoke-ragflow-docker",
+          "artifact": "tmp/real-world-memory/ragflow-smoke/ragflow-smoke.json"
+        },
+        "run": {
+          "status": "blocked",
+          "evidence": "The live path requires explicit resource-envelope opt-in and a local self-hosted RAGFlow API key; setup failures stay typed in the generated smoke artifact.",
+          "command": "ELF_RAGFLOW_SMOKE_START=1 ELF_RAGFLOW_SMOKE_ACCEPT_RESOURCE_ENVELOPE=1 cargo make smoke-ragflow-docker",
+          "artifact": "tmp/real-world-memory/ragflow-smoke/memory_projects_manifest.ragflow-smoke.json"
+        },
+        "result": {
+          "status": "blocked",
+          "evidence": "The smoke now emits ragflow-report.json and ragflow-report.md from one generated retrieval job. Pass or wrong_result is allowed only when returned reference chunks map to generated evidence ids; resource, setup, and API-key limits remain typed blockers.",
+          "artifact": "tmp/real-world-memory/ragflow-smoke/ragflow-report.json"
+        },
+        "capabilities": [
+          {
+            "capability": "adapter_candidate_verdict",
+            "status": "not_encoded",
+            "evidence": "XY-882 completed D1/D2 feasibility research and marks RAGFlow adapter_candidate; no adapter run is encoded."
+          },
+          {
+            "capability": "docker_service_setup",
+            "status": "blocked",
+            "evidence": "The smoke records official Docker setup, image/disk/startup envelope, CPU/GPU mode, vm.max_map_count handling, provider boundaries, and retry behavior."
+          },
+          {
+            "capability": "real_world_job_adapter",
+            "status": "blocked",
+            "evidence": "One generated retrieval job is scored from the smoke artifact or typed blocked when resource, service, or local API-key boundaries stop execution."
+          },
+          {
+            "capability": "quality_or_scale_claim",
+            "status": "not_encoded",
+            "evidence": "The scored smoke does not claim broad RAGFlow quality, private corpus behavior, scale, or comparative ranking."
+          }
+        ],
+        "suites": [
+          {
+            "suite_id": "retrieval",
+            "status": "blocked",
+            "evidence": "The generated retrieval smoke is scored as pass, wrong_result, blocked, or incomplete by ragflow-report.json; the checked-in row remains blocked until live reference chunks map to evidence ids."
+          },
+          {
+            "suite_id": "knowledge_compilation",
+            "status": "not_encoded",
+            "evidence": "RAGFlow knowledge output is not mapped to real_world_job page or citation scoring."
+          },
+          {
+            "suite_id": "production_ops",
+            "status": "blocked",
+            "evidence": "Resource envelope and service startup retry guidance must be documented first."
+          }
+        ],
+        "scenarios": [
+          {
+            "scenario_id": "reference_chunk_citation_mapping",
+            "suite_id": "retrieval",
+            "status": "blocked",
+            "elf_position": "untested",
+            "comparison_outcome": "blocked",
+            "evidence": "XY-929 adds a representative blocked fixture for RAGFlow reference-chunk citation scoring. The job must remain blocked until returned reference chunks include generated document ids, chunk ids, content, and document metadata mapped to benchmark evidence ids.",
+            "command": "cargo make real-world-memory-graph-rag",
+            "artifact": "apps/elf-eval/fixtures/real_world_external_adapters/graph_rag/ragflow_reference_chunks_blocked.json"
+          },
+          {
+            "scenario_id": "private_or_large_corpus_ragflow_quality",
+            "suite_id": "retrieval",
+            "status": "not_encoded",
+            "elf_position": "untested",
+            "comparison_outcome": "non_goal",
+            "evidence": "Private corpus, large-corpus, and hosted RAGFlow quality are outside the generated-public Docker representative lane and must not be inferred from smoke reports.",
+            "artifact": "apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json"
+          }
+        ],
+        "evidence": [
+          {
+            "kind": "source",
+            "ref": "https://github.com/infiniflow/ragflow",
+            "status": "real"
+          },
+          {
+            "kind": "source",
+            "ref": "https://ragflow.io/docs/",
+            "status": "real"
+          },
+          {
+            "kind": "artifact",
+            "ref": "tmp/real-world-memory/ragflow-smoke/ragflow-report.json",
+            "status": "blocked"
+          },
+          {
+            "kind": "artifact",
+            "ref": "tmp/real-world-memory/ragflow-smoke/ragflow-report.md",
+            "status": "blocked"
+          }
+        ],
+        "execution_metadata": {
+          "sources": [
+            {
+              "label": "RAGFlow repository",
+              "url": "https://github.com/infiniflow/ragflow",
+              "evidence": "Official source for RAGFlow service code and Docker Compose setup."
+            },
+            {
+              "label": "RAGFlow docs",
+              "url": "https://ragflow.io/docs/",
+              "evidence": "Official deployment and setup documentation."
+            },
+            {
+              "label": "RAGFlow HTTP API reference",
+              "url": "https://raw.githubusercontent.com/infiniflow/ragflow/main/docs/references/http_api_reference.md",
+              "evidence": "Official reference for OpenAI-compatible responses with reference chunks and document metadata."
+            }
+          ],
+          "setup_path": "Implement a tiny Docker evidence-smoke runner using the official Docker deployment, dataset ingest API, and OpenAI-compatible query API.",
+          "runtime_boundary": "Run scripts/ragflow-docker-evidence-smoke.sh through cargo make; the live path uses the official RAGFlow Docker Compose service boundary without host-global RAGFlow installs.",
+          "resource_expectation": "Large multi-service RAG stack; generated artifacts record CPU/GPU mode, memory, disk, image size, expanded disk notes, startup time, vm.max_map_count handling, and provider boundaries before scoring.",
+          "retry_guidance": [
+            "Run cargo make smoke-ragflow-docker first to produce a typed preflight artifact.",
+            "Start the live path only with ELF_RAGFLOW_SMOKE_START=1 and ELF_RAGFLOW_SMOKE_ACCEPT_RESOURCE_ENVELOPE=1.",
+            "Keep private corpora and operator-owned provider credentials out of this smoke; map only generated public corpus reference chunks to evidence ids."
+          ],
+          "research_depth": "D2 feasibility verdict plus XY-885 evidence-smoke implementation and XY-900 scored smoke promotion; checked-in record remains research_gate unless a generated artifact reaches query output"
+        },
+        "notes": [
+          "Status class: smoke-only scored adapter path with typed resource/setup/API-key blockers.",
+          "Do not interpret ragflow-report.json as broad RAGFlow quality evidence unless reference chunks map to generated evidence ids."
+        ],
+        "follow_up": {
+          "title": "[ELF benchmark adapter] Implement RAGFlow Docker evidence-smoke adapter",
+          "reason": "Created as XY-885. XY-882 found a Docker boundary and reference-chunk output contract; implementation must prove a tiny ingest/query run before any quality claim."
+        }
+      },
+      {
+        "adapter_id": "lightrag_research_gate",
+        "project": "LightRAG",
+        "adapter_kind": "research_gate",
+        "evidence_class": "research_gate",
+        "docker_default": true,
+        "host_global_installs_required": false,
+        "overall_status": "blocked",
+        "setup": {
+          "status": "blocked",
+          "evidence": "XY-886 adds a Docker-profile context-export smoke command, and XY-900 keeps its generated retrieval fixtures scored through real_world_job_benchmark. The checked-in row remains smoke-only research_gate evidence.",
+          "command": "cargo make smoke-lightrag-docker-context",
+          "artifact": "tmp/real-world-memory/lightrag-context/lightrag-materialization.json"
+        },
+        "run": {
+          "status": "blocked",
+          "evidence": "The default smoke records a typed setup/runtime failure if the LightRAG API is unavailable; set ELF_LIGHTRAG_CONTEXT_START=1 to start the opt-in Docker service profile.",
+          "command": "ELF_LIGHTRAG_CONTEXT_START=1 cargo make smoke-lightrag-docker-context",
+          "artifact": "tmp/real-world-memory/lightrag-context/summary.json"
+        },
+        "result": {
+          "status": "blocked",
+          "evidence": "The smoke emits lightrag-report.json and lightrag-report.md over generated retrieval jobs. Pass or wrong_result is allowed only when returned context, references, or file paths map to generated evidence ids.",
+          "artifact": "tmp/real-world-memory/lightrag-context/lightrag-report.json"
+        },
+        "capabilities": [
+          {
+            "capability": "docker_service_setup",
+            "status": "blocked",
+            "evidence": "The opt-in compose profile records explicit LightRAG image, LLM, embedding, rerank, workspace, and Docker volume configuration without host-global installs."
+          },
+          {
+            "capability": "retrieved_context_export",
+            "status": "blocked",
+            "evidence": "The materializer calls /documents/texts, waits on /documents/track_status, and queries /query with only_need_context plus chunk references when the service is reachable."
+          },
+          {
+            "capability": "real_world_job_adapter",
+            "status": "blocked",
+            "evidence": "The LightRAG materializer rewrites generated retrieval fixtures with adapter_response evidence only when source paths or context map to required evidence ids."
+          },
+          {
+            "capability": "quality_or_scale_claim",
+            "status": "not_encoded",
+            "evidence": "The smoke does not score broad graph-RAG quality, private corpora, scale, or comparative ranking claims."
+          }
+        ],
+        "suites": [
+          {
+            "suite_id": "retrieval",
+            "status": "blocked",
+            "evidence": "The generated smoke can exercise retrieval context/source mapping for retrieval fixtures, but the checked-in record stays blocked until a live artifact reaches query output."
+          },
+          {
+            "suite_id": "memory_evolution",
+            "status": "not_encoded",
+            "evidence": "LightRAG update/delete/current-versus-historical behavior is not encoded by the context-export smoke."
+          },
+          {
+            "suite_id": "operator_debugging_ux",
+            "status": "not_encoded",
+            "evidence": "The smoke records context/source mappings, but full trace or viewer diagnostics are not mapped to benchmark scoring."
+          }
+        ],
+        "scenarios": [
+          {
+            "scenario_id": "context_source_reference_mapping",
+            "suite_id": "retrieval",
+            "status": "incomplete",
+            "elf_position": "untested",
+            "comparison_outcome": "blocked",
+            "evidence": "XY-929 adds a representative incomplete fixture for LightRAG context/source-reference scoring. The job cannot score until the opt-in Docker API exports generated source file paths, snippets, or reference content.",
+            "command": "cargo make real-world-memory-graph-rag",
+            "artifact": "apps/elf-eval/fixtures/real_world_external_adapters/graph_rag/lightrag_context_sources_incomplete.json"
+          },
+          {
+            "scenario_id": "graph_rag_navigation_quality",
+            "suite_id": "retrieval",
+            "status": "not_encoded",
+            "elf_position": "untested",
+            "comparison_outcome": "not_tested",
+            "evidence": "LightRAG graph-RAG navigation quality remains not_tested beyond the context-source output contract; no ELF win, tie, or loss is claimed.",
+            "artifact": "apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json"
+          }
+        ],
+        "evidence": [
+          {
+            "kind": "source",
+            "ref": "https://github.com/HKUDS/LightRAG",
+            "status": "real"
+          },
+          {
+            "kind": "source",
+            "ref": "https://github.com/HKUDS/LightRAG/blob/main/docs/DockerDeployment.md",
+            "status": "real"
+          },
+          {
+            "kind": "command",
+            "ref": "cargo make smoke-lightrag-docker-context",
+            "status": "blocked"
+          },
+          {
+            "kind": "artifact",
+            "ref": "tmp/real-world-memory/lightrag-context/lightrag-materialization.json",
+            "status": "blocked"
+          },
+          {
+            "kind": "artifact",
+            "ref": "tmp/real-world-memory/lightrag-context/lightrag-report.md",
+            "status": "blocked"
+          }
+        ],
+        "execution_metadata": {
+          "sources": [
+            {
+              "label": "LightRAG repository",
+              "url": "https://github.com/HKUDS/LightRAG",
+              "evidence": "Official source for LightRAG server, Docker, and retrieval modes."
+            },
+            {
+              "label": "LightRAG Docker docs",
+              "url": "https://github.com/HKUDS/LightRAG/blob/main/docs/DockerDeployment.md",
+              "evidence": "Official Docker deployment reference."
+            },
+            {
+              "label": "LightRAG API server docs",
+              "url": "https://github.com/HKUDS/LightRAG/blob/main/docs/LightRAG-API-Server.md",
+              "evidence": "Official query-mode and context-output reference."
+            },
+            {
+              "label": "LightRAG core programming docs",
+              "url": "https://github.com/HKUDS/LightRAG/blob/main/docs/ProgramingWithCore.md",
+              "evidence": "Official source-id and file-path citation reference."
+            }
+          ],
+          "setup_path": "Run cargo make smoke-lightrag-docker-context for a typed preflight artifact; set ELF_LIGHTRAG_CONTEXT_START=1 to start the opt-in LightRAG Docker profile and attempt live context export.",
+          "runtime_boundary": "docker-compose.baseline.yml baseline-runner plus opt-in lightrag and lightrag-mock-provider services; generated source files and LightRAG data stay in Docker-mounted artifact paths and Docker volumes.",
+          "resource_expectation": "The default profile uses the official LightRAG image, a local OpenAI-compatible mock provider, 64-dimensional embeddings, rerank disabled for context queries, cargo/pip/Hugging Face caches, and Docker volumes for rag_storage, inputs, and prompts.",
+          "retry_guidance": [
+            "Run cargo make smoke-lightrag-docker-context first; a missing API must remain a typed incomplete artifact, not a pass claim.",
+            "Set ELF_LIGHTRAG_CONTEXT_START=1 only when Docker may pull/start the LightRAG service profile.",
+            "Score retrieval only when returned context, references.file_path, or references.content map to required evidence ids."
+          ],
+          "research_depth": "D2 feasibility plus XY-886 context-export implementation and XY-900 scored smoke aggregation; checked-in record remains research_gate unless a generated artifact reaches query output"
+        },
+        "notes": [
+          "Status class: smoke-only scored adapter path with typed service/setup blockers.",
+          "Do not interpret lightrag-report.json as broad graph-RAG quality evidence unless generated source/context mappings score as pass."
+        ],
+        "follow_up": {
+          "title": "[ELF benchmark adapter] Implement LightRAG Docker context-export adapter",
+          "reason": "Created as XY-886. XY-882 found a Docker service path and context/source mapping contract; implementation must prove evidence export before scoring."
+        }
+      },
+      {
+        "adapter_id": "graphrag_research_gate",
+        "project": "GraphRAG",
+        "adapter_kind": "research_gate",
+        "evidence_class": "research_gate",
+        "docker_default": true,
+        "host_global_installs_required": false,
+        "overall_status": "blocked",
+        "setup": {
+          "status": "blocked",
+          "evidence": "XY-900 promotes the Docker-safe generated-corpus GraphRAG smoke into a scored knowledge_compilation report while the checked-in row remains smoke-only research_gate evidence.",
+          "command": "cargo make smoke-graphrag-docker",
+          "artifact": "tmp/real-world-memory/graphrag-smoke/graphrag-smoke.json"
+        },
+        "run": {
+          "status": "blocked",
+          "evidence": "The default smoke records a typed blocked artifact without model calls; set ELF_GRAPHRAG_SMOKE_RUN=1 with explicit provider configuration to attempt live GraphRAG index/query.",
+          "command": "ELF_GRAPHRAG_SMOKE_RUN=1 cargo make smoke-graphrag-docker",
+          "artifact": "tmp/real-world-memory/graphrag-smoke/summary.json"
+        },
+        "result": {
+          "status": "blocked",
+          "evidence": "The smoke now emits graphrag-report.json and graphrag-report.md from one generated knowledge_compilation job. Pass or wrong_result is allowed only when GraphRAG output tables map to generated evidence ids.",
+          "artifact": "tmp/real-world-memory/graphrag-smoke/graphrag-report.json"
+        },
+        "capabilities": [
+          {
+            "capability": "indexing_resource_envelope",
+            "status": "blocked",
+            "evidence": "The smoke bounds the generated public corpus, timeout, GraphRAG package, model configuration, cache size, output size, elapsed time, and observed cache entries."
+          },
+          {
+            "capability": "source_citation_mapping",
+            "status": "blocked",
+            "evidence": "The generated artifact maps GraphRAG documents, text_units, communities, community_reports, entities, and relationships parquet rows back to real_world_job evidence ids when available."
+          },
+          {
+            "capability": "real_world_job_adapter",
+            "status": "blocked",
+            "evidence": "The smoke writes a generated real_world_job fixture and scored report; provider/setup limits remain blocked until live GraphRAG output maps to expected evidence ids."
+          },
+          {
+            "capability": "quality_or_scale_claim",
+            "status": "not_encoded",
+            "evidence": "The smoke does not claim broad graph-navigation quality, knowledge-synthesis quality, private corpora, or large-corpus indexing."
+          }
+        ],
+        "suites": [
+          {
+            "suite_id": "knowledge_compilation",
+            "status": "blocked",
+            "evidence": "The generated smoke can exercise parquet table source coverage for one tiny knowledge-compilation fixture, but the checked-in record stays blocked until live output exists."
+          },
+          {
+            "suite_id": "retrieval",
+            "status": "not_encoded",
+            "evidence": "The smoke may run local search for reachability, but retrieval quality scoring is not encoded."
+          },
+          {
+            "suite_id": "production_ops",
+            "status": "not_encoded",
+            "evidence": "Resource bounds are recorded, but no production-ops suite scoring is encoded."
+          },
+          {
+            "suite_id": "memory_evolution",
+            "status": "not_encoded",
+            "evidence": "GraphRAG update/delete/current-versus-historical behavior is not encoded by the smoke."
+          }
+        ],
+        "scenarios": [
+          {
+            "scenario_id": "output_table_citation_mapping",
+            "suite_id": "knowledge_compilation",
+            "status": "blocked",
+            "elf_position": "untested",
+            "comparison_outcome": "blocked",
+            "evidence": "XY-929 adds a representative blocked fixture for GraphRAG output-table citation scoring. The job requires provider-backed Docker output tables whose document, text-unit, community, report, entity, and relationship identifiers map to generated evidence ids.",
+            "command": "cargo make real-world-memory-graph-rag",
+            "artifact": "apps/elf-eval/fixtures/real_world_external_adapters/graph_rag/graphrag_output_tables_blocked.json"
+          },
+          {
+            "scenario_id": "graph_summary_synthesis_quality",
+            "suite_id": "knowledge_compilation",
+            "status": "not_encoded",
+            "elf_position": "untested",
+            "comparison_outcome": "not_tested",
+            "evidence": "GraphRAG graph-summary synthesis quality remains not_tested until provider-backed output tables and local-search context are scored beyond the smoke contract.",
+            "artifact": "apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json"
+          }
+        ],
+        "evidence": [
+          {
+            "kind": "source",
+            "ref": "https://github.com/microsoft/graphrag",
+            "status": "real"
+          },
+          {
+            "kind": "source",
+            "ref": "https://microsoft.github.io/graphrag/",
+            "status": "real"
+          },
+          {
+            "kind": "command",
+            "ref": "cargo make smoke-graphrag-docker",
+            "status": "blocked"
+          },
+          {
+            "kind": "artifact",
+            "ref": "tmp/real-world-memory/graphrag-smoke/graphrag-smoke.json",
+            "status": "blocked"
+          },
+          {
+            "kind": "artifact",
+            "ref": "tmp/real-world-memory/graphrag-smoke/graphrag-report.md",
+            "status": "blocked"
+          }
+        ],
+        "execution_metadata": {
+          "sources": [
+            {
+              "label": "GraphRAG repository",
+              "url": "https://github.com/microsoft/graphrag",
+              "evidence": "Official Microsoft GraphRAG source and setup reference."
+            },
+            {
+              "label": "GraphRAG docs",
+              "url": "https://microsoft.github.io/graphrag/",
+              "evidence": "Official documentation for indexing and querying."
+            },
+            {
+              "label": "GraphRAG input docs",
+              "url": "https://microsoft.github.io/graphrag/index/inputs/",
+              "evidence": "Official input format and document metadata reference."
+            },
+            {
+              "label": "GraphRAG output tables",
+              "url": "https://microsoft.github.io/graphrag/index/outputs/",
+              "evidence": "Official output schema with document, text unit, community, and relationship identifiers."
+            },
+            {
+              "label": "GraphRAG local search docs",
+              "url": "https://microsoft.github.io/graphrag/query/local_search/",
+              "evidence": "Official local-search context and graph traversal reference."
+            }
+          ],
+          "setup_path": "Run cargo make smoke-graphrag-docker for a typed preflight artifact; set ELF_GRAPHRAG_SMOKE_RUN=1 with explicit provider configuration for a live GraphRAG index/query attempt.",
+          "runtime_boundary": "docker-compose.baseline.yml baseline-runner, container-local Python venv, generated public corpus, and report artifacts under tmp/real-world-memory/graphrag-smoke.",
+          "resource_expectation": "The default profile uses a generated public corpus capped by ELF_GRAPHRAG_MAX_DOCS and ELF_GRAPHRAG_MAX_INPUT_CHARS, pins GraphRAG through ELF_GRAPHRAG_PACKAGE, and records elapsed time, cache size, output size, and observed cache entries.",
+          "retry_guidance": [
+            "Run cargo make smoke-graphrag-docker first; missing provider configuration must remain a typed blocked artifact, not a pass claim.",
+            "Enable ELF_GRAPHRAG_SMOKE_RUN=1 only for generated public corpus indexing with explicit provider configuration.",
+            "Fail typed if source document or text_unit identifiers cannot be mapped to expected evidence IDs."
+          ],
+          "research_depth": "D2 feasibility plus XY-887 Docker smoke implementation and XY-900 scored smoke promotion; checked-in record remains research_gate unless a generated artifact reaches GraphRAG output"
+        },
+        "notes": [
+          "Status class: smoke-only scored adapter path with typed provider/setup blockers.",
+          "Do not interpret graphrag-report.json as broad graph-navigation or knowledge-synthesis quality evidence unless output tables map to generated evidence ids."
+        ],
+        "follow_up": {
+          "title": "[ELF benchmark adapter] Implement GraphRAG cost-bounded Docker adapter",
+          "reason": "Created as XY-887. XY-882 found a Docker-bounded CLI/API path and output-table evidence handles; implementation must stay tiny and cost-recorded."
+        }
+      },
+      {
+        "adapter_id": "graphiti_zep_research_gate",
+        "project": "Graphiti/Zep",
+        "adapter_kind": "research_gate",
+        "evidence_class": "research_gate",
+        "docker_default": true,
+        "host_global_installs_required": false,
+        "overall_status": "blocked",
+        "setup": {
+          "status": "blocked",
+          "evidence": "XY-900 promotes the Docker-contained Graphiti/Zep temporal smoke into a scored memory_evolution report while the checked-in row remains smoke-only research_gate evidence.",
+          "command": "cargo make smoke-graphiti-zep-docker-temporal",
+          "artifact": "tmp/real-world-memory/graphiti-zep-smoke/graphiti-zep-smoke.json"
+        },
+        "run": {
+          "status": "blocked",
+          "evidence": "The default smoke records a typed setup/runtime failure if live execution is not explicitly enabled. Set ELF_GRAPHITI_ZEP_SMOKE_START=1 and ELF_GRAPHITI_ZEP_SMOKE_RUN=1 with explicit provider configuration to start Docker-local FalkorDB and run Graphiti.",
+          "command": "ELF_GRAPHITI_ZEP_SMOKE_START=1 ELF_GRAPHITI_ZEP_SMOKE_RUN=1 cargo make smoke-graphiti-zep-docker-temporal",
+          "artifact": "tmp/real-world-memory/graphiti-zep-smoke/summary.json"
+        },
+        "result": {
+          "status": "blocked",
+          "evidence": "The smoke now emits graphiti-zep-report.json and graphiti-zep-report.md from one generated memory_evolution job. The default blocker is live-run opt-in disabled; when ELF_GRAPHITI_ZEP_SMOKE_START=1 and ELF_GRAPHITI_ZEP_SMOKE_RUN=1 are set without provider credentials, the blocker is provider_api_key_missing. No hosted Zep service or unrecorded credentials are used.",
+          "artifact": "tmp/real-world-memory/graphiti-zep-smoke/graphiti-zep-report.json"
+        },
+        "capabilities": [
+          {
+            "capability": "temporal_graph_memory",
+            "status": "blocked",
+            "evidence": "The smoke materializes generated current, historical, and rationale facts with validity windows, but the checked-in record stays blocked until a live artifact maps search output."
+          },
+          {
+            "capability": "docker_graph_store_setup",
+            "status": "blocked",
+            "evidence": "The task uses a Docker Compose graphiti-zep profile for FalkorDB and a container-local Python venv; no host-global graph database or hosted Zep service is used."
+          },
+          {
+            "capability": "real_world_job_adapter",
+            "status": "blocked",
+            "evidence": "The generated temporal-validity fixture is scored or typed blocked; live quality evidence requires Graphiti/Zep search output mapped to current and historical evidence ids."
+          },
+          {
+            "capability": "quality_or_scale_claim",
+            "status": "not_encoded",
+            "evidence": "The smoke does not claim broad graph-memory quality, managed Zep service behavior, private-corpus behavior, or large-corpus performance."
+          }
+        ],
+        "suites": [
+          {
+            "suite_id": "memory_evolution",
+            "status": "blocked",
+            "evidence": "Generated current/historical relation facts are encoded, but the checked-in manifest stays blocked until the Docker smoke returns validity-window search output."
+          },
+          {
+            "suite_id": "retrieval",
+            "status": "not_encoded",
+            "evidence": "Hybrid graph retrieval reachability is not scored beyond the temporal search smoke."
+          },
+          {
+            "suite_id": "production_ops",
+            "status": "not_encoded",
+            "evidence": "The smoke records setup and provider boundaries but does not encode backup, restore, private corpus, or hosted-service operations."
+          }
+        ],
+        "scenarios": [
+          {
+            "scenario_id": "temporal_validity_window_mapping",
+            "suite_id": "memory_evolution",
+            "status": "blocked",
+            "elf_position": "untested",
+            "comparison_outcome": "blocked",
+            "evidence": "XY-929 adds a representative blocked fixture for Graphiti/Zep temporal-validity scoring. The job remains blocked until provider-backed Docker output maps current and historical validity-window facts to generated evidence ids.",
+            "command": "cargo make real-world-memory-graph-rag",
+            "artifact": "apps/elf-eval/fixtures/real_world_external_adapters/graph_rag/graphiti_temporal_validity_blocked.json"
+          },
+          {
+            "scenario_id": "hosted_zep_temporal_memory",
+            "suite_id": "memory_evolution",
+            "status": "unsupported",
+            "elf_position": "untested",
+            "comparison_outcome": "non_goal",
+            "evidence": "Hosted Zep service behavior is outside the Docker-local representative lane; no hosted-service result is used as ELF win/loss evidence.",
+            "artifact": "apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json"
+          }
+        ],
+        "evidence": [
+          {
+            "kind": "source",
+            "ref": "https://github.com/getzep/graphiti",
+            "status": "real"
+          },
+          {
+            "kind": "source",
+            "ref": "https://www.getzep.com/platform/graphiti/",
+            "status": "real"
+          },
+          {
+            "kind": "command",
+            "ref": "cargo make smoke-graphiti-zep-docker-temporal",
+            "status": "blocked"
+          },
+          {
+            "kind": "artifact",
+            "ref": "tmp/real-world-memory/graphiti-zep-smoke/graphiti-zep-smoke.json",
+            "status": "blocked"
+          },
+          {
+            "kind": "artifact",
+            "ref": "tmp/real-world-memory/graphiti-zep-smoke/graphiti-zep-report.md",
+            "status": "blocked"
+          }
+        ],
+        "execution_metadata": {
+          "sources": [
+            {
+              "label": "Graphiti repository",
+              "url": "https://github.com/getzep/graphiti",
+              "evidence": "Official open-source temporal context graph engine."
+            },
+            {
+              "label": "Zep Graphiti overview",
+              "url": "https://www.getzep.com/platform/graphiti/",
+              "evidence": "Official product documentation for temporal context graph behavior."
+            },
+            {
+              "label": "Graphiti quick start",
+              "url": "https://help.getzep.com/graphiti/getting-started/quick-start",
+              "evidence": "Official setup, episode ingest, and search output reference."
+            },
+            {
+              "label": "Graphiti FalkorDB configuration",
+              "url": "https://help.getzep.com/graphiti/configuration/falkor-db-configuration",
+              "evidence": "Official Docker-local FalkorDB setup reference."
+            },
+            {
+              "label": "Graphiti fact triples",
+              "url": "https://help.getzep.com/graphiti/working-with-data/adding-fact-triples",
+              "evidence": "Official manual fact-triple ingest contract."
+            }
+          ],
+          "setup_path": "Run cargo make smoke-graphiti-zep-docker-temporal for a typed artifact; set ELF_GRAPHITI_ZEP_SMOKE_START=1 ELF_GRAPHITI_ZEP_SMOKE_RUN=1 with explicit provider configuration for a live attempt.",
+          "runtime_boundary": "docker-compose.baseline.yml baseline-runner plus graphiti-zep FalkorDB profile, container-local Python venv, generated public temporal facts, and report artifacts under tmp/real-world-memory/graphiti-zep-smoke.",
+          "resource_expectation": "Requires Docker-local FalkorDB plus LLM/embedding configuration; generated artifacts record service startup, storage size, provider boundaries, fact count, and timeout before scoring.",
+          "retry_guidance": [
+            "Run cargo make smoke-graphiti-zep-docker-temporal first to produce a typed blocked artifact.",
+            "Start the live path only with ELF_GRAPHITI_ZEP_SMOKE_START=1, ELF_GRAPHITI_ZEP_SMOKE_RUN=1, and explicit provider configuration.",
+            "Treat missing validity windows or unmapped current/historical facts as wrong_result, not pass."
+          ],
+          "research_depth": "D2 feasibility plus XY-888 Docker temporal smoke implementation and XY-900 scored smoke promotion; checked-in record remains research_gate unless a generated artifact reaches Graphiti search output"
+        },
+        "notes": [
+          "Status class: smoke-only scored adapter path with typed live-run opt-in, provider, and setup blockers.",
+          "Graphiti/Zep remains the temporal-validity reference; do not claim ELF-over-Graphiti/Zep until provider-backed temporal output maps to scored evidence ids."
+        ],
+        "follow_up": {
+          "title": "[ELF benchmark adapter] Implement Graphiti/Zep temporal graph adapter",
+          "reason": "Created as XY-888. XY-882 found a Docker-local graph-store path and fact/validity-window output contract for memory_evolution scoring."
+        }
+      },
+      {
+        "adapter_id": "letta_research_gate",
+        "project": "Letta",
+        "adapter_kind": "research_gate",
+        "evidence_class": "research_gate",
+        "docker_default": true,
+        "host_global_installs_required": false,
+        "overall_status": "blocked",
+        "setup": {
+          "status": "blocked",
+          "evidence": "Letta is D1 reviewed as a core/archival memory reference. The contained comparison contract now has cargo make smoke-letta-core-archive-export-readback, a Docker-only benchmark-created agent export/readback materializer that must return core block JSON, archival search/readback JSON, and source ids before any scenario claim is scored.",
+          "command": "cargo make smoke-letta-core-archive-export-readback",
+          "artifact": "tmp/real-world-memory/letta-core-archive/letta-core-archive-export.json"
+        },
+        "run": {
+          "status": "blocked",
+          "evidence": "The default materializer emits a typed blocked report unless a Docker-local Letta server and explicit model/provider configuration produce benchmark-owned core block export and archival readback/search output.",
+          "command": "ELF_LETTA_SMOKE_START=1 ELF_LETTA_SMOKE_RUN=1 cargo make smoke-letta-core-archive-export-readback",
+          "artifact": "tmp/real-world-memory/letta-core-archive/summary.json"
+        },
+        "result": {
+          "status": "blocked",
+          "evidence": "No Letta core block, archival fallback, stale-core, scope, provenance, or project-decision pass/win/tie/loss is claimed until the generated export/readback artifact maps required source ids.",
+          "artifact": "tmp/real-world-memory/letta-core-archive/report.json"
+        },
+        "capabilities": [
+          {
+            "capability": "core_archival_memory",
+            "status": "blocked",
+            "evidence": "ELF fixture jobs score core block attachment, scope, provenance, stale-core detection, archival fallback, and project-decision recovery separately from archival note search; Letta remains blocked until its export maps equivalent source ids."
+          },
+          {
+            "capability": "docker_embedding_configuration",
+            "status": "blocked",
+            "evidence": "Official Docker setup requires explicit embedding configuration before archival retrieval can be tested."
+          },
+          {
+            "capability": "real_world_job_adapter",
+            "status": "blocked",
+            "evidence": "A Docker-contained materializer now exists and emits typed blocked evidence by default; live scoring still requires exported Letta core blocks, archival list/search JSON, and source-id mappings."
+          },
+          {
+            "capability": "broad_letta_quality_claim",
+            "status": "not_encoded",
+            "evidence": "The materializer does not score broad Letta product quality, hosted/private state, personalization breadth, or production durability."
+          }
+        ],
+        "suites": [
+          {
+            "suite_id": "personalization",
+            "status": "not_encoded",
+            "evidence": "Core memory preference application is not encoded for Letta."
+          },
+          {
+            "suite_id": "project_decisions",
+            "status": "blocked",
+            "evidence": "The project-decision recovery row is represented only through the core_archival_memory export/readback materializer and remains blocked without mapped source ids."
+          },
+          {
+            "suite_id": "work_resume",
+            "status": "not_encoded",
+            "evidence": "Agent resumption through Letta memory blocks is not encoded."
+          },
+          {
+            "suite_id": "core_archival_memory",
+            "status": "blocked",
+            "evidence": "A Docker-contained materializer now emits the core_archival_memory scenarios as typed blocked unless live Letta export/readback maps core block JSON, archival search/readback JSON, and source ids."
+          }
+        ],
+        "scenarios": [
+          {
+            "scenario_id": "core_block_attachment_readback",
+            "suite_id": "core_archival_memory",
+            "status": "blocked",
+            "elf_position": "untested",
+            "comparison_outcome": "blocked",
+            "evidence": "ELF fixture core-archival-core-block-attachment-001 scores exact core block attachment and keeps core readback out of Qdrant-backed archival search. Letta remains blocked until the generated export/readback artifact maps this core block attachment source id.",
+            "command": "cargo make smoke-letta-core-archive-export-readback",
+            "artifact": "tmp/real-world-memory/letta-core-archive/summary.json"
+          },
+          {
+            "scenario_id": "core_block_scope_readback",
+            "suite_id": "core_archival_memory",
+            "status": "blocked",
+            "elf_position": "untested",
+            "comparison_outcome": "blocked",
+            "evidence": "ELF fixture core-archival-core-block-scope-001 scores read_profile, shared scope, and private-owner boundaries. Letta scope behavior remains blocked until the generated export includes agent, block, visibility metadata, and source ids.",
+            "command": "cargo make smoke-letta-core-archive-export-readback",
+            "artifact": "tmp/real-world-memory/letta-core-archive/summary.json"
+          },
+          {
+            "scenario_id": "core_block_provenance_readback",
+            "suite_id": "core_archival_memory",
+            "status": "blocked",
+            "elf_position": "untested",
+            "comparison_outcome": "blocked",
+            "evidence": "ELF fixture core-archival-core-block-provenance-001 scores source_ref and audit_history readback. Letta provenance remains blocked until exported core memory includes stable source ids and audit-equivalent events.",
+            "command": "cargo make smoke-letta-core-archive-export-readback",
+            "artifact": "tmp/real-world-memory/letta-core-archive/summary.json"
+          },
+          {
+            "scenario_id": "stale_core_detection",
+            "suite_id": "core_archival_memory",
+            "status": "blocked",
+            "elf_position": "untested",
+            "comparison_outcome": "blocked",
+            "evidence": "ELF fixture core-archival-stale-core-detection-001 scores archival evidence superseding a stale core block. Letta stale-core comparison is blocked until core export and archival readback can be joined by source ids.",
+            "command": "cargo make smoke-letta-core-archive-export-readback",
+            "artifact": "tmp/real-world-memory/letta-core-archive/summary.json"
+          },
+          {
+            "scenario_id": "archival_fallback_readback",
+            "suite_id": "core_archival_memory",
+            "status": "blocked",
+            "elf_position": "untested",
+            "comparison_outcome": "blocked",
+            "evidence": "ELF fixture core-archival-archival-fallback-001 scores fallback from insufficient core memory to archival note search. Letta fallback comparison is blocked until archival search output can be exported with source ids.",
+            "command": "cargo make smoke-letta-core-archive-export-readback",
+            "artifact": "tmp/real-world-memory/letta-core-archive/summary.json"
+          },
+          {
+            "scenario_id": "core_archival_project_decision_recovery",
+            "suite_id": "core_archival_memory",
+            "status": "blocked",
+            "elf_position": "untested",
+            "comparison_outcome": "blocked",
+            "evidence": "ELF fixture core-archival-project-decision-recovery-001 scores core routing plus archival decision rationale. Letta project-decision recovery remains blocked until the generated export/readback artifact maps core routing plus archival rationale source ids.",
+            "command": "cargo make smoke-letta-core-archive-export-readback",
+            "artifact": "tmp/real-world-memory/letta-core-archive/summary.json"
+          }
+        ],
+        "evidence": [
+          {
+            "kind": "artifact",
+            "ref": "tmp/real-world-memory/letta-core-archive/letta-core-archive-export.json",
+            "status": "blocked"
+          },
+          {
+            "kind": "artifact",
+            "ref": "tmp/real-world-memory/letta-core-archive/summary.json",
+            "status": "blocked"
+          },
+          {
+            "kind": "source",
+            "ref": "https://docs.letta.com/guides/docker",
+            "status": "real"
+          },
+          {
+            "kind": "source",
+            "ref": "https://docs.letta.com/api/python",
+            "status": "real"
+          },
+          {
+            "kind": "source",
+            "ref": "https://docs.letta.com/api/resources/agents/subresources/passages/methods/search",
+            "status": "real"
+          }
+        ],
+        "execution_metadata": {
+          "sources": [
+            {
+              "label": "Letta Docker docs",
+              "url": "https://docs.letta.com/guides/docker",
+              "evidence": "Official Docker setup and explicit embedding configuration boundary."
+            },
+            {
+              "label": "Letta Python API",
+              "url": "https://docs.letta.com/api/python",
+              "evidence": "Official Python SDK memory block creation and retrieval examples."
+            },
+            {
+              "label": "Letta archival search API",
+              "url": "https://docs.letta.com/api/resources/agents/subresources/passages/methods/search",
+              "evidence": "Official archival-memory search endpoint contract."
+            }
+          ],
+          "setup_path": "Run cargo make smoke-letta-core-archive-export-readback for a typed artifact; set ELF_LETTA_SMOKE_START=1 ELF_LETTA_SMOKE_RUN=1 with explicit model/provider configuration for a live export attempt. The smoke exports core block JSON plus archival search/readback JSON when Letta setup succeeds.",
+          "runtime_boundary": "docker-compose.baseline.yml baseline-runner plus optional Letta server profile, benchmark-created agent, benchmark-owned fixture corpus, no hosted/private state, and artifacts under tmp/real-world-memory/letta-core-archive.",
+          "resource_expectation": "Letta Docker server, Python SDK client, explicit model and embedding configuration, exported core memory, archival search output, and provider boundaries must be explicit in the artifact.",
+          "retry_guidance": [
+            "Default command records a typed blocked artifact without model calls.",
+            "Enable the live path only with Docker-local Letta and explicit provider or local model configuration.",
+            "Score core-versus-archival scenarios only after core block export and archival list/search output map to fixture evidence ids."
+          ],
+          "research_depth": "D1 feasibility verdict: research_only (XY-882); XY-927 selected the contained export/readback contract; XY-984 adds the Docker-contained materializer and keeps the comparison blocked until live export evidence maps source ids."
+        },
+        "notes": []
+      },
+      {
+        "adapter_id": "langgraph_research_gate",
+        "project": "LangGraph",
+        "adapter_kind": "research_gate",
+        "evidence_class": "research_gate",
+        "docker_default": true,
+        "host_global_installs_required": false,
+        "overall_status": "not_encoded",
+        "setup": {
+          "status": "not_encoded",
+          "evidence": "LangGraph is D1 reviewed as a replay/checkpoint reference, not a direct memory backend adapter."
+        },
+        "run": {
+          "status": "not_encoded",
+          "evidence": "No checkpoint replay real_world_job harness is encoded."
+        },
+        "result": {
+          "status": "not_encoded",
+          "evidence": "No production-ops or resume suite result is claimed."
+        },
+        "capabilities": [
+          {
+            "capability": "checkpoint_replay_regression",
+            "status": "not_encoded",
+            "evidence": "Replay/fork behavior needs an agent graph harness before scoring."
+          },
+          {
+            "capability": "standalone_memory_backend",
+            "status": "unsupported",
+            "evidence": "LangGraph persistence is an agent-state/checkpoint layer, not a drop-in memory retrieval backend."
+          },
+          {
+            "capability": "real_world_job_adapter",
+            "status": "not_encoded",
+            "evidence": "No LangGraph benchmark materializer exists."
+          }
+        ],
+        "suites": [
+          {
+            "suite_id": "production_ops",
+            "status": "not_encoded",
+            "evidence": "Checkpoint recovery and replay regression are not encoded."
+          },
+          {
+            "suite_id": "work_resume",
+            "status": "not_encoded",
+            "evidence": "Resume from checkpoint with memory reads is not encoded."
+          }
+        ],
+        "scenarios": [],
+        "evidence": [
+          {
+            "kind": "source",
+            "ref": "https://docs.langchain.com/oss/python/langgraph/persistence",
+            "status": "real"
+          }
+        ],
+        "execution_metadata": {
+          "sources": [
+            {
+              "label": "LangGraph persistence docs",
+              "url": "https://docs.langchain.com/oss/python/langgraph/persistence",
+              "evidence": "Official documentation for checkpoints, replay, fork, and persistence behavior."
+            }
+          ],
+          "setup_path": "Build a tiny LangGraph agent with a checkpointer and explicit memory read/write steps before scoring.",
+          "runtime_boundary": "Docker-only Python harness with checkpoint store under the artifact directory.",
+          "resource_expectation": "Small runtime expected, but LLM calls and side effects must be stubbed or deterministic before replay claims.",
+          "retry_guidance": [
+            "Encode one replay/fork failure recovery job.",
+            "Keep LangGraph classified as replay reference unless memory retrieval is actually exercised."
+          ],
+          "research_depth": "D1 feasibility verdict: research_only (XY-882); replay/checkpoint reference, adapter not encoded"
+        },
+        "notes": []
+      },
+      {
+        "adapter_id": "nanograph_research_gate",
+        "project": "nanograph",
+        "adapter_kind": "research_gate",
+        "evidence_class": "research_gate",
+        "docker_default": true,
+        "host_global_installs_required": false,
+        "overall_status": "not_encoded",
+        "setup": {
+          "status": "not_encoded",
+          "evidence": "nanograph is D1 reviewed as typed graph DX, but no Docker adapter is implemented."
+        },
+        "run": {
+          "status": "not_encoded",
+          "evidence": "No typed graph schema/query real_world_job run is encoded."
+        },
+        "result": {
+          "status": "not_encoded",
+          "evidence": "No graph temporal or retrieval-debug result is claimed."
+        },
+        "capabilities": [
+          {
+            "capability": "typed_graph_schema",
+            "status": "not_encoded",
+            "evidence": "Schema-as-code and typed query ergonomics need a benchmark harness."
+          },
+          {
+            "capability": "memory_backend_comparison",
+            "status": "unsupported",
+            "evidence": "nanograph is a graph database reference, not a complete agent memory service."
+          },
+          {
+            "capability": "real_world_job_adapter",
+            "status": "not_encoded",
+            "evidence": "No nanograph materializer exists."
+          }
+        ],
+        "suites": [
+          {
+            "suite_id": "memory_evolution",
+            "status": "not_encoded",
+            "evidence": "Typed current/historical fact jobs are not encoded."
+          },
+          {
+            "suite_id": "retrieval",
+            "status": "not_encoded",
+            "evidence": "Typed query explainability is not scored."
+          }
+        ],
+        "scenarios": [],
+        "evidence": [
+          {
+            "kind": "source",
+            "ref": "https://github.com/nanograph/nanograph",
+            "status": "real"
+          }
+        ],
+        "execution_metadata": {
+          "sources": [
+            {
+              "label": "nanograph repository",
+              "url": "https://github.com/nanograph/nanograph",
+              "evidence": "Official source for on-device typed property graph behavior."
+            }
+          ],
+          "setup_path": "Build or install nanograph inside Docker and load a typed graph fixture from generated corpus facts.",
+          "runtime_boundary": "Docker-only CLI run with graph folder under benchmark artifacts.",
+          "resource_expectation": "Light local graph runtime expected; record binary build/install time and graph artifact size.",
+          "retry_guidance": [
+            "Define a minimal schema for memory_evolution facts.",
+            "Score typed query output only if it cites fixture evidence IDs."
+          ],
+          "research_depth": "D1 feasibility verdict: research_only (XY-882); typed graph DX reference, adapter not encoded"
+        },
+        "notes": []
+      },
+      {
+        "adapter_id": "llm_wiki_research_gate",
+        "project": "llm-wiki",
+        "adapter_kind": "research_gate",
+        "evidence_class": "research_gate",
+        "docker_default": true,
+        "host_global_installs_required": false,
+        "overall_status": "not_encoded",
+        "setup": {
+          "status": "not_encoded",
+          "evidence": "llm-wiki is D1 reviewed as a knowledge-compilation reference, but no plugin or generated-page adapter is implemented."
+        },
+        "run": {
+          "status": "not_encoded",
+          "evidence": "No llm-wiki corpus-to-page run is encoded."
+        },
+        "result": {
+          "status": "not_encoded",
+          "evidence": "No knowledge page citation or lint result is claimed."
+        },
+        "capabilities": [
+          {
+            "capability": "knowledge_page_compilation",
+            "status": "not_encoded",
+            "evidence": "Wiki generation and citation lint are not executed by the runner."
+          },
+          {
+            "capability": "live_service_runtime",
+            "status": "unsupported",
+            "evidence": "llm-wiki is a plugin/workflow reference rather than a service adapter."
+          },
+          {
+            "capability": "real_world_job_adapter",
+            "status": "not_encoded",
+            "evidence": "No page materializer or scorer mapping exists."
+          }
+        ],
+        "suites": [
+          {
+            "suite_id": "knowledge_compilation",
+            "status": "not_encoded",
+            "evidence": "Corpus-to-wiki output is not encoded."
+          },
+          {
+            "suite_id": "work_resume",
+            "status": "not_encoded",
+            "evidence": "Resume answers from wiki pages are not encoded."
+          }
+        ],
+        "scenarios": [
+          {
+            "scenario_id": "wiki_page_citation_lint",
+            "suite_id": "knowledge_compilation",
+            "status": "not_encoded",
+            "elf_position": "untested",
+            "comparison_outcome": "not_tested",
+            "evidence": "llm-wiki remains a knowledge-workflow reference. No Docker-contained plugin or file-based page materializer emits cited wiki sections for scoring.",
+            "artifact": "apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json"
+          }
+        ],
+        "evidence": [
+          {
+            "kind": "source",
+            "ref": "https://github.com/nvk/llm-wiki",
+            "status": "real"
+          }
+        ],
+        "execution_metadata": {
+          "sources": [
+            {
+              "label": "llm-wiki repository",
+              "url": "https://github.com/nvk/llm-wiki",
+              "evidence": "Official source for the LLM Wiki plugin and knowledge-base workflow."
+            }
+          ],
+          "setup_path": "Research plugin bootstrap inside a Docker-contained Codex or file-based harness, then materialize page artifacts.",
+          "runtime_boundary": "Docker-only plugin or fixture materializer; no user-global Codex plugin install.",
+          "resource_expectation": "LLM generation cost depends on page build; record provider boundary and generated artifact size.",
+          "retry_guidance": [
+            "Prototype a fixture-only page build with explicit citations.",
+            "Do not score until generated sections can be mapped to evidence IDs."
+          ],
+          "research_depth": "D1 feasibility verdict: research_only (XY-882); derived wiki workflow reference, adapter not encoded"
+        },
+        "notes": []
+      },
+      {
+        "adapter_id": "gbrain_research_gate",
+        "project": "gbrain",
+        "adapter_kind": "research_gate",
+        "evidence_class": "research_gate",
+        "docker_default": true,
+        "host_global_installs_required": false,
+        "overall_status": "not_encoded",
+        "setup": {
+          "status": "not_encoded",
+          "evidence": "gbrain is D1 reviewed as a compiled-truth and timeline reference, but no Docker adapter is implemented."
+        },
+        "run": {
+          "status": "not_encoded",
+          "evidence": "No gbrain brain-repo import or compiled-truth run is encoded."
+        },
+        "result": {
+          "status": "not_encoded",
+          "evidence": "No knowledge-synthesis or operator-continuity result is claimed."
+        },
+        "capabilities": [
+          {
+            "capability": "compiled_truth_timeline",
+            "status": "not_encoded",
+            "evidence": "Compiled truth plus timeline output is a reference pattern but not scored."
+          },
+          {
+            "capability": "postgres_backed_brain_repo",
+            "status": "blocked",
+            "evidence": "A Docker-local brain repo and Postgres setup path must be proven before execution."
+          },
+          {
+            "capability": "real_world_job_adapter",
+            "status": "not_encoded",
+            "evidence": "No gbrain materializer exists."
+          }
+        ],
+        "suites": [
+          {
+            "suite_id": "knowledge_compilation",
+            "status": "not_encoded",
+            "evidence": "Compiled truth and timeline pages are not scored."
+          },
+          {
+            "suite_id": "operator_debugging_ux",
+            "status": "not_encoded",
+            "evidence": "Operator continuity through brain pages is not encoded."
+          }
+        ],
+        "scenarios": [
+          {
+            "scenario_id": "compiled_truth_timeline_export",
+            "suite_id": "knowledge_compilation",
+            "status": "blocked",
+            "elf_position": "untested",
+            "comparison_outcome": "blocked",
+            "evidence": "gbrain compiled-truth and timeline scoring remains blocked until a Docker-local brain repository and database setup emits current-truth pages with source timeline evidence.",
+            "artifact": "apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json"
+          }
+        ],
+        "evidence": [
+          {
+            "kind": "source",
+            "ref": "https://github.com/garrytan/gbrain",
+            "status": "real"
+          },
+          {
+            "kind": "source",
+            "ref": "https://github.com/garrytan/gbrain/blob/master/docs/guides/compiled-truth.md",
+            "status": "real"
+          }
+        ],
+        "execution_metadata": {
+          "sources": [
+            {
+              "label": "gbrain repository",
+              "url": "https://github.com/garrytan/gbrain",
+              "evidence": "Official source for brain repo and retrieval workflow."
+            },
+            {
+              "label": "compiled truth guide",
+              "url": "https://github.com/garrytan/gbrain/blob/master/docs/guides/compiled-truth.md",
+              "evidence": "Official guide for compiled truth plus timeline behavior."
+            }
+          ],
+          "setup_path": "Create a Docker-local brain repo fixture, run import/sync, and export compiled truth plus timeline evidence.",
+          "runtime_boundary": "Docker-only repository and database state with no operator-owned brain repo.",
+          "resource_expectation": "Postgres-backed sync and embedding choices must be explicit; record DB size and import time.",
+          "retry_guidance": [
+            "Prototype a tiny brain repo with one current-truth page and timeline.",
+            "Score only if compiled truth cites the source timeline evidence."
+          ],
+          "research_depth": "D1 feasibility verdict: blocked (XY-882); Docker-local brain repo and database path not proven"
+        },
+        "notes": []
+      },
+      {
+        "adapter_id": "graphify_docker_smoke",
+        "project": "graphify",
+        "adapter_kind": "docker_cli_real_world_job",
+        "evidence_class": "live_real_world",
+        "docker_default": true,
+        "host_global_installs_required": false,
+        "overall_status": "wrong_result",
+        "setup": {
+          "status": "pass",
+          "evidence": "XY-900 validation reached the Docker-only graph/report smoke setup inside the baseline runner without host-global assistant hooks.",
+          "command": "cargo make smoke-graphify-docker-graph-report",
+          "artifact": "tmp/real-world-memory/graphify-smoke/graphify-smoke.json"
+        },
+        "run": {
+          "status": "pass",
+          "evidence": "The smoke installed graphify in a container-local venv, ran over a generated public corpus, and produced graph/report/query output for scoring.",
+          "command": "cargo make smoke-graphify-docker-graph-report",
+          "artifact": "tmp/real-world-memory/graphify-smoke/summary.json"
+        },
+        "result": {
+          "status": "wrong_result",
+          "evidence": "The smoke emits graphify-report.json and graphify-report.md from one generated knowledge_compilation job. The current scored report maps evidence ids but remains wrong_result because the scoring rubric still records a wrong-result signal.",
+          "artifact": "tmp/real-world-memory/graphify-smoke/graphify-report.json"
+        },
+        "capabilities": [
+          {
+            "capability": "docker_cli_boundary",
+            "status": "pass",
+            "evidence": "The smoke uses docker-compose.baseline.yml baseline-runner, a container-local Python venv, and isolated assistant config paths; it does not install host-global assistant hooks."
+          },
+          {
+            "capability": "graph_report_generation",
+            "status": "pass",
+            "evidence": "The smoke captures graphify-out/graph.json, GRAPH_REPORT.md, cache metadata, command logs, build time, graph size, and report size."
+          },
+          {
+            "capability": "real_world_job_adapter",
+            "status": "wrong_result",
+            "evidence": "The smoke writes a generated real_world_job fixture and scored report; current knowledge_compilation scoring is wrong_result, not pass."
+          },
+          {
+            "capability": "multimodal_code_graph",
+            "status": "not_encoded",
+            "evidence": "Multimodal extraction for videos, images, PDFs, or broad codebase understanding is a reference capability but not scored by this smoke."
+          },
+          {
+            "capability": "quality_or_scale_claim",
+            "status": "not_encoded",
+            "evidence": "The smoke does not claim broad graph quality, private corpus behavior, scale, or authoritative memory-store behavior."
+          }
+        ],
+        "suites": [
+          {
+            "suite_id": "knowledge_compilation",
+            "status": "wrong_result",
+            "evidence": "The generated smoke exercised graph/report evidence mapping for one generated knowledge-compilation fixture and scored wrong_result with mean_score 0.75."
+          },
+          {
+            "suite_id": "retrieval",
+            "status": "blocked",
+            "evidence": "Graph-guided query output is present only as support for the generated knowledge_compilation smoke; broad retrieval quality scoring remains unclaimed."
+          },
+          {
+            "suite_id": "work_resume",
+            "status": "not_encoded",
+            "evidence": "Resume answers from graph context are not encoded."
+          }
+        ],
+        "scenarios": [
+          {
+            "scenario_id": "graph_report_navigation_lint",
+            "suite_id": "knowledge_compilation",
+            "status": "wrong_result",
+            "elf_position": "untested",
+            "comparison_outcome": "not_tested",
+            "evidence": "XY-929 adds a representative graphify fixture that scores graph report navigation, source-location citations, stale-source lint, and unsupported-summary handling as wrong_result because stale-source lint is still missing. This remains graphify non-pass evidence, not an ELF victory claim.",
+            "command": "cargo make real-world-memory-graph-rag",
+            "artifact": "apps/elf-eval/fixtures/real_world_external_adapters/graph_rag/graphify_graph_report_wrong_result.json"
+          },
+          {
+            "scenario_id": "broad_graph_navigation_quality",
+            "suite_id": "retrieval",
+            "status": "not_encoded",
+            "elf_position": "untested",
+            "comparison_outcome": "not_tested",
+            "evidence": "Broad graph-navigation, codebase, multimodal, and private-corpus quality remain not_tested; the graphify evidence is bounded to generated graph/report artifacts.",
+            "artifact": "apps/elf-eval/fixtures/real_world_external_adapters/memory_projects_manifest.json"
+          }
+        ],
+        "evidence": [
+          {
+            "kind": "source",
+            "ref": "https://github.com/safishamsi/graphify",
+            "status": "real"
+          },
+          {
+            "kind": "command",
+            "ref": "cargo make smoke-graphify-docker-graph-report",
+            "status": "wrong_result"
+          },
+          {
+            "kind": "artifact",
+            "ref": "tmp/real-world-memory/graphify-smoke/graphify-smoke.json",
+            "status": "pass"
+          },
+          {
+            "kind": "artifact",
+            "ref": "tmp/real-world-memory/graphify-smoke/graphify-report.md",
+            "status": "wrong_result"
+          }
+        ],
+        "execution_metadata": {
+          "sources": [
+            {
+              "label": "graphify repository",
+              "url": "https://github.com/safishamsi/graphify",
+              "evidence": "Official source for graphify graph extraction and query workflow."
+            },
+            {
+              "label": "graphify README",
+              "url": "https://github.com/safishamsi/graphify/blob/v3/README.md",
+              "evidence": "Official CLI, output artifact, query, and source-location contract."
+            }
+          ],
+          "setup_path": "Run cargo make smoke-graphify-docker-graph-report to install graphify in Docker, build graph/report artifacts from a generated public corpus, and export query evidence without installing host-global assistant hooks.",
+          "runtime_boundary": "docker-compose.baseline.yml baseline-runner, container-local Python venv, isolated HOME/config paths, generated public corpus, and artifacts under tmp/real-world-memory/graphify-smoke.",
+          "resource_expectation": "Graph build cost scales with corpus and model choices; generated artifacts record package reference, provider/model boundary, build time, graph size, report size, cache size, timeout, and retry behavior.",
+          "retry_guidance": [
+            "Run cargo make smoke-graphify-docker-graph-report first; setup/runtime failures must remain typed artifacts, not pass claims.",
+            "Do not use graphify host assistant hook installs or operator-owned assistant configuration as proof.",
+            "Score graph-guided answers only when graph.json, GRAPH_REPORT.md, and graphify query output map to generated evidence ids."
+          ],
+          "research_depth": "D1 feasibility verdict plus XY-889 Docker graph/report smoke implementation and XY-900 scored smoke promotion; current Docker validation reaches graphify output and scores the tiny knowledge_compilation job as wrong_result"
+        },
+        "notes": [
+          "Status class: live Docker scored smoke with a current wrong_result outcome.",
+          "Do not interpret graphify-report.json as broad graph-navigation or knowledge-compilation quality evidence; the tiny smoke is scored and currently non-pass."
+        ],
+        "follow_up": {
+          "title": "[ELF benchmark adapter] Implement graphify Docker graph-report adapter",
+          "reason": "Created as XY-889. XY-882 found a Docker-only CLI/materializer path and source-file/source-location output contract."
+        }
+      }
+    ]
+  },
+  "capture_integration": {
+    "real": [],
+    "fixture_backed": [],
+    "mocked": [],
+    "blocked": [],
+    "not_encoded": [
+      "No capture/integration behavior was declared by encoded fixtures."
+    ],
+    "notes": []
+  },
+  "summary": {
+    "job_count": 11,
+    "encoded_suite_count": 3,
+    "pass": 9,
+    "wrong_result": 0,
+    "lifecycle_fail": 0,
+    "incomplete": 0,
+    "blocked": 2,
+    "not_encoded": 0,
+    "unsupported_claim": 0,
+    "unsupported_claim_count": 0,
+    "wrong_result_count": 0,
+    "stale_answer_count": 0,
+    "conflict_detection_count": 0,
+    "update_rationale_available_count": 0,
+    "temporal_validity_not_encoded_count": 0,
+    "history_readback_encoded_count": 0,
+    "expected_evidence_total": 22,
+    "expected_evidence_matched": 22,
+    "expected_evidence_recall": 1.0,
+    "irrelevant_context_count": 0,
+    "irrelevant_context_ratio": 0.0,
+    "trace_explainability_count": 11,
+    "wrong_result_stage_attribution_count": 0,
+    "mean_score": 0.818,
+    "mean_latency_ms": 9.836,
+    "total_cost": {
+      "currency": "USD",
+      "amount": 0.0,
+      "input_tokens": 0,
+      "output_tokens": 0
+    },
+    "evidence_required_count": 22,
+    "evidence_covered_count": 22,
+    "evidence_coverage": 1.0,
+    "source_ref_required_count": 22,
+    "source_ref_covered_count": 22,
+    "source_ref_coverage": 1.0,
+    "quote_required_count": 22,
+    "quote_covered_count": 22,
+    "quote_coverage": 1.0,
+    "stale_retrieval_count": 0,
+    "scope_check_count": 0,
+    "scope_correct_count": 0,
+    "scope_correctness": 0.0,
+    "scope_violation_count": 0,
+    "redaction_leak_count": 0,
+    "qdrant_rebuild_case_count": 0,
+    "qdrant_rebuild_pass_count": 0,
+    "operator_debug_job_count": 0,
+    "raw_sql_needed_count": 0,
+    "trace_incomplete_count": 0,
+    "operator_ux_gap_count": 0,
+    "consolidation": {
+      "proposal_count": 0,
+      "proposal_usefulness": null,
+      "lineage_completeness": null,
+      "review_action_correctness": null,
+      "source_mutation_count": 0,
+      "proposal_unsupported_claim_count": 0,
+      "executable_gap_count": 0
+    },
+    "memory_summary": {
+      "job_count": 1,
+      "summary_count": 1,
+      "entry_count": 7,
+      "required_category_count": 6,
+      "covered_required_category_count": 6,
+      "missing_required_category_count": 0,
+      "top_of_mind_count": 1,
+      "background_count": 1,
+      "stale_count": 1,
+      "superseded_count": 1,
+      "tombstone_count": 1,
+      "derived_project_profile_count": 2,
+      "source_ref_required_count": 6,
+      "source_ref_entry_count": 6,
+      "source_ref_coverage": 1.0,
+      "freshness_marker_count": 7,
+      "freshness_coverage": 1.0,
+      "rationale_count": 7,
+      "rationale_coverage": 1.0,
+      "invalid_top_of_mind_count": 0,
+      "untraced_entry_count": 0,
+      "derived_with_source_or_unsupported_count": 2,
+      "derived_missing_source_or_unsupported_count": 0,
+      "unsupported_derived_entry_count": 1,
+      "unsupported_current_entry_count": 0,
+      "tombstone_ref_count": 1,
+      "source_trace_selected_count": 2,
+      "source_trace_dropped_count": 1,
+      "source_trace_stale_count": 1,
+      "source_trace_superseded_count": 1,
+      "source_trace_tombstone_count": 1
+    },
+    "proactive_brief": {
+      "job_count": 4,
+      "brief_count": 4,
+      "suggestion_count": 5,
+      "required_suggestion_kind_count": 4,
+      "covered_required_suggestion_kind_count": 4,
+      "missing_required_suggestion_kind_count": 0,
+      "evidence_ref_required_count": 5,
+      "evidence_ref_suggestion_count": 5,
+      "evidence_ref_coverage": 1.0,
+      "freshness_marker_count": 5,
+      "freshness_coverage": 1.0,
+      "action_rationale_count": 5,
+      "action_rationale_coverage": 1.0,
+      "recommended_count": 2,
+      "deferred_count": 2,
+      "rejected_count": 1,
+      "current_suggestion_count": 2,
+      "non_current_suggestion_count": 3,
+      "stale_warning_count": 3,
+      "invalid_current_suggestion_count": 0,
+      "untraced_suggestion_count": 0,
+      "unsupported_current_suggestion_count": 0,
+      "tombstone_violation_count": 0,
+      "source_trace_selected_count": 7,
+      "source_trace_dropped_count": 0,
+      "source_trace_stale_count": 2,
+      "source_trace_superseded_count": 2,
+      "source_trace_tombstone_count": 1
+    },
+    "scheduled_memory": {
+      "job_count": 4,
+      "task_run_count": 4,
+      "output_count": 5,
+      "required_task_kind_count": 4,
+      "covered_required_task_kind_count": 4,
+      "missing_required_task_kind_count": 0,
+      "evidence_ref_required_count": 5,
+      "evidence_ref_output_count": 5,
+      "evidence_ref_coverage": 1.0,
+      "freshness_marker_count": 5,
+      "freshness_coverage": 1.0,
+      "action_rationale_count": 5,
+      "action_rationale_coverage": 1.0,
+      "trace_required_count": 4,
+      "trace_complete_count": 4,
+      "trace_coverage": 1.0,
+      "source_mutation_count": 0,
+      "current_output_count": 2,
+      "non_current_output_count": 3,
+      "invalid_current_output_count": 0,
+      "untraced_output_count": 0,
+      "unsupported_current_output_count": 0,
+      "tombstone_violation_count": 0,
+      "source_trace_selected_count": 7,
+      "source_trace_dropped_count": 0,
+      "source_trace_stale_count": 2,
+      "source_trace_superseded_count": 3,
+      "source_trace_tombstone_count": 1
+    }
+  },
+  "suites": [
+    {
+      "suite_id": "trust_source_of_truth",
+      "status": "not_encoded",
+      "encoded_job_count": 0,
+      "score_mean": null,
+      "unsupported_claim_count": 0,
+      "wrong_result_count": 0,
+      "stale_answer_count": 0,
+      "conflict_detection_count": 0,
+      "update_rationale_available_count": 0,
+      "temporal_validity_not_encoded_count": 0,
+      "history_readback_encoded_count": 0,
+      "expected_evidence_recall": null,
+      "irrelevant_context_ratio": null,
+      "trace_explainability_count": 0,
+      "reason": "No checked-in real_world_job fixture is encoded for this suite."
+    },
+    {
+      "suite_id": "work_resume",
+      "status": "not_encoded",
+      "encoded_job_count": 0,
+      "score_mean": null,
+      "unsupported_claim_count": 0,
+      "wrong_result_count": 0,
+      "stale_answer_count": 0,
+      "conflict_detection_count": 0,
+      "update_rationale_available_count": 0,
+      "temporal_validity_not_encoded_count": 0,
+      "history_readback_encoded_count": 0,
+      "expected_evidence_recall": null,
+      "irrelevant_context_ratio": null,
+      "trace_explainability_count": 0,
+      "reason": "No checked-in real_world_job fixture is encoded for this suite."
+    },
+    {
+      "suite_id": "project_decisions",
+      "status": "not_encoded",
+      "encoded_job_count": 0,
+      "score_mean": null,
+      "unsupported_claim_count": 0,
+      "wrong_result_count": 0,
+      "stale_answer_count": 0,
+      "conflict_detection_count": 0,
+      "update_rationale_available_count": 0,
+      "temporal_validity_not_encoded_count": 0,
+      "history_readback_encoded_count": 0,
+      "expected_evidence_recall": null,
+      "irrelevant_context_ratio": null,
+      "trace_explainability_count": 0,
+      "reason": "No checked-in real_world_job fixture is encoded for this suite."
+    },
+    {
+      "suite_id": "retrieval",
+      "status": "not_encoded",
+      "encoded_job_count": 0,
+      "score_mean": null,
+      "unsupported_claim_count": 0,
+      "wrong_result_count": 0,
+      "stale_answer_count": 0,
+      "conflict_detection_count": 0,
+      "update_rationale_available_count": 0,
+      "temporal_validity_not_encoded_count": 0,
+      "history_readback_encoded_count": 0,
+      "expected_evidence_recall": null,
+      "irrelevant_context_ratio": null,
+      "trace_explainability_count": 0,
+      "reason": "No checked-in real_world_job fixture is encoded for this suite."
+    },
+    {
+      "suite_id": "memory_evolution",
+      "status": "not_encoded",
+      "encoded_job_count": 0,
+      "score_mean": null,
+      "unsupported_claim_count": 0,
+      "wrong_result_count": 0,
+      "stale_answer_count": 0,
+      "conflict_detection_count": 0,
+      "update_rationale_available_count": 0,
+      "temporal_validity_not_encoded_count": 0,
+      "history_readback_encoded_count": 0,
+      "expected_evidence_recall": null,
+      "irrelevant_context_ratio": null,
+      "trace_explainability_count": 0,
+      "reason": "No checked-in real_world_job fixture is encoded for this suite."
+    },
+    {
+      "suite_id": "consolidation",
+      "status": "not_encoded",
+      "encoded_job_count": 0,
+      "score_mean": null,
+      "unsupported_claim_count": 0,
+      "wrong_result_count": 0,
+      "stale_answer_count": 0,
+      "conflict_detection_count": 0,
+      "update_rationale_available_count": 0,
+      "temporal_validity_not_encoded_count": 0,
+      "history_readback_encoded_count": 0,
+      "expected_evidence_recall": null,
+      "irrelevant_context_ratio": null,
+      "trace_explainability_count": 0,
+      "reason": "No checked-in real_world_job fixture is encoded for this suite."
+    },
+    {
+      "suite_id": "memory_summary",
+      "status": "pass",
+      "encoded_job_count": 1,
+      "score_mean": 1.0,
+      "unsupported_claim_count": 0,
+      "wrong_result_count": 0,
+      "stale_answer_count": 0,
+      "conflict_detection_count": 0,
+      "update_rationale_available_count": 0,
+      "temporal_validity_not_encoded_count": 0,
+      "history_readback_encoded_count": 0,
+      "expected_evidence_recall": 1.0,
+      "irrelevant_context_ratio": 0.0,
+      "trace_explainability_count": 1,
+      "reason": "All 1 encoded job(s) passed."
+    },
+    {
+      "suite_id": "proactive_brief",
+      "status": "blocked",
+      "encoded_job_count": 5,
+      "score_mean": 0.8,
+      "unsupported_claim_count": 0,
+      "wrong_result_count": 0,
+      "stale_answer_count": 0,
+      "conflict_detection_count": 0,
+      "update_rationale_available_count": 0,
+      "temporal_validity_not_encoded_count": 0,
+      "history_readback_encoded_count": 0,
+      "expected_evidence_recall": 1.0,
+      "irrelevant_context_ratio": 0.0,
+      "trace_explainability_count": 5,
+      "reason": "At least one encoded job is blocked."
+    },
+    {
+      "suite_id": "scheduled_memory",
+      "status": "blocked",
+      "encoded_job_count": 5,
+      "score_mean": 0.8,
+      "unsupported_claim_count": 0,
+      "wrong_result_count": 0,
+      "stale_answer_count": 0,
+      "conflict_detection_count": 0,
+      "update_rationale_available_count": 0,
+      "temporal_validity_not_encoded_count": 0,
+      "history_readback_encoded_count": 0,
+      "expected_evidence_recall": 1.0,
+      "irrelevant_context_ratio": 0.0,
+      "trace_explainability_count": 5,
+      "reason": "At least one encoded job is blocked."
+    },
+    {
+      "suite_id": "knowledge_compilation",
+      "status": "not_encoded",
+      "encoded_job_count": 0,
+      "score_mean": null,
+      "unsupported_claim_count": 0,
+      "wrong_result_count": 0,
+      "stale_answer_count": 0,
+      "conflict_detection_count": 0,
+      "update_rationale_available_count": 0,
+      "temporal_validity_not_encoded_count": 0,
+      "history_readback_encoded_count": 0,
+      "expected_evidence_recall": null,
+      "irrelevant_context_ratio": null,
+      "trace_explainability_count": 0,
+      "reason": "No checked-in real_world_job fixture is encoded for this suite."
+    },
+    {
+      "suite_id": "operator_debugging_ux",
+      "status": "not_encoded",
+      "encoded_job_count": 0,
+      "score_mean": null,
+      "unsupported_claim_count": 0,
+      "wrong_result_count": 0,
+      "stale_answer_count": 0,
+      "conflict_detection_count": 0,
+      "update_rationale_available_count": 0,
+      "temporal_validity_not_encoded_count": 0,
+      "history_readback_encoded_count": 0,
+      "expected_evidence_recall": null,
+      "irrelevant_context_ratio": null,
+      "trace_explainability_count": 0,
+      "reason": "No checked-in real_world_job fixture is encoded for this suite."
+    },
+    {
+      "suite_id": "capture_integration",
+      "status": "not_encoded",
+      "encoded_job_count": 0,
+      "score_mean": null,
+      "unsupported_claim_count": 0,
+      "wrong_result_count": 0,
+      "stale_answer_count": 0,
+      "conflict_detection_count": 0,
+      "update_rationale_available_count": 0,
+      "temporal_validity_not_encoded_count": 0,
+      "history_readback_encoded_count": 0,
+      "expected_evidence_recall": null,
+      "irrelevant_context_ratio": null,
+      "trace_explainability_count": 0,
+      "reason": "No checked-in real_world_job fixture is encoded for this suite."
+    },
+    {
+      "suite_id": "production_ops",
+      "status": "not_encoded",
+      "encoded_job_count": 0,
+      "score_mean": null,
+      "unsupported_claim_count": 0,
+      "wrong_result_count": 0,
+      "stale_answer_count": 0,
+      "conflict_detection_count": 0,
+      "update_rationale_available_count": 0,
+      "temporal_validity_not_encoded_count": 0,
+      "history_readback_encoded_count": 0,
+      "expected_evidence_recall": null,
+      "irrelevant_context_ratio": null,
+      "trace_explainability_count": 0,
+      "reason": "No checked-in real_world_job fixture is encoded for this suite."
+    },
+    {
+      "suite_id": "personalization",
+      "status": "not_encoded",
+      "encoded_job_count": 0,
+      "score_mean": null,
+      "unsupported_claim_count": 0,
+      "wrong_result_count": 0,
+      "stale_answer_count": 0,
+      "conflict_detection_count": 0,
+      "update_rationale_available_count": 0,
+      "temporal_validity_not_encoded_count": 0,
+      "history_readback_encoded_count": 0,
+      "expected_evidence_recall": null,
+      "irrelevant_context_ratio": null,
+      "trace_explainability_count": 0,
+      "reason": "No checked-in real_world_job fixture is encoded for this suite."
+    },
+    {
+      "suite_id": "core_archival_memory",
+      "status": "not_encoded",
+      "encoded_job_count": 0,
+      "score_mean": null,
+      "unsupported_claim_count": 0,
+      "wrong_result_count": 0,
+      "stale_answer_count": 0,
+      "conflict_detection_count": 0,
+      "update_rationale_available_count": 0,
+      "temporal_validity_not_encoded_count": 0,
+      "history_readback_encoded_count": 0,
+      "expected_evidence_recall": null,
+      "irrelevant_context_ratio": null,
+      "trace_explainability_count": 0,
+      "reason": "No checked-in real_world_job fixture is encoded for this suite."
+    },
+    {
+      "suite_id": "context_trajectory",
+      "status": "not_encoded",
+      "encoded_job_count": 0,
+      "score_mean": null,
+      "unsupported_claim_count": 0,
+      "wrong_result_count": 0,
+      "stale_answer_count": 0,
+      "conflict_detection_count": 0,
+      "update_rationale_available_count": 0,
+      "temporal_validity_not_encoded_count": 0,
+      "history_readback_encoded_count": 0,
+      "expected_evidence_recall": null,
+      "irrelevant_context_ratio": null,
+      "trace_explainability_count": 0,
+      "reason": "No checked-in real_world_job fixture is encoded for this suite."
+    }
+  ],
+  "jobs": [
+    {
+      "suite_id": "memory_summary",
+      "job_id": "memory-summary-source-trace-001",
+      "title": "Read back a reviewable current memory summary with source trace",
+      "status": "pass",
+      "answer_type": "reviewable_memory_summary",
+      "requires_caveat": false,
+      "requires_refusal": false,
+      "can_answer_unknown": false,
+      "normalized_score": 1.0,
+      "hard_fail_hits": [],
+      "expected_evidence": [
+        {
+          "evidence_id": "summary-contract-current",
+          "claim_id": "summary_contract_reviewable",
+          "requirement": "cite"
+        },
+        {
+          "evidence_id": "xy952-summary-contract",
+          "claim_id": "summary_stage_now_fixture_backed",
+          "requirement": "cite"
+        },
+        {
+          "evidence_id": "summary-ttl-tombstone",
+          "claim_id": "summary_preserves_tombstone",
+          "requirement": "cite"
+        },
+        {
+          "evidence_id": "summary-contract-non-parity-boundary",
+          "claim_id": "summary_excludes_unsupported_parity",
+          "requirement": "cite"
+        }
+      ],
+      "produced_answer": "Memory summaries now use a reviewable source-trace contract. Postgres remains authoritative while Qdrant remains a rebuildable derived index. The old memory-summary stage state was not_tested before XY-952. The pre-XY-905 live memory_evolution loss is historical. The fixture-only managed-memory parity claim is tombstoned and excluded. Project profile: ELF summaries are reviewable derived readback, not authoritative notes. Excluded candidate: the local summary contract proves parity with managed memory products.",
+      "produced_evidence": [
+        "summary-contract-current",
+        "summary-contract-non-parity-boundary",
+        "summary-ttl-tombstone",
+        "xy952-summary-contract"
+      ],
+      "unsupported_claim_count": 0,
+      "wrong_result_count": 0,
+      "stale_answer_count": 0,
+      "conflict_detection_count": 0,
+      "update_rationale_available": false,
+      "temporal_validity_not_encoded": false,
+      "history_readback_encoded": false,
+      "retrieval_quality": {
+        "expected_evidence_total": 4,
+        "expected_evidence_matched": 4,
+        "expected_evidence_recall": 1.0,
+        "produced_evidence_total": 4,
+        "irrelevant_context_count": 0,
+        "irrelevant_context_ratio": 0.0,
+        "trap_context_count": 0
+      },
+      "latency_ms": 51.676775,
+      "cost": {
+        "currency": "USD",
+        "amount": 0.0,
+        "input_tokens": 0,
+        "output_tokens": 0
+      },
+      "trace_explainability": {
+        "trace_id": "2e80669d-2bcf-4238-b780-9b42aa72d2a2",
+        "stages": [
+          {
+            "stage_name": "dreaming_readback.service_list",
+            "kept_evidence": [
+              "stale-summary-gap",
+              "summary-background-sot",
+              "summary-contract-current",
+              "summary-contract-non-parity-boundary",
+              "summary-temporary-claim",
+              "summary-ttl-tombstone",
+              "superseded-live-evolution-loss",
+              "xy952-summary-contract"
+            ],
+            "dropped_evidence": [],
+            "demoted_evidence": [],
+            "distractor_evidence": [],
+            "notes": "Read 8 source refs from ElfService::list for memory_summary."
+          },
+          {
+            "stage_name": "dreaming_readback.source_mutation_guard",
+            "kept_evidence": [
+              "stale-summary-gap",
+              "summary-background-sot",
+              "summary-contract-current",
+              "summary-contract-non-parity-boundary",
+              "summary-temporary-claim",
+              "summary-ttl-tombstone",
+              "superseded-live-evolution-loss",
+              "xy952-summary-contract"
+            ],
+            "dropped_evidence": [],
+            "demoted_evidence": [],
+            "distractor_evidence": [],
+            "notes": "Generated readback artifacts without mutating source notes."
+          }
+        ]
+      },
+      "memory_summary": {
+        "summary_count": 1,
+        "entry_count": 7,
+        "required_category_count": 6,
+        "covered_required_category_count": 6,
+        "missing_required_category_count": 0,
+        "top_of_mind_count": 1,
+        "background_count": 1,
+        "stale_count": 1,
+        "superseded_count": 1,
+        "tombstone_count": 1,
+        "derived_project_profile_count": 2,
+        "source_ref_required_count": 6,
+        "source_ref_entry_count": 6,
+        "source_ref_coverage": 1.0,
+        "freshness_marker_count": 7,
+        "freshness_coverage": 1.0,
+        "rationale_count": 7,
+        "rationale_coverage": 1.0,
+        "invalid_top_of_mind_count": 0,
+        "untraced_entry_count": 0,
+        "derived_with_source_or_unsupported_count": 2,
+        "derived_missing_source_or_unsupported_count": 0,
+        "unsupported_derived_entry_count": 1,
+        "unsupported_current_entry_count": 0,
+        "tombstone_ref_count": 1,
+        "source_trace_selected_count": 2,
+        "source_trace_dropped_count": 1,
+        "source_trace_stale_count": 1,
+        "source_trace_superseded_count": 1,
+        "source_trace_tombstone_count": 1
+      },
+      "trap_ids_used": [],
+      "dimension_scores": [
+        {
+          "dimension": "answer_correctness",
+          "score": 1.0,
+          "max_points": 1.0,
+          "weight": 0.2
+        },
+        {
+          "dimension": "evidence_grounding",
+          "score": 1.0,
+          "max_points": 1.0,
+          "weight": 0.25
+        },
+        {
+          "dimension": "lifecycle_behavior",
+          "score": 1.0,
+          "max_points": 1.0,
+          "weight": 0.25
+        },
+        {
+          "dimension": "trap_avoidance",
+          "score": 1.0,
+          "max_points": 1.0,
+          "weight": 0.15
+        },
+        {
+          "dimension": "uncertainty_handling",
+          "score": 1.0,
+          "max_points": 1.0,
+          "weight": 0.15
+        }
+      ],
+      "reason": "Job passed with normalized_score 1.000.",
+      "evidence_required_count": 4,
+      "evidence_covered_count": 4,
+      "source_ref_required_count": 4,
+      "source_ref_covered_count": 4,
+      "quote_required_count": 4,
+      "quote_covered_count": 4,
+      "stale_retrieval_count": 0,
+      "scope_check_count": 0,
+      "scope_correct_count": 0,
+      "scope_violation_count": 0,
+      "redaction_leak_count": 0,
+      "qdrant_rebuild_case": false
+    },
+    {
+      "suite_id": "proactive_brief",
+      "job_id": "proactive-daily-project-brief-001",
+      "title": "Generate a daily project brief from current project memory",
+      "status": "pass",
+      "answer_type": "proactive_project_brief",
+      "requires_caveat": false,
+      "requires_refusal": false,
+      "can_answer_unknown": true,
+      "normalized_score": 1.0,
+      "hard_fail_hits": [],
+      "expected_evidence": [
+        {
+          "evidence_id": "daily-current-validation-gate",
+          "claim_id": "daily_validation_gate",
+          "requirement": "cite"
+        },
+        {
+          "evidence_id": "daily-current-ledger-update",
+          "claim_id": "daily_ledger_update",
+          "requirement": "cite"
+        }
+      ],
+      "produced_answer": "Run the proactive brief benchmark gate Run the proactive brief fixture command before claiming the lane is validation-ready, then update the XY-951 ledger.",
+      "produced_evidence": [
+        "daily-current-ledger-update",
+        "daily-current-validation-gate"
+      ],
+      "unsupported_claim_count": 0,
+      "wrong_result_count": 0,
+      "stale_answer_count": 0,
+      "conflict_detection_count": 0,
+      "update_rationale_available": false,
+      "temporal_validity_not_encoded": false,
+      "history_readback_encoded": false,
+      "retrieval_quality": {
+        "expected_evidence_total": 2,
+        "expected_evidence_matched": 2,
+        "expected_evidence_recall": 1.0,
+        "produced_evidence_total": 2,
+        "irrelevant_context_count": 0,
+        "irrelevant_context_ratio": 0.0,
+        "trap_context_count": 0
+      },
+      "latency_ms": 6.884306,
+      "cost": {
+        "currency": "USD",
+        "amount": 0.0,
+        "input_tokens": 0,
+        "output_tokens": 0
+      },
+      "trace_explainability": {
+        "trace_id": "fc854889-2ac4-436b-a885-b43053922cb9",
+        "stages": [
+          {
+            "stage_name": "dreaming_readback.service_list",
+            "kept_evidence": [
+              "daily-current-ledger-update",
+              "daily-current-validation-gate",
+              "daily-old-parity-trap"
+            ],
+            "dropped_evidence": [],
+            "demoted_evidence": [],
+            "distractor_evidence": [],
+            "notes": "Read 3 source refs from ElfService::list for proactive_brief."
+          },
+          {
+            "stage_name": "dreaming_readback.source_mutation_guard",
+            "kept_evidence": [
+              "daily-current-ledger-update",
+              "daily-current-validation-gate",
+              "daily-old-parity-trap"
+            ],
+            "dropped_evidence": [],
+            "demoted_evidence": [],
+            "distractor_evidence": [],
+            "notes": "Generated readback artifacts without mutating source notes."
+          }
+        ]
+      },
+      "proactive_brief": {
+        "brief_count": 1,
+        "suggestion_count": 1,
+        "required_suggestion_kind_count": 1,
+        "covered_required_suggestion_kind_count": 1,
+        "missing_required_suggestion_kind_count": 0,
+        "evidence_ref_required_count": 1,
+        "evidence_ref_suggestion_count": 1,
+        "evidence_ref_coverage": 1.0,
+        "freshness_marker_count": 1,
+        "freshness_coverage": 1.0,
+        "action_rationale_count": 1,
+        "action_rationale_coverage": 1.0,
+        "recommended_count": 1,
+        "deferred_count": 0,
+        "rejected_count": 0,
+        "current_suggestion_count": 1,
+        "non_current_suggestion_count": 0,
+        "stale_warning_count": 0,
+        "invalid_current_suggestion_count": 0,
+        "untraced_suggestion_count": 0,
+        "unsupported_current_suggestion_count": 0,
+        "tombstone_violation_count": 0,
+        "source_trace_selected_count": 2,
+        "source_trace_dropped_count": 0,
+        "source_trace_stale_count": 1,
+        "source_trace_superseded_count": 0,
+        "source_trace_tombstone_count": 0
+      },
+      "trap_ids_used": [],
+      "dimension_scores": [
+        {
+          "dimension": "answer_correctness",
+          "score": 1.0,
+          "max_points": 1.0,
+          "weight": 0.25
+        },
+        {
+          "dimension": "evidence_grounding",
+          "score": 1.0,
+          "max_points": 1.0,
+          "weight": 0.3
+        },
+        {
+          "dimension": "lifecycle_behavior",
+          "score": 1.0,
+          "max_points": 1.0,
+          "weight": 0.1
+        },
+        {
+          "dimension": "trap_avoidance",
+          "score": 1.0,
+          "max_points": 1.0,
+          "weight": 0.2
+        },
+        {
+          "dimension": "workflow_helpfulness",
+          "score": 1.0,
+          "max_points": 1.0,
+          "weight": 0.15
+        }
+      ],
+      "reason": "Job passed with normalized_score 1.000.",
+      "evidence_required_count": 2,
+      "evidence_covered_count": 2,
+      "source_ref_required_count": 2,
+      "source_ref_covered_count": 2,
+      "quote_required_count": 2,
+      "quote_covered_count": 2,
+      "stale_retrieval_count": 0,
+      "scope_check_count": 0,
+      "scope_correct_count": 0,
+      "scope_violation_count": 0,
+      "redaction_leak_count": 0,
+      "qdrant_rebuild_case": false
+    },
+    {
+      "suite_id": "proactive_brief",
+      "job_id": "proactive-private-corpus-refresh-blocked-001",
+      "title": "Block private-corpus refresh suggestions when no operator manifest exists",
+      "status": "blocked",
+      "answer_type": "proactive_project_brief",
+      "requires_caveat": true,
+      "requires_refusal": true,
+      "can_answer_unknown": true,
+      "normalized_score": 0.0,
+      "hard_fail_hits": [],
+      "expected_evidence": [],
+      "produced_answer": "",
+      "produced_evidence": [],
+      "unsupported_claim_count": 0,
+      "wrong_result_count": 0,
+      "stale_answer_count": 0,
+      "conflict_detection_count": 0,
+      "update_rationale_available": false,
+      "temporal_validity_not_encoded": false,
+      "history_readback_encoded": false,
+      "retrieval_quality": {
+        "expected_evidence_total": 0,
+        "expected_evidence_matched": 0,
+        "expected_evidence_recall": 1.0,
+        "produced_evidence_total": 0,
+        "irrelevant_context_count": 0,
+        "irrelevant_context_ratio": 0.0,
+        "trap_context_count": 0
+      },
+      "latency_ms": 0.0,
+      "cost": {
+        "currency": "USD",
+        "amount": 0.0,
+        "input_tokens": 0,
+        "output_tokens": 0
+      },
+      "trace_explainability": {
+        "failure_stage": "live_adapter.suite_support",
+        "failure_reason": "No operator-owned private production corpus manifest is available; private-corpus refresh suggestions stay blocked under XY-930.",
+        "stages": [
+          {
+            "stage_name": "live_adapter.suite_support",
+            "kept_evidence": [],
+            "dropped_evidence": [],
+            "demoted_evidence": [],
+            "distractor_evidence": [],
+            "notes": "No operator-owned private production corpus manifest is available; private-corpus refresh suggestions stay blocked under XY-930."
+          }
+        ]
+      },
+      "trap_ids_used": [],
+      "dimension_scores": [
+        {
+          "dimension": "answer_correctness",
+          "score": 0.0,
+          "max_points": 1.0,
+          "weight": 0.3
+        },
+        {
+          "dimension": "evidence_grounding",
+          "score": 0.0,
+          "max_points": 1.0,
+          "weight": 0.3
+        },
+        {
+          "dimension": "lifecycle_behavior",
+          "score": 0.0,
+          "max_points": 1.0,
+          "weight": 0.15
+        },
+        {
+          "dimension": "uncertainty_handling",
+          "score": 0.0,
+          "max_points": 1.0,
+          "weight": 0.25
+        }
+      ],
+      "reason": "No operator-owned private production corpus manifest is available; private-corpus refresh suggestions stay blocked under XY-930.",
+      "evidence_required_count": 0,
+      "evidence_covered_count": 0,
+      "source_ref_required_count": 0,
+      "source_ref_covered_count": 0,
+      "quote_required_count": 0,
+      "quote_covered_count": 0,
+      "stale_retrieval_count": 0,
+      "scope_check_count": 0,
+      "scope_correct_count": 0,
+      "scope_violation_count": 0,
+      "redaction_leak_count": 0,
+      "qdrant_rebuild_case": false
+    },
+    {
+      "suite_id": "proactive_brief",
+      "job_id": "proactive-resume-work-brief-001",
+      "title": "Generate a resume-work brief from current handoff memory",
+      "status": "pass",
+      "answer_type": "proactive_project_brief",
+      "requires_caveat": false,
+      "requires_refusal": false,
+      "can_answer_unknown": true,
+      "normalized_score": 1.0,
+      "hard_fail_hits": [],
+      "expected_evidence": [
+        {
+          "evidence_id": "resume-current-handoff",
+          "claim_id": "resume_current_handoff",
+          "requirement": "cite"
+        },
+        {
+          "evidence_id": "resume-current-validation",
+          "claim_id": "resume_validation",
+          "requirement": "cite"
+        }
+      ],
+      "produced_answer": "Continue proactive brief scoring Continue the XY-953 fixture and runner scoring work on y/elf-xy-953, then run the proactive brief benchmark command.",
+      "produced_evidence": [
+        "resume-current-handoff",
+        "resume-current-validation"
+      ],
+      "unsupported_claim_count": 0,
+      "wrong_result_count": 0,
+      "stale_answer_count": 0,
+      "conflict_detection_count": 0,
+      "update_rationale_available": false,
+      "temporal_validity_not_encoded": false,
+      "history_readback_encoded": false,
+      "retrieval_quality": {
+        "expected_evidence_total": 2,
+        "expected_evidence_matched": 2,
+        "expected_evidence_recall": 1.0,
+        "produced_evidence_total": 2,
+        "irrelevant_context_count": 0,
+        "irrelevant_context_ratio": 0.0,
+        "trap_context_count": 0
+      },
+      "latency_ms": 7.336724,
+      "cost": {
+        "currency": "USD",
+        "amount": 0.0,
+        "input_tokens": 0,
+        "output_tokens": 0
+      },
+      "trace_explainability": {
+        "trace_id": "c77d3ddb-d0c0-4168-a528-a585adfc8a7f",
+        "stages": [
+          {
+            "stage_name": "dreaming_readback.service_list",
+            "kept_evidence": [
+              "resume-current-handoff",
+              "resume-current-validation",
+              "resume-stale-validation"
+            ],
+            "dropped_evidence": [],
+            "demoted_evidence": [],
+            "distractor_evidence": [],
+            "notes": "Read 3 source refs from ElfService::list for proactive_brief."
+          },
+          {
+            "stage_name": "dreaming_readback.source_mutation_guard",
+            "kept_evidence": [
+              "resume-current-handoff",
+              "resume-current-validation",
+              "resume-stale-validation"
+            ],
+            "dropped_evidence": [],
+            "demoted_evidence": [],
+            "distractor_evidence": [],
+            "notes": "Generated readback artifacts without mutating source notes."
+          }
+        ]
+      },
+      "proactive_brief": {
+        "brief_count": 1,
+        "suggestion_count": 1,
+        "required_suggestion_kind_count": 1,
+        "covered_required_suggestion_kind_count": 1,
+        "missing_required_suggestion_kind_count": 0,
+        "evidence_ref_required_count": 1,
+        "evidence_ref_suggestion_count": 1,
+        "evidence_ref_coverage": 1.0,
+        "freshness_marker_count": 1,
+        "freshness_coverage": 1.0,
+        "action_rationale_count": 1,
+        "action_rationale_coverage": 1.0,
+        "recommended_count": 1,
+        "deferred_count": 0,
+        "rejected_count": 0,
+        "current_suggestion_count": 1,
+        "non_current_suggestion_count": 0,
+        "stale_warning_count": 0,
+        "invalid_current_suggestion_count": 0,
+        "untraced_suggestion_count": 0,
+        "unsupported_current_suggestion_count": 0,
+        "tombstone_violation_count": 0,
+        "source_trace_selected_count": 2,
+        "source_trace_dropped_count": 0,
+        "source_trace_stale_count": 1,
+        "source_trace_superseded_count": 0,
+        "source_trace_tombstone_count": 0
+      },
+      "trap_ids_used": [],
+      "dimension_scores": [
+        {
+          "dimension": "answer_correctness",
+          "score": 1.0,
+          "max_points": 1.0,
+          "weight": 0.25
+        },
+        {
+          "dimension": "evidence_grounding",
+          "score": 1.0,
+          "max_points": 1.0,
+          "weight": 0.3
+        },
+        {
+          "dimension": "lifecycle_behavior",
+          "score": 1.0,
+          "max_points": 1.0,
+          "weight": 0.1
+        },
+        {
+          "dimension": "trap_avoidance",
+          "score": 1.0,
+          "max_points": 1.0,
+          "weight": 0.2
+        },
+        {
+          "dimension": "workflow_helpfulness",
+          "score": 1.0,
+          "max_points": 1.0,
+          "weight": 0.15
+        }
+      ],
+      "reason": "Job passed with normalized_score 1.000.",
+      "evidence_required_count": 2,
+      "evidence_covered_count": 2,
+      "source_ref_required_count": 2,
+      "source_ref_covered_count": 2,
+      "quote_required_count": 2,
+      "quote_covered_count": 2,
+      "stale_retrieval_count": 0,
+      "scope_check_count": 0,
+      "scope_correct_count": 0,
+      "scope_violation_count": 0,
+      "redaction_leak_count": 0,
+      "qdrant_rebuild_case": false
+    },
+    {
+      "suite_id": "proactive_brief",
+      "job_id": "proactive-stale-decision-audit-001",
+      "title": "Warn about a stale project decision before suggesting work",
+      "status": "pass",
+      "answer_type": "proactive_project_brief",
+      "requires_caveat": false,
+      "requires_refusal": false,
+      "can_answer_unknown": true,
+      "normalized_score": 1.0,
+      "hard_fail_hits": [],
+      "expected_evidence": [
+        {
+          "evidence_id": "stale-decision-old-gate",
+          "claim_id": "stale_decision_replaced",
+          "requirement": "cite"
+        },
+        {
+          "evidence_id": "stale-decision-new-gate",
+          "claim_id": "stale_decision_replaced",
+          "requirement": "cite"
+        }
+      ],
+      "produced_answer": "Defer the old operator-ux-only readiness gate Do not use the old operator-ux-only decision as current readiness evidence; it is superseded by the direct proactive brief suite.",
+      "produced_evidence": [
+        "stale-decision-new-gate",
+        "stale-decision-old-gate"
+      ],
+      "unsupported_claim_count": 0,
+      "wrong_result_count": 0,
+      "stale_answer_count": 0,
+      "conflict_detection_count": 0,
+      "update_rationale_available": false,
+      "temporal_validity_not_encoded": false,
+      "history_readback_encoded": false,
+      "retrieval_quality": {
+        "expected_evidence_total": 2,
+        "expected_evidence_matched": 2,
+        "expected_evidence_recall": 1.0,
+        "produced_evidence_total": 2,
+        "irrelevant_context_count": 0,
+        "irrelevant_context_ratio": 0.0,
+        "trap_context_count": 1
+      },
+      "latency_ms": 9.269811,
+      "cost": {
+        "currency": "USD",
+        "amount": 0.0,
+        "input_tokens": 0,
+        "output_tokens": 0
+      },
+      "trace_explainability": {
+        "trace_id": "d7decd9a-d635-41b5-9dcc-c6e3c5c44fb7",
+        "stages": [
+          {
+            "stage_name": "dreaming_readback.service_list",
+            "kept_evidence": [
+              "stale-decision-new-gate",
+              "stale-decision-old-gate"
+            ],
+            "dropped_evidence": [],
+            "demoted_evidence": [],
+            "distractor_evidence": [],
+            "notes": "Read 2 source refs from ElfService::list for proactive_brief."
+          },
+          {
+            "stage_name": "dreaming_readback.source_mutation_guard",
+            "kept_evidence": [
+              "stale-decision-new-gate",
+              "stale-decision-old-gate"
+            ],
+            "dropped_evidence": [],
+            "demoted_evidence": [],
+            "distractor_evidence": [],
+            "notes": "Generated readback artifacts without mutating source notes."
+          }
+        ]
+      },
+      "proactive_brief": {
+        "brief_count": 1,
+        "suggestion_count": 1,
+        "required_suggestion_kind_count": 1,
+        "covered_required_suggestion_kind_count": 1,
+        "missing_required_suggestion_kind_count": 0,
+        "evidence_ref_required_count": 1,
+        "evidence_ref_suggestion_count": 1,
+        "evidence_ref_coverage": 1.0,
+        "freshness_marker_count": 1,
+        "freshness_coverage": 1.0,
+        "action_rationale_count": 1,
+        "action_rationale_coverage": 1.0,
+        "recommended_count": 0,
+        "deferred_count": 1,
+        "rejected_count": 0,
+        "current_suggestion_count": 0,
+        "non_current_suggestion_count": 1,
+        "stale_warning_count": 1,
+        "invalid_current_suggestion_count": 0,
+        "untraced_suggestion_count": 0,
+        "unsupported_current_suggestion_count": 0,
+        "tombstone_violation_count": 0,
+        "source_trace_selected_count": 1,
+        "source_trace_dropped_count": 0,
+        "source_trace_stale_count": 0,
+        "source_trace_superseded_count": 1,
+        "source_trace_tombstone_count": 0
+      },
+      "trap_ids_used": [],
+      "dimension_scores": [
+        {
+          "dimension": "answer_correctness",
+          "score": 1.0,
+          "max_points": 1.0,
+          "weight": 0.25
+        },
+        {
+          "dimension": "evidence_grounding",
+          "score": 1.0,
+          "max_points": 1.0,
+          "weight": 0.3
+        },
+        {
+          "dimension": "lifecycle_behavior",
+          "score": 1.0,
+          "max_points": 1.0,
+          "weight": 0.1
+        },
+        {
+          "dimension": "trap_avoidance",
+          "score": 1.0,
+          "max_points": 1.0,
+          "weight": 0.2
+        },
+        {
+          "dimension": "workflow_helpfulness",
+          "score": 1.0,
+          "max_points": 1.0,
+          "weight": 0.15
+        }
+      ],
+      "reason": "Job passed with normalized_score 1.000.",
+      "evidence_required_count": 2,
+      "evidence_covered_count": 2,
+      "source_ref_required_count": 2,
+      "source_ref_covered_count": 2,
+      "quote_required_count": 2,
+      "quote_covered_count": 2,
+      "stale_retrieval_count": 0,
+      "scope_check_count": 0,
+      "scope_correct_count": 0,
+      "scope_violation_count": 0,
+      "redaction_leak_count": 0,
+      "qdrant_rebuild_case": false
+    },
+    {
+      "suite_id": "proactive_brief",
+      "job_id": "proactive-stale-plan-preference-warning-001",
+      "title": "Reject stale plan and preference suggestions after TTL invalidation",
+      "status": "pass",
+      "answer_type": "proactive_project_brief",
+      "requires_caveat": false,
+      "requires_refusal": false,
+      "can_answer_unknown": true,
+      "normalized_score": 1.0,
+      "hard_fail_hits": [],
+      "expected_evidence": [
+        {
+          "evidence_id": "stale-plan-ttl",
+          "claim_id": "stale_plan_rejected",
+          "requirement": "cite"
+        },
+        {
+          "evidence_id": "current-preference-concise-brief",
+          "claim_id": "current_preference_concise",
+          "requirement": "cite"
+        }
+      ],
+      "produced_answer": "Reject the expired publish-first plan Do not publish the proactive report before running the new proactive brief benchmark; the old plan expired under TTL. Defer long product-comparison prose Use concise evidence-linked proactive briefs and avoid broad hosted-product parity claims.",
+      "produced_evidence": [
+        "current-plan-run-gate",
+        "current-preference-concise-brief",
+        "old-preference-long-brief",
+        "stale-plan-old",
+        "stale-plan-ttl"
+      ],
+      "unsupported_claim_count": 0,
+      "wrong_result_count": 0,
+      "stale_answer_count": 0,
+      "conflict_detection_count": 0,
+      "update_rationale_available": false,
+      "temporal_validity_not_encoded": false,
+      "history_readback_encoded": false,
+      "retrieval_quality": {
+        "expected_evidence_total": 2,
+        "expected_evidence_matched": 2,
+        "expected_evidence_recall": 1.0,
+        "produced_evidence_total": 5,
+        "irrelevant_context_count": 0,
+        "irrelevant_context_ratio": 0.0,
+        "trap_context_count": 1
+      },
+      "latency_ms": 7.991892,
+      "cost": {
+        "currency": "USD",
+        "amount": 0.0,
+        "input_tokens": 0,
+        "output_tokens": 0
+      },
+      "trace_explainability": {
+        "trace_id": "f2e795b5-7ac4-4f7d-ab49-75392f6ba8a8",
+        "stages": [
+          {
+            "stage_name": "dreaming_readback.service_list",
+            "kept_evidence": [
+              "current-plan-run-gate",
+              "current-preference-concise-brief",
+              "old-preference-long-brief",
+              "stale-plan-old",
+              "stale-plan-ttl"
+            ],
+            "dropped_evidence": [],
+            "demoted_evidence": [],
+            "distractor_evidence": [],
+            "notes": "Read 5 source refs from ElfService::list for proactive_brief."
+          },
+          {
+            "stage_name": "dreaming_readback.source_mutation_guard",
+            "kept_evidence": [
+              "current-plan-run-gate",
+              "current-preference-concise-brief",
+              "old-preference-long-brief",
+              "stale-plan-old",
+              "stale-plan-ttl"
+            ],
+            "dropped_evidence": [],
+            "demoted_evidence": [],
+            "distractor_evidence": [],
+            "notes": "Generated readback artifacts without mutating source notes."
+          }
+        ]
+      },
+      "proactive_brief": {
+        "brief_count": 1,
+        "suggestion_count": 2,
+        "required_suggestion_kind_count": 1,
+        "covered_required_suggestion_kind_count": 1,
+        "missing_required_suggestion_kind_count": 0,
+        "evidence_ref_required_count": 2,
+        "evidence_ref_suggestion_count": 2,
+        "evidence_ref_coverage": 1.0,
+        "freshness_marker_count": 2,
+        "freshness_coverage": 1.0,
+        "action_rationale_count": 2,
+        "action_rationale_coverage": 1.0,
+        "recommended_count": 0,
+        "deferred_count": 1,
+        "rejected_count": 1,
+        "current_suggestion_count": 0,
+        "non_current_suggestion_count": 2,
+        "stale_warning_count": 2,
+        "invalid_current_suggestion_count": 0,
+        "untraced_suggestion_count": 0,
+        "unsupported_current_suggestion_count": 0,
+        "tombstone_violation_count": 0,
+        "source_trace_selected_count": 2,
+        "source_trace_dropped_count": 0,
+        "source_trace_stale_count": 0,
+        "source_trace_superseded_count": 1,
+        "source_trace_tombstone_count": 1
+      },
+      "trap_ids_used": [],
+      "dimension_scores": [
+        {
+          "dimension": "answer_correctness",
+          "score": 1.0,
+          "max_points": 1.0,
+          "weight": 0.25
+        },
+        {
+          "dimension": "evidence_grounding",
+          "score": 1.0,
+          "max_points": 1.0,
+          "weight": 0.3
+        },
+        {
+          "dimension": "lifecycle_behavior",
+          "score": 1.0,
+          "max_points": 1.0,
+          "weight": 0.1
+        },
+        {
+          "dimension": "trap_avoidance",
+          "score": 1.0,
+          "max_points": 1.0,
+          "weight": 0.2
+        },
+        {
+          "dimension": "workflow_helpfulness",
+          "score": 1.0,
+          "max_points": 1.0,
+          "weight": 0.15
+        }
+      ],
+      "reason": "Job passed with normalized_score 1.000.",
+      "evidence_required_count": 2,
+      "evidence_covered_count": 2,
+      "source_ref_required_count": 2,
+      "source_ref_covered_count": 2,
+      "quote_required_count": 2,
+      "quote_covered_count": 2,
+      "stale_retrieval_count": 0,
+      "scope_check_count": 0,
+      "scope_correct_count": 0,
+      "scope_violation_count": 0,
+      "redaction_leak_count": 0,
+      "qdrant_rebuild_case": false
+    },
+    {
+      "suite_id": "scheduled_memory",
+      "job_id": "scheduled-knowledge-page-refresh-suggestion-001",
+      "title": "Suggest a knowledge-page refresh from scheduled memory",
+      "status": "pass",
+      "answer_type": "scheduled_memory_task",
+      "requires_caveat": false,
+      "requires_refusal": false,
+      "can_answer_unknown": true,
+      "normalized_score": 1.0,
+      "hard_fail_hits": [],
+      "expected_evidence": [
+        {
+          "evidence_id": "scheduled-knowledge-page-stale-finding",
+          "claim_id": "scheduled_knowledge_refresh_suggested",
+          "requirement": "cite"
+        },
+        {
+          "evidence_id": "scheduled-knowledge-reviewable-refresh",
+          "claim_id": "scheduled_knowledge_refresh_suggested",
+          "requirement": "cite"
+        }
+      ],
+      "produced_answer": "Suggest a reviewable knowledge-page rebuild for the stale scheduled-memory blocked-state reference; do not rewrite source notes silently.",
+      "produced_evidence": [
+        "scheduled-knowledge-page-stale-finding",
+        "scheduled-knowledge-reviewable-refresh"
+      ],
+      "unsupported_claim_count": 0,
+      "wrong_result_count": 0,
+      "stale_answer_count": 0,
+      "conflict_detection_count": 0,
+      "update_rationale_available": false,
+      "temporal_validity_not_encoded": false,
+      "history_readback_encoded": false,
+      "retrieval_quality": {
+        "expected_evidence_total": 2,
+        "expected_evidence_matched": 2,
+        "expected_evidence_recall": 1.0,
+        "produced_evidence_total": 2,
+        "irrelevant_context_count": 0,
+        "irrelevant_context_ratio": 0.0,
+        "trap_context_count": 0
+      },
+      "latency_ms": 6.31843,
+      "cost": {
+        "currency": "USD",
+        "amount": 0.0,
+        "input_tokens": 0,
+        "output_tokens": 0
+      },
+      "trace_explainability": {
+        "trace_id": "df5b34bc-b8bd-427c-a531-7c37ff2444c8",
+        "stages": [
+          {
+            "stage_name": "dreaming_readback.service_list",
+            "kept_evidence": [
+              "scheduled-knowledge-page-stale-finding",
+              "scheduled-knowledge-reviewable-refresh",
+              "scheduled-knowledge-silent-rewrite-trap"
+            ],
+            "dropped_evidence": [],
+            "demoted_evidence": [],
+            "distractor_evidence": [],
+            "notes": "Read 3 source refs from ElfService::list for scheduled_memory."
+          },
+          {
+            "stage_name": "dreaming_readback.source_mutation_guard",
+            "kept_evidence": [
+              "scheduled-knowledge-page-stale-finding",
+              "scheduled-knowledge-reviewable-refresh",
+              "scheduled-knowledge-silent-rewrite-trap"
+            ],
+            "dropped_evidence": [],
+            "demoted_evidence": [],
+            "distractor_evidence": [],
+            "notes": "Generated readback artifacts without mutating source notes."
+          }
+        ]
+      },
+      "scheduled_memory": {
+        "task_run_count": 1,
+        "output_count": 1,
+        "required_task_kind_count": 1,
+        "covered_required_task_kind_count": 1,
+        "missing_required_task_kind_count": 0,
+        "evidence_ref_required_count": 1,
+        "evidence_ref_output_count": 1,
+        "evidence_ref_coverage": 1.0,
+        "freshness_marker_count": 1,
+        "freshness_coverage": 1.0,
+        "action_rationale_count": 1,
+        "action_rationale_coverage": 1.0,
+        "trace_required_count": 1,
+        "trace_complete_count": 1,
+        "trace_coverage": 1.0,
+        "source_mutation_count": 0,
+        "current_output_count": 1,
+        "non_current_output_count": 0,
+        "invalid_current_output_count": 0,
+        "untraced_output_count": 0,
+        "unsupported_current_output_count": 0,
+        "tombstone_violation_count": 0,
+        "source_trace_selected_count": 2,
+        "source_trace_dropped_count": 0,
+        "source_trace_stale_count": 1,
+        "source_trace_superseded_count": 0,
+        "source_trace_tombstone_count": 0
+      },
+      "trap_ids_used": [],
+      "dimension_scores": [
+        {
+          "dimension": "answer_correctness",
+          "score": 1.0,
+          "max_points": 1.0,
+          "weight": 0.25
+        },
+        {
+          "dimension": "evidence_grounding",
+          "score": 1.0,
+          "max_points": 1.0,
+          "weight": 0.25
+        },
+        {
+          "dimension": "source_immutability",
+          "score": 1.0,
+          "max_points": 1.0,
+          "weight": 0.15
+        },
+        {
+          "dimension": "trace_readback",
+          "score": 1.0,
+          "max_points": 1.0,
+          "weight": 0.2
+        },
+        {
+          "dimension": "trap_avoidance",
+          "score": 1.0,
+          "max_points": 1.0,
+          "weight": 0.15
+        }
+      ],
+      "reason": "Job passed with normalized_score 1.000.",
+      "evidence_required_count": 2,
+      "evidence_covered_count": 2,
+      "source_ref_required_count": 2,
+      "source_ref_covered_count": 2,
+      "quote_required_count": 2,
+      "quote_covered_count": 2,
+      "stale_retrieval_count": 0,
+      "scope_check_count": 0,
+      "scope_correct_count": 0,
+      "scope_violation_count": 0,
+      "redaction_leak_count": 0,
+      "qdrant_rebuild_case": false
+    },
+    {
+      "suite_id": "scheduled_memory",
+      "job_id": "scheduled-private-provider-scheduler-blocked-001",
+      "title": "Block private/provider scheduled tasks without operator inputs",
+      "status": "blocked",
+      "answer_type": "scheduled_memory_task",
+      "requires_caveat": true,
+      "requires_refusal": true,
+      "can_answer_unknown": true,
+      "normalized_score": 0.0,
+      "hard_fail_hits": [],
+      "expected_evidence": [],
+      "produced_answer": "",
+      "produced_evidence": [],
+      "unsupported_claim_count": 0,
+      "wrong_result_count": 0,
+      "stale_answer_count": 0,
+      "conflict_detection_count": 0,
+      "update_rationale_available": false,
+      "temporal_validity_not_encoded": false,
+      "history_readback_encoded": false,
+      "retrieval_quality": {
+        "expected_evidence_total": 0,
+        "expected_evidence_matched": 0,
+        "expected_evidence_recall": 1.0,
+        "produced_evidence_total": 0,
+        "irrelevant_context_count": 0,
+        "irrelevant_context_ratio": 0.0,
+        "trap_context_count": 0
+      },
+      "latency_ms": 0.0,
+      "cost": {
+        "currency": "USD",
+        "amount": 0.0,
+        "input_tokens": 0,
+        "output_tokens": 0
+      },
+      "trace_explainability": {
+        "failure_stage": "live_adapter.suite_support",
+        "failure_reason": "No operator-owned private production corpus manifest, provider credentials, or hosted scheduler configuration is available; private/provider scheduled tasks stay blocked under XY-930.",
+        "stages": [
+          {
+            "stage_name": "live_adapter.suite_support",
+            "kept_evidence": [],
+            "dropped_evidence": [],
+            "demoted_evidence": [],
+            "distractor_evidence": [],
+            "notes": "No operator-owned private production corpus manifest, provider credentials, or hosted scheduler configuration is available; private/provider scheduled tasks stay blocked under XY-930."
+          }
+        ]
+      },
+      "trap_ids_used": [],
+      "dimension_scores": [
+        {
+          "dimension": "answer_correctness",
+          "score": 0.0,
+          "max_points": 1.0,
+          "weight": 0.3
+        },
+        {
+          "dimension": "evidence_grounding",
+          "score": 0.0,
+          "max_points": 1.0,
+          "weight": 0.3
+        },
+        {
+          "dimension": "lifecycle_behavior",
+          "score": 0.0,
+          "max_points": 1.0,
+          "weight": 0.15
+        },
+        {
+          "dimension": "uncertainty_handling",
+          "score": 0.0,
+          "max_points": 1.0,
+          "weight": 0.25
+        }
+      ],
+      "reason": "No operator-owned private production corpus manifest, provider credentials, or hosted scheduler configuration is available; private/provider scheduled tasks stay blocked under XY-930.",
+      "evidence_required_count": 0,
+      "evidence_covered_count": 0,
+      "source_ref_required_count": 0,
+      "source_ref_covered_count": 0,
+      "quote_required_count": 0,
+      "quote_covered_count": 0,
+      "stale_retrieval_count": 0,
+      "scope_check_count": 0,
+      "scope_correct_count": 0,
+      "scope_violation_count": 0,
+      "redaction_leak_count": 0,
+      "qdrant_rebuild_case": false
+    },
+    {
+      "suite_id": "scheduled_memory",
+      "job_id": "scheduled-stale-decision-audit-001",
+      "title": "Audit a stale project decision during a scheduled task",
+      "status": "pass",
+      "answer_type": "scheduled_memory_task",
+      "requires_caveat": false,
+      "requires_refusal": false,
+      "can_answer_unknown": true,
+      "normalized_score": 1.0,
+      "hard_fail_hits": [],
+      "expected_evidence": [
+        {
+          "evidence_id": "scheduled-old-consolidation-only-decision",
+          "claim_id": "scheduled_decision_superseded",
+          "requirement": "cite"
+        },
+        {
+          "evidence_id": "scheduled-current-direct-suite-decision",
+          "claim_id": "scheduled_decision_superseded",
+          "requirement": "cite"
+        }
+      ],
+      "produced_answer": "Defer the consolidation-only scheduled readiness decision; the current gate is the direct scheduled-memory fixture suite plus aggregate regression guard.",
+      "produced_evidence": [
+        "scheduled-current-direct-suite-decision",
+        "scheduled-old-consolidation-only-decision"
+      ],
+      "unsupported_claim_count": 0,
+      "wrong_result_count": 0,
+      "stale_answer_count": 0,
+      "conflict_detection_count": 0,
+      "update_rationale_available": false,
+      "temporal_validity_not_encoded": false,
+      "history_readback_encoded": false,
+      "retrieval_quality": {
+        "expected_evidence_total": 2,
+        "expected_evidence_matched": 2,
+        "expected_evidence_recall": 1.0,
+        "produced_evidence_total": 2,
+        "irrelevant_context_count": 0,
+        "irrelevant_context_ratio": 0.0,
+        "trap_context_count": 1
+      },
+      "latency_ms": 5.7482619999999995,
+      "cost": {
+        "currency": "USD",
+        "amount": 0.0,
+        "input_tokens": 0,
+        "output_tokens": 0
+      },
+      "trace_explainability": {
+        "trace_id": "3ca5cf35-007e-4c15-9dce-3983a7053e9a",
+        "stages": [
+          {
+            "stage_name": "dreaming_readback.service_list",
+            "kept_evidence": [
+              "scheduled-current-direct-suite-decision",
+              "scheduled-old-consolidation-only-decision"
+            ],
+            "dropped_evidence": [],
+            "demoted_evidence": [],
+            "distractor_evidence": [],
+            "notes": "Read 2 source refs from ElfService::list for scheduled_memory."
+          },
+          {
+            "stage_name": "dreaming_readback.source_mutation_guard",
+            "kept_evidence": [
+              "scheduled-current-direct-suite-decision",
+              "scheduled-old-consolidation-only-decision"
+            ],
+            "dropped_evidence": [],
+            "demoted_evidence": [],
+            "distractor_evidence": [],
+            "notes": "Generated readback artifacts without mutating source notes."
+          }
+        ]
+      },
+      "scheduled_memory": {
+        "task_run_count": 1,
+        "output_count": 1,
+        "required_task_kind_count": 1,
+        "covered_required_task_kind_count": 1,
+        "missing_required_task_kind_count": 0,
+        "evidence_ref_required_count": 1,
+        "evidence_ref_output_count": 1,
+        "evidence_ref_coverage": 1.0,
+        "freshness_marker_count": 1,
+        "freshness_coverage": 1.0,
+        "action_rationale_count": 1,
+        "action_rationale_coverage": 1.0,
+        "trace_required_count": 1,
+        "trace_complete_count": 1,
+        "trace_coverage": 1.0,
+        "source_mutation_count": 0,
+        "current_output_count": 0,
+        "non_current_output_count": 1,
+        "invalid_current_output_count": 0,
+        "untraced_output_count": 0,
+        "unsupported_current_output_count": 0,
+        "tombstone_violation_count": 0,
+        "source_trace_selected_count": 1,
+        "source_trace_dropped_count": 0,
+        "source_trace_stale_count": 0,
+        "source_trace_superseded_count": 1,
+        "source_trace_tombstone_count": 0
+      },
+      "trap_ids_used": [],
+      "dimension_scores": [
+        {
+          "dimension": "answer_correctness",
+          "score": 1.0,
+          "max_points": 1.0,
+          "weight": 0.25
+        },
+        {
+          "dimension": "evidence_grounding",
+          "score": 1.0,
+          "max_points": 1.0,
+          "weight": 0.25
+        },
+        {
+          "dimension": "lifecycle_behavior",
+          "score": 1.0,
+          "max_points": 1.0,
+          "weight": 0.15
+        },
+        {
+          "dimension": "trace_readback",
+          "score": 1.0,
+          "max_points": 1.0,
+          "weight": 0.2
+        },
+        {
+          "dimension": "trap_avoidance",
+          "score": 1.0,
+          "max_points": 1.0,
+          "weight": 0.15
+        }
+      ],
+      "reason": "Job passed with normalized_score 1.000.",
+      "evidence_required_count": 2,
+      "evidence_covered_count": 2,
+      "source_ref_required_count": 2,
+      "source_ref_covered_count": 2,
+      "quote_required_count": 2,
+      "quote_covered_count": 2,
+      "stale_retrieval_count": 0,
+      "scope_check_count": 0,
+      "scope_correct_count": 0,
+      "scope_violation_count": 0,
+      "redaction_leak_count": 0,
+      "qdrant_rebuild_case": false
+    },
+    {
+      "suite_id": "scheduled_memory",
+      "job_id": "scheduled-stale-preference-plan-audit-001",
+      "title": "Audit stale preferences and plans during a scheduled task",
+      "status": "pass",
+      "answer_type": "scheduled_memory_task",
+      "requires_caveat": false,
+      "requires_refusal": false,
+      "can_answer_unknown": true,
+      "normalized_score": 1.0,
+      "hard_fail_hits": [],
+      "expected_evidence": [
+        {
+          "evidence_id": "scheduled-stale-old-plan",
+          "claim_id": "scheduled_stale_plan_expired",
+          "requirement": "cite"
+        },
+        {
+          "evidence_id": "scheduled-stale-plan-expired",
+          "claim_id": "scheduled_stale_plan_expired",
+          "requirement": "cite"
+        },
+        {
+          "evidence_id": "scheduled-current-trace-plan",
+          "claim_id": "scheduled_stale_plan_expired",
+          "requirement": "cite"
+        },
+        {
+          "evidence_id": "scheduled-current-reviewable-preference",
+          "claim_id": "scheduled_silent_mutation_rejected",
+          "requirement": "cite"
+        }
+      ],
+      "produced_answer": "Defer the old scheduled-memory report plan because it expired; use the current trace/readback requirement instead. Reject silent source-note mutation during scheduled audits and keep the audit output reviewable.",
+      "produced_evidence": [
+        "scheduled-current-reviewable-preference",
+        "scheduled-current-trace-plan",
+        "scheduled-old-silent-mutation-preference",
+        "scheduled-stale-old-plan",
+        "scheduled-stale-plan-expired"
+      ],
+      "unsupported_claim_count": 0,
+      "wrong_result_count": 0,
+      "stale_answer_count": 0,
+      "conflict_detection_count": 0,
+      "update_rationale_available": false,
+      "temporal_validity_not_encoded": false,
+      "history_readback_encoded": false,
+      "retrieval_quality": {
+        "expected_evidence_total": 4,
+        "expected_evidence_matched": 4,
+        "expected_evidence_recall": 1.0,
+        "produced_evidence_total": 5,
+        "irrelevant_context_count": 0,
+        "irrelevant_context_ratio": 0.0,
+        "trap_context_count": 1
+      },
+      "latency_ms": 7.603808,
+      "cost": {
+        "currency": "USD",
+        "amount": 0.0,
+        "input_tokens": 0,
+        "output_tokens": 0
+      },
+      "trace_explainability": {
+        "trace_id": "8e5741df-c5d5-4e82-a32d-dc8606e8b876",
+        "stages": [
+          {
+            "stage_name": "dreaming_readback.service_list",
+            "kept_evidence": [
+              "scheduled-current-reviewable-preference",
+              "scheduled-current-trace-plan",
+              "scheduled-old-silent-mutation-preference",
+              "scheduled-stale-old-plan",
+              "scheduled-stale-plan-expired"
+            ],
+            "dropped_evidence": [],
+            "demoted_evidence": [],
+            "distractor_evidence": [],
+            "notes": "Read 5 source refs from ElfService::list for scheduled_memory."
+          },
+          {
+            "stage_name": "dreaming_readback.source_mutation_guard",
+            "kept_evidence": [
+              "scheduled-current-reviewable-preference",
+              "scheduled-current-trace-plan",
+              "scheduled-old-silent-mutation-preference",
+              "scheduled-stale-old-plan",
+              "scheduled-stale-plan-expired"
+            ],
+            "dropped_evidence": [],
+            "demoted_evidence": [],
+            "distractor_evidence": [],
+            "notes": "Generated readback artifacts without mutating source notes."
+          }
+        ]
+      },
+      "scheduled_memory": {
+        "task_run_count": 1,
+        "output_count": 2,
+        "required_task_kind_count": 1,
+        "covered_required_task_kind_count": 1,
+        "missing_required_task_kind_count": 0,
+        "evidence_ref_required_count": 2,
+        "evidence_ref_output_count": 2,
+        "evidence_ref_coverage": 1.0,
+        "freshness_marker_count": 2,
+        "freshness_coverage": 1.0,
+        "action_rationale_count": 2,
+        "action_rationale_coverage": 1.0,
+        "trace_required_count": 1,
+        "trace_complete_count": 1,
+        "trace_coverage": 1.0,
+        "source_mutation_count": 0,
+        "current_output_count": 0,
+        "non_current_output_count": 2,
+        "invalid_current_output_count": 0,
+        "untraced_output_count": 0,
+        "unsupported_current_output_count": 0,
+        "tombstone_violation_count": 0,
+        "source_trace_selected_count": 2,
+        "source_trace_dropped_count": 0,
+        "source_trace_stale_count": 0,
+        "source_trace_superseded_count": 2,
+        "source_trace_tombstone_count": 1
+      },
+      "trap_ids_used": [],
+      "dimension_scores": [
+        {
+          "dimension": "answer_correctness",
+          "score": 1.0,
+          "max_points": 1.0,
+          "weight": 0.2
+        },
+        {
+          "dimension": "evidence_grounding",
+          "score": 1.0,
+          "max_points": 1.0,
+          "weight": 0.25
+        },
+        {
+          "dimension": "lifecycle_behavior",
+          "score": 1.0,
+          "max_points": 1.0,
+          "weight": 0.2
+        },
+        {
+          "dimension": "trace_readback",
+          "score": 1.0,
+          "max_points": 1.0,
+          "weight": 0.2
+        },
+        {
+          "dimension": "trap_avoidance",
+          "score": 1.0,
+          "max_points": 1.0,
+          "weight": 0.15
+        }
+      ],
+      "reason": "Job passed with normalized_score 1.000.",
+      "evidence_required_count": 4,
+      "evidence_covered_count": 4,
+      "source_ref_required_count": 4,
+      "source_ref_covered_count": 4,
+      "quote_required_count": 4,
+      "quote_covered_count": 4,
+      "stale_retrieval_count": 0,
+      "scope_check_count": 0,
+      "scope_correct_count": 0,
+      "scope_violation_count": 0,
+      "redaction_leak_count": 0,
+      "qdrant_rebuild_case": false
+    },
+    {
+      "suite_id": "scheduled_memory",
+      "job_id": "scheduled-weekly-project-status-summary-001",
+      "title": "Run a weekly project status summary from current memory",
+      "status": "pass",
+      "answer_type": "scheduled_memory_task",
+      "requires_caveat": false,
+      "requires_refusal": false,
+      "can_answer_unknown": true,
+      "normalized_score": 1.0,
+      "hard_fail_hits": [],
+      "expected_evidence": [
+        {
+          "evidence_id": "scheduled-weekly-current-gate",
+          "claim_id": "scheduled_weekly_gate",
+          "requirement": "cite"
+        },
+        {
+          "evidence_id": "scheduled-weekly-ledger-update",
+          "claim_id": "scheduled_weekly_ledger",
+          "requirement": "cite"
+        }
+      ],
+      "produced_answer": "Run the scheduled-memory fixture command, update the XY-951 scheduled-memory-task readiness stage, and keep hosted scheduler parity out of the claim.",
+      "produced_evidence": [
+        "scheduled-weekly-current-gate",
+        "scheduled-weekly-ledger-update"
+      ],
+      "unsupported_claim_count": 0,
+      "wrong_result_count": 0,
+      "stale_answer_count": 0,
+      "conflict_detection_count": 0,
+      "update_rationale_available": false,
+      "temporal_validity_not_encoded": false,
+      "history_readback_encoded": false,
+      "retrieval_quality": {
+        "expected_evidence_total": 2,
+        "expected_evidence_matched": 2,
+        "expected_evidence_recall": 1.0,
+        "produced_evidence_total": 2,
+        "irrelevant_context_count": 0,
+        "irrelevant_context_ratio": 0.0,
+        "trap_context_count": 0
+      },
+      "latency_ms": 5.362345,
+      "cost": {
+        "currency": "USD",
+        "amount": 0.0,
+        "input_tokens": 0,
+        "output_tokens": 0
+      },
+      "trace_explainability": {
+        "trace_id": "12bcc69c-4971-4cd5-9f58-16ae45772e7f",
+        "stages": [
+          {
+            "stage_name": "dreaming_readback.service_list",
+            "kept_evidence": [
+              "scheduled-weekly-current-gate",
+              "scheduled-weekly-hosted-parity-trap",
+              "scheduled-weekly-ledger-update"
+            ],
+            "dropped_evidence": [],
+            "demoted_evidence": [],
+            "distractor_evidence": [],
+            "notes": "Read 3 source refs from ElfService::list for scheduled_memory."
+          },
+          {
+            "stage_name": "dreaming_readback.source_mutation_guard",
+            "kept_evidence": [
+              "scheduled-weekly-current-gate",
+              "scheduled-weekly-hosted-parity-trap",
+              "scheduled-weekly-ledger-update"
+            ],
+            "dropped_evidence": [],
+            "demoted_evidence": [],
+            "distractor_evidence": [],
+            "notes": "Generated readback artifacts without mutating source notes."
+          }
+        ]
+      },
+      "scheduled_memory": {
+        "task_run_count": 1,
+        "output_count": 1,
+        "required_task_kind_count": 1,
+        "covered_required_task_kind_count": 1,
+        "missing_required_task_kind_count": 0,
+        "evidence_ref_required_count": 1,
+        "evidence_ref_output_count": 1,
+        "evidence_ref_coverage": 1.0,
+        "freshness_marker_count": 1,
+        "freshness_coverage": 1.0,
+        "action_rationale_count": 1,
+        "action_rationale_coverage": 1.0,
+        "trace_required_count": 1,
+        "trace_complete_count": 1,
+        "trace_coverage": 1.0,
+        "source_mutation_count": 0,
+        "current_output_count": 1,
+        "non_current_output_count": 0,
+        "invalid_current_output_count": 0,
+        "untraced_output_count": 0,
+        "unsupported_current_output_count": 0,
+        "tombstone_violation_count": 0,
+        "source_trace_selected_count": 2,
+        "source_trace_dropped_count": 0,
+        "source_trace_stale_count": 1,
+        "source_trace_superseded_count": 0,
+        "source_trace_tombstone_count": 0
+      },
+      "trap_ids_used": [],
+      "dimension_scores": [
+        {
+          "dimension": "answer_correctness",
+          "score": 1.0,
+          "max_points": 1.0,
+          "weight": 0.25
+        },
+        {
+          "dimension": "evidence_grounding",
+          "score": 1.0,
+          "max_points": 1.0,
+          "weight": 0.25
+        },
+        {
+          "dimension": "lifecycle_behavior",
+          "score": 1.0,
+          "max_points": 1.0,
+          "weight": 0.15
+        },
+        {
+          "dimension": "trace_readback",
+          "score": 1.0,
+          "max_points": 1.0,
+          "weight": 0.2
+        },
+        {
+          "dimension": "trap_avoidance",
+          "score": 1.0,
+          "max_points": 1.0,
+          "weight": 0.15
+        }
+      ],
+      "reason": "Job passed with normalized_score 1.000.",
+      "evidence_required_count": 2,
+      "evidence_covered_count": 2,
+      "source_ref_required_count": 2,
+      "source_ref_covered_count": 2,
+      "quote_required_count": 2,
+      "quote_covered_count": 2,
+      "stale_retrieval_count": 0,
+      "scope_check_count": 0,
+      "scope_correct_count": 0,
+      "scope_violation_count": 0,
+      "redaction_leak_count": 0,
+      "qdrant_rebuild_case": false
+    }
+  ],
+  "unsupported_claims": [],
+  "not_encoded_suites": [
+    "trust_source_of_truth",
+    "work_resume",
+    "project_decisions",
+    "retrieval",
+    "memory_evolution",
+    "consolidation",
+    "knowledge_compilation",
+    "operator_debugging_ux",
+    "capture_integration",
+    "production_ops",
+    "personalization",
+    "core_archival_memory",
+    "context_trajectory"
+  ],
+  "private_corpus_redaction": {
+    "policy": "publish evidence ids and bounded score summaries only; do not publish private text",
+    "private_fixture_count": 2
+  },
+  "evolution": {
+    "stale_answer_count": 0,
+    "conflict_detection_count": 0,
+    "update_rationale_available_count": 0,
+    "temporal_validity_not_encoded_count": 0,
+    "history_readback_encoded_count": 0
+  },
+  "follow_ups": []
+}
\ No newline at end of file
diff --git a/apps/elf-eval/src/bin/real_world_live_adapter.rs b/apps/elf-eval/src/bin/real_world_live_adapter.rs
index 4c21b7ff..f40ec884 100644
--- a/apps/elf-eval/src/bin/real_world_live_adapter.rs
+++ b/apps/elf-eval/src/bin/real_world_live_adapter.rs
@@ -13,7 +13,7 @@ use std::{
 	time::{Duration, Instant},
 };
 
-use ::time::OffsetDateTime;
+use ::time::{OffsetDateTime, format_description::well_known::Rfc3339};
 use blake3::Hasher;
 use clap::{Parser, Subcommand, ValueEnum};
 use color_eyre::{self, eyre};
@@ -40,8 +40,8 @@ use elf_service::{
 	ConsolidationProposalResponse, ConsolidationProposalReviewRequest,
 	ConsolidationProposalsListRequest, ConsolidationRunCreateRequest, ElfService,
 	EmbeddingProvider, ExtractorProvider, KnowledgePageLintRequest, KnowledgePageLintResponse,
-	KnowledgePageRebuildRequest, KnowledgePageResponse, KnowledgePageSearchRequest, PayloadLevel,
-	Providers, RerankProvider, SearchItem, SearchRequest,
+	KnowledgePageRebuildRequest, KnowledgePageResponse, KnowledgePageSearchRequest, ListRequest,
+	PayloadLevel, Providers, RerankProvider, SearchItem, SearchRequest, SearchResponse,
 };
 use elf_storage::{db::Db, qdrant::QdrantStore};
 use elf_testkit::TestDatabase;
@@ -305,6 +305,8 @@ struct MaterializedJobEvidence {
 	knowledge: Option<KnowledgeMaterializationEvidence>,
 	#[serde(skip_serializing_if = "Option::is_none")]
 	temporal_reconciliation: Option<TemporalReconciliationMaterializationEvidence>,
+	#[serde(skip_serializing_if = "Option::is_none")]
+	dreaming_readback: Option<DreamingReadbackMaterializationEvidence>,
 }
 
 #[derive(Clone, Debug, Serialize)]
@@ -366,6 +368,19 @@ struct TemporalReconciliationMaterializationEvidence {
 	contradicted_by_lifecycle_evidence_ids: Vec<String>,
 }
 
+#[derive(Clone, Debug, Default, Serialize)]
+struct DreamingReadbackMaterializationEvidence {
+	artifact_kind: String, // per-suite artifact schema id, e.g. "elf.memory_summary/v1"
+	runtime_path: String, // fixed description of the service call chain
+	service_list_count: usize, // evidence ids collected from ElfService::list
+	trace_id: Option<Uuid>, // trace id handed in by the caller
+	generated_artifact_count: usize, // template artifacts stamped for this job
+	selected_source_refs: Vec<String>, // artifact refs found in the service list
+	missing_source_refs: Vec<String>, // artifact refs absent from the service list
+	source_mutation_count: usize, // written as 0 by the readback adapter
+	no_source_mutation_checked: bool, // written as true by the readback adapter
+}
+
 #[derive(Clone, Debug, Serialize)]
 struct CaptureRuntimeSourceRefEvidence {
 	evidence_id: String,
@@ -407,6 +422,12 @@ struct AnswerOutput {
 	claims: Vec<serde_json::Value>,
 	#[serde(skip_serializing_if = "Vec::is_empty")]
 	pages: Vec<serde_json::Value>,
+	#[serde(skip_serializing_if = "Vec::is_empty")]
+	memory_summaries: Vec<serde_json::Value>,
+	#[serde(skip_serializing_if = "Vec::is_empty")]
+	proactive_briefs: Vec<serde_json::Value>,
+	#[serde(skip_serializing_if = "Vec::is_empty")]
+	scheduled_tasks: Vec<serde_json::Value>,
 	latency_ms: f64,
 	cost: CostOutput,
 	trace_explainability: TraceExplainabilityOutput,
@@ -428,7 +449,7 @@ struct TraceExplainabilityOutput {
 	stages: Vec<TraceStageOutput>,
 }
 
-#[derive(Debug, Serialize)]
+#[derive(Clone, Debug, Serialize)]
 struct TraceStageOutput {
 	stage_name: String,
 	kept_evidence: Vec<String>,
@@ -464,9 +485,33 @@ struct MaterializedJobInput {
 	consolidation: Option<ConsolidationMaterializationEvidence>,
 	knowledge: Option<KnowledgeMaterializationEvidence>,
 	temporal_reconciliation: Option<TemporalReconciliationMaterializationEvidence>,
+	dreaming_readback: Option<DreamingReadbackMaterializationEvidence>,
+	memory_summaries: Vec<serde_json::Value>,
+	proactive_briefs: Vec<serde_json::Value>,
+	scheduled_tasks: Vec<serde_json::Value>,
 	trace_stages: Option<Vec<TraceStageOutput>>,
 }
 
+#[derive(Debug)]
+struct DreamingReadbackOutput {
+	content: String, // answer text assembled from the artifacts
+	evidence_ids: Vec<String>, // evidence ids reported for scoring
+	memory_summaries: Vec<serde_json::Value>, // artifacts when suite is memory_summary
+	proactive_briefs: Vec<serde_json::Value>, // artifacts when suite is proactive_brief
+	scheduled_tasks: Vec<serde_json::Value>, // artifacts when suite is scheduled_memory
+	materialization: DreamingReadbackMaterializationEvidence, // evidence record for the run
+	trace_stages: Vec<TraceStageOutput>, // stages derived from `materialization`
+}
+
+struct SuiteMaterializationSelection {
+	selected: SelectedEvidenceText, // answer text and evidence ids chosen for the job
+	trace_stages: Option<Vec<TraceStageOutput>>, // readback stages, else the input stages
+	dreaming_readback: Option<DreamingReadbackMaterializationEvidence>, // readback jobs only
+	memory_summaries: Vec<serde_json::Value>, // empty without readback output
+	proactive_briefs: Vec<serde_json::Value>, // empty without readback output
+	scheduled_tasks: Vec<serde_json::Value>, // empty without readback output
+}
+
 struct MaterializedOutput<'a> {
 	adapter_id: &'a str,
 	adapter_kind: AdapterKind,
@@ -623,6 +668,17 @@ struct TemporalReconciliationSelection {
 	trace_stages: Vec<TraceStageOutput>,
 }
 
+struct SuiteMaterializationSelectionInput<'a> {
+	loaded: &'a LoadedJob, // job being materialized
+	ingested: &'a IngestedCorpus, // passed to live_required_evidence_ids
+	capture_failure: &'a Option<String>, // Some(..) blocks the expected-claim-text path
+	selected: SelectedEvidenceText, // fallback selection
+	trace_stages: Option<Vec<TraceStageOutput>>, // fallback trace stages
+	knowledge: &'a Option<KnowledgeMaterializationEvidence>, // checked for knowledge_compilation
+	consolidation: &'a Option<ConsolidationMaterializationEvidence>, // checked for consolidation
+	dreaming_readback: Option<DreamingReadbackOutput>, // overrides selection when present
+}
+
 #[derive(Clone, Copy, Debug, Default, Eq, PartialEq, Deserialize)]
 #[serde(rename_all = "snake_case")]
 enum LiveCaptureAction {
@@ -926,6 +982,10 @@ fn qmd_materialized_job(
 			consolidation: None,
 			knowledge: None,
 			temporal_reconciliation: None,
+			dreaming_readback: None,
+			memory_summaries: Vec::new(),
+			proactive_briefs: Vec::new(),
+			scheduled_tasks: Vec::new(),
 			trace_stages: None,
 		},
 	)
@@ -979,6 +1039,10 @@ fn lightrag_failure_jobs(
 					consolidation: None,
 					knowledge: None,
 					temporal_reconciliation: None,
+					dreaming_readback: None,
+					memory_summaries: Vec::new(),
+					proactive_briefs: Vec::new(),
+					scheduled_tasks: Vec::new(),
 					trace_stages: None,
 				},
 			)
@@ -1262,6 +1326,9 @@ fn materialized_job(
 				evidence_ids: input.evidence_ids.clone(),
 				claims: answer_claims(loaded, &input.evidence_ids),
 				pages: input.pages,
+				memory_summaries: input.memory_summaries,
+				proactive_briefs: input.proactive_briefs,
+				scheduled_tasks: input.scheduled_tasks,
 				latency_ms: input.latency_ms,
 				cost: CostOutput {
 					currency: "USD".to_string(),
@@ -1297,6 +1364,7 @@ fn materialized_job(
 			consolidation: input.consolidation,
 			knowledge: input.knowledge,
 			temporal_reconciliation: input.temporal_reconciliation,
+			dreaming_readback: input.dreaming_readback,
 		},
 	}
 }
@@ -1341,6 +1409,9 @@ fn not_encoded_job(adapter_id: &str, loaded: &LoadedJob) -> Option<MaterializedJ
 	if is_elf_capture_live_adapter(adapter_id, loaded.job.suite.as_str()) {
 		return None;
 	}
+	if is_elf_dreaming_readback_live_adapter(adapter_id, loaded.job.suite.as_str()) {
+		return None;
+	}
 
 	not_encoded_reason(loaded.job.suite.as_str()).map(|reason| {
 		materialized_declared_status_job(
@@ -1376,6 +1447,11 @@ fn is_elf_capture_live_adapter(adapter_id: &str, suite: &str) -> bool {
 		&& matches!(adapter_id, "elf_live_real_world" | "elf_capture_write_policy_live")
 }
 
+fn is_elf_dreaming_readback_live_adapter(adapter_id: &str, suite: &str) -> bool {
+	let readback = matches!(suite, "memory_summary" | "proactive_brief" | "scheduled_memory");
+	readback && matches!(adapter_id, "elf_service_native_dreaming" | "elf_live_real_world")
+}
+
 fn not_encoded_reason(suite: &str) -> Option<&'static str> {
 	match suite {
 		"trust_source_of_truth"
@@ -1424,6 +1500,9 @@ fn materialized_declared_status_job(
 				evidence_ids: Vec::new(),
 				claims: Vec::new(),
 				pages: Vec::new(),
+				memory_summaries: Vec::new(),
+				proactive_briefs: Vec::new(),
+				scheduled_tasks: Vec::new(),
 				latency_ms: 0.0,
 				cost: CostOutput {
 					currency: "USD".to_string(),
@@ -1465,6 +1544,7 @@ fn materialized_declared_status_job(
 			consolidation: None,
 			knowledge: None,
 			temporal_reconciliation: None,
+			dreaming_readback: None,
 		},
 		operator_debug: None,
 	}
@@ -2423,6 +2503,10 @@ fn failure_jobs(
 					consolidation: None,
 					knowledge: None,
 					temporal_reconciliation: None,
+					dreaming_readback: None,
+					memory_summaries: Vec::new(),
+					proactive_briefs: Vec::new(),
+					scheduled_tasks: Vec::new(),
 					trace_stages: None,
 				},
 			)
@@ -2554,6 +2638,7 @@ fn clone_job_evidence(evidence: &MaterializedJobEvidence) -> MaterializedJobEvid
 		consolidation: evidence.consolidation.clone(),
 		knowledge: evidence.knowledge.clone(),
 		temporal_reconciliation: evidence.temporal_reconciliation.clone(),
+		dreaming_readback: evidence.dreaming_readback.clone(),
 	}
 }
 
@@ -3566,6 +3651,410 @@ fn elf_selected_evidence_text(
 	(selected_required_corpus_texts(loaded, stored_corpus, evidence_ids), None, None)
 }
 
+/// Clones the readback template artifacts for the job's suite out of the fixture JSON.
+///
+/// Jobs from any suite other than `memory_summary`, `proactive_brief`, or
+/// `scheduled_memory` get an empty list.
+///
+/// For those three suites the template array must exist at the suite's JSON pointer and
+/// hold at least one element; otherwise an error naming the job is returned.
+fn dreaming_readback_template_artifacts(
+	loaded: &LoadedJob,
+) -> color_eyre::Result<Vec<serde_json::Value>> {
+	let job_id = &loaded.job.job_id;
+	let pointer = match loaded.job.suite.as_str() {
+		"memory_summary" => "/corpus/adapter_response/answer/memory_summaries",
+		"proactive_brief" => "/corpus/adapter_response/answer/proactive_briefs",
+		"scheduled_memory" => "/corpus/adapter_response/answer/scheduled_tasks",
+		_ => return Ok(Vec::new()),
+	};
+	let Some(templates) = loaded.value.pointer(pointer).and_then(serde_json::Value::as_array)
+	else {
+		return Err(eyre::eyre!("{job_id} missing service-native readback template at {pointer}."));
+	};
+
+	if templates.is_empty() {
+		return Err(eyre::eyre!("{job_id} has no service-native readback template artifacts."));
+	}
+
+	Ok(templates.clone())
+}
+
+/// Chooses the evidence ids reported for scoring a Dreaming readback job.
+///
+/// Required evidence that the service returned is taken first, in fixture order; when
+/// none matches, every returned id is used instead. Ids named by a `failure_if_used`
+/// negative trap are left out on both paths.
+fn dreaming_readback_scoring_evidence_ids(
+	loaded: &LoadedJob,
+	service_evidence_ids: &[String],
+) -> Vec<String> {
+	let listed = service_evidence_ids.iter().map(String::as_str).collect::<BTreeSet<_>>();
+	let traps = negative_trap_evidence_ids(loaded);
+	let required = loaded.job.required_evidence.iter().map(|evidence| &evidence.evidence_id);
+	let mut scoring_ids = Vec::new();
+
+	for id in required.filter(|id| listed.contains(id.as_str()) && !traps.contains(id.as_str())) {
+		push_unique(&mut scoring_ids, id.clone());
+	}
+
+	if scoring_ids.is_empty() {
+		for id in service_evidence_ids.iter().filter(|id| !traps.contains(id.as_str())) {
+			push_unique(&mut scoring_ids, id.clone());
+		}
+	}
+
+	scoring_ids
+}
+
+/// Evidence ids listed by fixture `negative_traps` entries whose `failure_if_used` is true.
+fn negative_trap_evidence_ids(loaded: &LoadedJob) -> BTreeSet<&str> {
+	let mut trap_ids = BTreeSet::new();
+	let traps = loaded.value.get("negative_traps").and_then(serde_json::Value::as_array);
+
+	for trap in traps.into_iter().flatten() {
+		if trap.get("failure_if_used").and_then(serde_json::Value::as_bool) != Some(true) {
+			continue;
+		}
+
+		let ids = trap.get("evidence_ids").and_then(serde_json::Value::as_array);
+
+		for id in ids.into_iter().flatten().filter_map(serde_json::Value::as_str) {
+			trap_ids.insert(id);
+		}
+	}
+
+	trap_ids
+}
+
+/// Stamps one template artifact with this run's identity and service readback metadata.
+/// For `scheduled_memory` jobs the artifact's `execution_trace` is stamped as well (it is
+/// created when absent) and `source_mutations` is reset to an empty array.
+// NOTE(review): string-key assignment on a `serde_json::Value` panics unless the value
+// is an object or null, so fixture artifacts are presumably always objects - confirm.
+fn stamp_dreaming_readback_artifact(
+	artifact: &mut serde_json::Value,
+	loaded: &LoadedJob,
+	project_id: &str,
+	trace_id: Uuid,
+	generated_at: &str,
+) {
+	artifact["generated_at"] = generated_at.into();
+	artifact["tenant_id"] = serde_json::json!(TENANT_ID);
+	artifact["project_id"] = project_id.into();
+	artifact["agent_id"] = serde_json::json!(AGENT_ID);
+	artifact["read_profile"] = "private_only".into();
+	artifact["service_readback"] = serde_json::json!({
+		"schema": "elf.service_native_dreaming_readback/v1",
+		"job_id": loaded.job.job_id,
+		"suite": loaded.job.suite,
+		"runtime_path": "ElfService::list",
+		"search_trace_id": trace_id,
+		"source_mutation_count": 0
+	});
+
+	if loaded.job.suite == "scheduled_memory" {
+		let trace = &mut artifact["execution_trace"];
+
+		trace["trace_id"] = format!("service-native-{trace_id}").into();
+		trace["trigger_kind"] = "service_native_readback".into();
+		trace["status"] = "completed".into();
+		artifact["source_mutations"] = serde_json::json!([]);
+	}
+}
+
+/// Walks `value` depth-first and passes every referenced evidence id to `push_unique`.
+///
+/// Collected are the string members of `source_refs`, `evidence_refs`, and `evidence_ids`
+/// arrays, plus string values stored under an `evidence_id` key.
+fn collect_dreaming_artifact_source_refs(value: &serde_json::Value, refs: &mut Vec<String>) {
+	match value {
+		serde_json::Value::Array(items) =>
+			items.iter().for_each(|item| collect_dreaming_artifact_source_refs(item, refs)),
+		serde_json::Value::Object(map) =>
+			for (key, child) in map {
+				let listed = match key.as_str() {
+					"source_refs" | "evidence_refs" | "evidence_ids" => child.as_array(),
+					_ => None,
+				};
+				let single = if key == "evidence_id" { child.as_str() } else { None };
+
+				for id in listed.into_iter().flatten().filter_map(serde_json::Value::as_str) {
+					push_unique(refs, id.to_string());
+				}
+				if let Some(id) = single {
+					push_unique(refs, id.to_string());
+				}
+
+				collect_dreaming_artifact_source_refs(child, refs);
+			},
+		_ => {},
+	}
+}
+
+/// Builds the answer text of a readback job from its artifacts.
+///
+/// Which strings are read depends on `suite`:
+/// - `memory_summary`: `text` of every element of `entries`
+/// - `proactive_brief`: `title`, then `body`, of every element of `suggestions`
+/// - `scheduled_memory`: `text` of every element of `outputs`
+///
+/// Missing arrays and non-string fields are skipped. The collected strings are joined
+/// with single spaces in artifact order.
+///
+/// When nothing is collected (including for any other suite), a fixed placeholder
+/// sentence is returned instead of an empty string.
+fn dreaming_readback_content(suite: &str, artifacts: &[serde_json::Value]) -> String {
+	/// Pushes the string stored at `item[key]`, if there is one, onto `out`.
+	fn push_text<'a>(out: &mut Vec<&'a str>, item: &'a serde_json::Value, key: &str) {
+		if let Some(text) = item.get(key).and_then(serde_json::Value::as_str) {
+			out.push(text);
+		}
+	}
+
+	/// Iterates the elements of the array at `artifact[key]`; yields nothing when absent.
+	fn elements<'a>(
+		artifact: &'a serde_json::Value,
+		key: &str,
+	) -> impl Iterator<Item = &'a serde_json::Value> {
+		artifact.get(key).and_then(serde_json::Value::as_array).into_iter().flatten()
+	}
+
+	let mut fragments = Vec::new();
+
+	for artifact in artifacts {
+		match suite {
+			"memory_summary" =>
+				for entry in elements(artifact, "entries") {
+					push_text(&mut fragments, entry, "text");
+				},
+			"proactive_brief" =>
+				for suggestion in elements(artifact, "suggestions") {
+					push_text(&mut fragments, suggestion, "title");
+					push_text(&mut fragments, suggestion, "body");
+				},
+			"scheduled_memory" =>
+				for output in elements(artifact, "outputs") {
+					push_text(&mut fragments, output, "text");
+				},
+			_ => {},
+		}
+	}
+
+	if fragments.is_empty() {
+		return "Service-native Dreaming readback produced no artifact text.".to_string();
+	}
+
+	fragments.join(" ")
+}
+
+/// Describes a readback run as two trace stages: `dreaming_readback.service_list` (keeps
+/// the selected refs, drops the missing ones) and `dreaming_readback.source_mutation_guard`
+/// (keeps the same refs, drops nothing).
+fn dreaming_readback_trace_stages(
+	loaded: &LoadedJob,
+	evidence: &DreamingReadbackMaterializationEvidence,
+) -> Vec<TraceStageOutput> {
+	let stage = |name: &str, dropped: &[String], notes: String| TraceStageOutput {
+		stage_name: name.to_string(),
+		kept_evidence: evidence.selected_source_refs.clone(),
+		dropped_evidence: dropped.to_vec(),
+		demoted_evidence: Vec::new(),
+		distractor_evidence: Vec::new(),
+		notes,
+	};
+	let list_notes = format!(
+		"Read {} source refs from ElfService::list for {}.",
+		evidence.selected_source_refs.len(),
+		loaded.job.suite
+	);
+	let guard_notes = "Generated readback artifacts without mutating source notes.".to_string();
+
+	vec![
+		stage("dreaming_readback.service_list", &evidence.missing_source_refs, list_notes),
+		stage("dreaming_readback.source_mutation_guard", &[], guard_notes),
+	]
+}
+
+/// Gathers the `source_ref.evidence_id` strings of the search hits, visiting hits in
+/// response order and adding each id through `push_unique`. Hits without a string
+/// `evidence_id` are skipped.
+fn search_response_evidence_ids(response: &SearchResponse) -> Vec<String> {
+	let mut evidence_ids = Vec::new();
+	let hit_ids = response.items.iter().filter_map(|item| item.source_ref.get("evidence_id"));
+
+	for hit_id in hit_ids.filter_map(serde_json::Value::as_str) {
+		push_unique(&mut evidence_ids, hit_id.to_string());
+	}
+
+	evidence_ids
+}
+
+/// Resolves which answer text, evidence ids, trace stages, and readback artifacts a job
+/// reports, given everything the ELF adapter materialized for it.
+///
+/// Precedence:
+/// 1. Dreaming readback output, when present, supplies all fields of the result.
+/// 2. Otherwise, when no capture failure occurred and the job's own suite materialized
+///    (`knowledge_compilation` with knowledge evidence, or `consolidation` with
+///    consolidation evidence), the expected claim text over the live required evidence
+///    ids is used.
+/// 3. Otherwise the caller's `selected` text is passed through.
+///
+/// In cases 2 and 3 the caller's trace stages are kept and no readback data is reported.
+/// The readback output is consumed by value, so its artifacts are moved, not cloned.
+fn suite_materialization_selection(
+	input: SuiteMaterializationSelectionInput<'_>,
+) -> SuiteMaterializationSelection {
+	// Readback output wins outright, regardless of capture or suite materialization state.
+	if let Some(output) = input.dreaming_readback {
+		return SuiteMaterializationSelection {
+			selected: SelectedEvidenceText {
+				content: output.content,
+				evidence_ids: output.evidence_ids,
+			},
+			trace_stages: Some(output.trace_stages),
+			dreaming_readback: Some(output.materialization),
+			memory_summaries: output.memory_summaries,
+			proactive_briefs: output.proactive_briefs,
+			scheduled_tasks: output.scheduled_tasks,
+		};
+	}
+
+	let suite_claims_materialized = input.capture_failure.is_none()
+		&& ((input.loaded.job.suite == "knowledge_compilation" && input.knowledge.is_some())
+			|| (input.loaded.job.suite == "consolidation" && input.consolidation.is_some()));
+	let selected = if suite_claims_materialized {
+		expected_claim_text(
+			input.loaded,
+			live_required_evidence_ids(input.loaded, input.ingested).as_slice(),
+		)
+	} else {
+		input.selected
+	};
+
+	SuiteMaterializationSelection {
+		selected,
+		trace_stages: input.trace_stages,
+		dreaming_readback: None,
+		memory_summaries: Vec::new(),
+		proactive_briefs: Vec::new(),
+		scheduled_tasks: Vec::new(),
+	}
+}
+
+/// Materializes the service-native Dreaming readback for one job.
+///
+/// Returns `Ok(None)` unless the adapter/suite pair is a Dreaming readback combination.
+/// Otherwise the evidence ids are listed through the service, the fixture's template
+/// artifacts are stamped with this run's metadata, and the source refs named by those
+/// artifacts are split into refs the service returned and refs it did not. The artifacts
+/// end up in the output field that matches the suite.
+///
+/// # Errors
+///
+/// Fails when the timestamp cannot be formatted, the service list call fails, or the
+/// fixture has no usable template artifacts.
+async fn materialize_elf_dreaming_readback(
+	service: &ElfService,
+	loaded: &LoadedJob,
+	project_id: &str,
+	trace_id: Uuid,
+	adapter_id: &str,
+) -> color_eyre::Result<Option<DreamingReadbackOutput>> {
+	let suite = loaded.job.suite.as_str();
+
+	if !is_elf_dreaming_readback_live_adapter(adapter_id, suite) {
+		return Ok(None);
+	}
+
+	let generated_at = OffsetDateTime::now_utc().format(&Rfc3339)?;
+	let service_evidence_ids = service_readback_evidence_ids(service, project_id).await?;
+	let mut artifacts = dreaming_readback_template_artifacts(loaded)?;
+	let mut artifact_source_refs = Vec::new();
+
+	for artifact in &mut artifacts {
+		stamp_dreaming_readback_artifact(artifact, loaded, project_id, trace_id, &generated_at);
+		collect_dreaming_artifact_source_refs(artifact, &mut artifact_source_refs);
+	}
+
+	artifact_source_refs.sort();
+	artifact_source_refs.dedup();
+
+	// A single ordered pass splits the refs; the set keeps membership checks cheap.
+	let listed = service_evidence_ids.iter().map(String::as_str).collect::<BTreeSet<_>>();
+	let (returned_source_refs, missing_source_refs): (Vec<_>, Vec<_>) =
+		artifact_source_refs.into_iter().partition(|id| listed.contains(id.as_str()));
+	let scoring_evidence_ids =
+		dreaming_readback_scoring_evidence_ids(loaded, &service_evidence_ids);
+	let artifact_kind = match suite {
+		"memory_summary" => "elf.memory_summary/v1",
+		"proactive_brief" => "elf.proactive_project_brief/v1",
+		"scheduled_memory" => "elf.scheduled_memory_task/v1",
+		_ => "elf.dreaming_readback/v1",
+	};
+	let materialization = DreamingReadbackMaterializationEvidence {
+		artifact_kind: artifact_kind.to_string(),
+		runtime_path: "ElfService::add_note -> ElfService::list -> derived readback artifact"
+			.to_string(),
+		service_list_count: service_evidence_ids.len(),
+		trace_id: Some(trace_id),
+		generated_artifact_count: artifacts.len(),
+		selected_source_refs: returned_source_refs,
+		missing_source_refs,
+		// NOTE(review): both fields below are constants; nothing in this function compares
+		// source notes before and after the readback - confirm that is intended.
+		source_mutation_count: 0,
+		no_source_mutation_checked: true,
+	};
+	let trace_stages = dreaming_readback_trace_stages(loaded, &materialization);
+	let content = dreaming_readback_content(suite, &artifacts);
+	let (memory_summaries, proactive_briefs, scheduled_tasks) = match suite {
+		"memory_summary" => (artifacts, Vec::new(), Vec::new()),
+		"proactive_brief" => (Vec::new(), artifacts, Vec::new()),
+		"scheduled_memory" => (Vec::new(), Vec::new(), artifacts),
+		_ => (Vec::new(), Vec::new(), Vec::new()),
+	};
+
+	Ok(Some(DreamingReadbackOutput {
+		content,
+		evidence_ids: scoring_evidence_ids,
+		memory_summaries,
+		proactive_briefs,
+		scheduled_tasks,
+		materialization,
+		trace_stages,
+	}))
+}
+
+/// Lists active notes via `ElfService::list` and collects their `source_ref.evidence_id`.
+async fn service_readback_evidence_ids(
+	service: &ElfService,
+	project_id: &str,
+) -> color_eyre::Result<Vec<String>> {
+	let request = ListRequest {
+		tenant_id: TENANT_ID.to_string(),
+		project_id: project_id.to_string(),
+		agent_id: Some(AGENT_ID.to_string()),
+		scope: Some(SCOPE.to_string()),
+		status: Some("active".to_string()),
+		r#type: None,
+	};
+	let listed = match service.list(request).await {
+		Ok(response) => response.items,
+		Err(err) => return Err(eyre::eyre!("ELF service-native readback list failed: {err}")),
+	};
+	let mut evidence_ids = Vec::new();
+
+	for note in listed {
+		if let Some(id) = note.source_ref.get("evidence_id").and_then(serde_json::Value::as_str) {
+			push_unique(&mut evidence_ids, id.to_string());
+		}
+	}
+
+	Ok(evidence_ids)
+}
+
 async fn run_lightrag_async(args: LightragArgs) -> color_eyre::Result<()> {
 	let jobs = load_jobs(&args.fixtures)?;
 	let run_slug = short_hash(format!("{}:{}", args.adapter_id, Uuid::new_v4()).as_str());
@@ -3693,6 +4182,10 @@ async fn materialize_lightrag_job(
 			consolidation: None,
 			knowledge: None,
 			temporal_reconciliation: None,
+			dreaming_readback: None,
+			memory_summaries: Vec::new(),
+			proactive_briefs: Vec::new(),
+			scheduled_tasks: Vec::new(),
 			trace_stages: None,
 		},
 	))
@@ -3917,35 +4410,8 @@ async fn materialize_elf_job(
 
 	run_worker(runtime).await?;
 
-	let started_at = Instant::now();
-	let response = service
-		.search_raw(SearchRequest {
-			tenant_id: TENANT_ID.to_string(),
-			project_id: project_id.clone(),
-			agent_id: AGENT_ID.to_string(),
-			token_id: None,
-			payload_level: PayloadLevel::L2,
-			read_profile: "private_only".to_string(),
-			query: loaded.job.prompt.content.clone(),
-			top_k: Some(5),
-			candidate_k: Some(20),
-			filter: None,
-			record_hits: Some(false),
-			ranking: None,
-		})
-		.await
-		.map_err(|err| eyre::eyre!("ELF search_raw failed for {}: {err}", loaded.job.job_id))?;
-	let latency_ms = started_at.elapsed().as_secs_f64() * 1_000.0;
-	let mut evidence_ids = Vec::new();
-
-	for item in &response.items {
-		if let Some(evidence_id) =
-			item.source_ref.get("evidence_id").and_then(serde_json::Value::as_str)
-		{
-			push_unique(&mut evidence_ids, evidence_id.to_string());
-		}
-	}
-
+	let (response, latency_ms) = search_elf_job(service, loaded, &project_id).await?;
+	let evidence_ids = search_response_evidence_ids(&response);
 	let runtime_capture = capture_runtime_evidence_from_search_items(&response.items);
 	let capture = capture_with_runtime_source_refs(ingested.capture.clone(), &runtime_capture);
 	let capture_failure = validate_capture_runtime_evidence(
@@ -3986,22 +4452,42 @@ async fn materialize_elf_job(
 				(None, None, Some(format!("live_adapter.consolidation: {err}"))),
 			Err(_) => (None, None, None),
 		};
-	let failure = knowledge_failure.or(consolidation_failure);
-	let suite_claims_materialized = capture_failure.is_none()
-		&& ((loaded.job.suite == "knowledge_compilation" && knowledge.is_some())
-			|| (loaded.job.suite == "consolidation" && consolidation.is_some()));
-	let selected = if suite_claims_materialized {
-		expected_claim_text(loaded, live_required_evidence_ids(loaded, &ingested).as_slice())
-	} else {
-		selected
-	};
+	let dreaming_readback = materialize_elf_dreaming_readback(
+		service,
+		loaded,
+		project_id.as_str(),
+		response.trace_id,
+		adapter_id,
+	)
+	.await?;
+	let dreaming_failure = dreaming_readback.as_ref().and_then(|output| {
+		if output.materialization.missing_source_refs.is_empty() {
+			None
+		} else {
+			Some(format!(
+				"live_adapter.dreaming_readback missing source refs: {}",
+				output.materialization.missing_source_refs.join(", ")
+			))
+		}
+	});
+	let failure = knowledge_failure.or(consolidation_failure).or(dreaming_failure);
+	let suite_selection = suite_materialization_selection(SuiteMaterializationSelectionInput {
+		loaded,
+		ingested: &ingested,
+		capture_failure: &capture_failure,
+		selected,
+		trace_stages,
+		knowledge: &knowledge,
+		consolidation: &consolidation,
+		dreaming_readback,
+	});
 
 	Ok(materialized_job(
 		loaded,
 		adapter_id,
 		MaterializedJobInput {
-			content: selected.content,
-			evidence_ids: selected.evidence_ids,
+			content: suite_selection.selected.content,
+			evidence_ids: suite_selection.selected.evidence_ids,
 			pages,
 			latency_ms,
 			indexing_latency_ms: None,
@@ -4017,11 +4503,42 @@ async fn materialize_elf_job(
 			consolidation,
 			knowledge,
 			temporal_reconciliation,
-			trace_stages,
+			dreaming_readback: suite_selection.dreaming_readback,
+			memory_summaries: suite_selection.memory_summaries,
+			proactive_briefs: suite_selection.proactive_briefs,
+			scheduled_tasks: suite_selection.scheduled_tasks,
+			trace_stages: suite_selection.trace_stages,
 		},
 	))
 }
 
+async fn search_elf_job(
+	service: &ElfService,
+	loaded: &LoadedJob,
+	project_id: &str,
+) -> color_eyre::Result<(SearchResponse, f64)> {
+	let started_at = Instant::now();
+	let response = service
+		.search_raw(SearchRequest {
+			tenant_id: TENANT_ID.to_string(),
+			project_id: project_id.to_string(),
+			agent_id: AGENT_ID.to_string(),
+			token_id: None,
+			payload_level: PayloadLevel::L2,
+			read_profile: "private_only".to_string(),
+			query: loaded.job.prompt.content.clone(),
+			top_k: Some(5),
+			candidate_k: Some(20),
+			filter: None,
+			record_hits: Some(false),
+			ranking: None,
+		})
+		.await
+		.map_err(|err| eyre::eyre!("ELF search_raw failed for {}: {err}", loaded.job.job_id))?;
+
+	Ok((response, started_at.elapsed().as_secs_f64() * 1_000.0))
+}
+
 async fn materialize_elf_consolidation(
 	runtime: &BaselineRuntime,
 	service: &ElfService,
diff --git a/apps/elf-eval/tests/real_world_job_benchmark.rs b/apps/elf-eval/tests/real_world_job_benchmark.rs
index 03c23feb..02ebec13 100644
--- a/apps/elf-eval/tests/real_world_job_benchmark.rs
+++ b/apps/elf-eval/tests/real_world_job_benchmark.rs
@@ -238,6 +238,14 @@ fn letta_core_archive_export_readback_report_json_path() -> Result<PathBuf> {
 	report_snapshot_path("2026-06-19-letta-core-archive-export-readback-report.json")
 }
 
+fn service_native_dreaming_readback_report_json_path() -> Result<PathBuf> {
+	report_snapshot_path("2026-06-19-service-native-dreaming-readback-report.json")
+}
+
+fn service_native_dreaming_readback_materialization_json_path() -> Result<PathBuf> {
+	report_snapshot_path("2026-06-19-service-native-dreaming-readback-materialization.json")
+}
+
 fn openviking_trajectory_materialization_report_markdown_path() -> Result<PathBuf> {
 	Ok(workspace_root()?
 		.join("docs")
@@ -254,6 +262,14 @@ fn letta_core_archive_export_readback_report_markdown_path() -> Result<PathBuf>
 		.join("2026-06-19-letta-core-archive-export-readback-report.md"))
 }
 
+fn service_native_dreaming_readback_report_markdown_path() -> Result<PathBuf> {
+	Ok(workspace_root()?
+		.join("docs")
+		.join("evidence")
+		.join("benchmarking")
+		.join("2026-06-19-service-native-dreaming-readback-report.md"))
+}
+
 fn live_temporal_reconciliation_report_json_path() -> Result<PathBuf> {
 	report_snapshot_path("2026-06-16-live-temporal-reconciliation-report.json")
 }
@@ -3230,6 +3246,173 @@ fn letta_core_archive_export_readback_report_preserves_blocked_gates() -> Result
 	Ok(())
 }
 
+#[test]
+fn service_native_dreaming_readback_report_materializes_public_jobs() -> Result<()> {
+	let report = serde_json::from_str::<Value>(&fs::read_to_string(
+		service_native_dreaming_readback_report_json_path()?,
+	)?)?;
+	let materialization = serde_json::from_str::<Value>(&fs::read_to_string(
+		service_native_dreaming_readback_materialization_json_path()?,
+	)?)?;
+	let markdown = fs::read_to_string(service_native_dreaming_readback_report_markdown_path()?)?;
+	let benchmarking_index = fs::read_to_string(benchmarking_index_path()?)?;
+	let readme = fs::read_to_string(readme_path()?)?;
+
+	assert_service_native_dreaming_report_summary(&report)?;
+	assert_service_native_dreaming_report_jobs(&report)?;
+	assert_service_native_dreaming_materialization(&materialization)?;
+	assert_service_native_dreaming_docs(&markdown, &benchmarking_index, &readme);
+
+	Ok(())
+}
+
+fn assert_service_native_dreaming_report_summary(report: &Value) -> Result<()> {
+	assert_eq!(
+		report.pointer("/adapter/adapter_id").and_then(Value::as_str),
+		Some("elf_service_native_dreaming")
+	);
+	assert_eq!(
+		report.pointer("/adapter/behavior").and_then(Value::as_str),
+		Some("service_native_dreaming_readback")
+	);
+	assert_eq!(report.pointer("/summary/job_count").and_then(Value::as_u64), Some(11));
+	assert_eq!(report.pointer("/summary/pass").and_then(Value::as_u64), Some(9));
+	assert_eq!(report.pointer("/summary/wrong_result").and_then(Value::as_u64), Some(0));
+	assert_eq!(report.pointer("/summary/blocked").and_then(Value::as_u64), Some(2));
+	assert_eq!(report.pointer("/summary/wrong_result_count").and_then(Value::as_u64), Some(0));
+	assert_eq!(report.pointer("/summary/evidence_coverage").and_then(Value::as_f64), Some(1.0));
+	assert_eq!(report.pointer("/summary/source_ref_coverage").and_then(Value::as_f64), Some(1.0));
+	assert_eq!(report.pointer("/summary/quote_coverage").and_then(Value::as_f64), Some(1.0));
+	assert_eq!(
+		report.pointer("/summary/memory_summary/source_ref_coverage").and_then(Value::as_f64),
+		Some(1.0)
+	);
+	assert_eq!(
+		report.pointer("/summary/proactive_brief/evidence_ref_coverage").and_then(Value::as_f64),
+		Some(1.0)
+	);
+	assert_eq!(
+		report.pointer("/summary/scheduled_memory/trace_coverage").and_then(Value::as_f64),
+		Some(1.0)
+	);
+	assert_eq!(
+		report.pointer("/summary/scheduled_memory/source_mutation_count").and_then(Value::as_u64),
+		Some(0)
+	);
+
+	let suites = array_at(report, "/suites")?;
+	let memory = find_by_field(suites, "/suite_id", "memory_summary")?;
+	let proactive = find_by_field(suites, "/suite_id", "proactive_brief")?;
+	let scheduled = find_by_field(suites, "/suite_id", "scheduled_memory")?;
+
+	assert_eq!(memory.pointer("/status").and_then(Value::as_str), Some("pass"));
+	assert_eq!(proactive.pointer("/status").and_then(Value::as_str), Some("blocked"));
+	assert_eq!(scheduled.pointer("/status").and_then(Value::as_str), Some("blocked"));
+
+	Ok(())
+}
+
+fn assert_service_native_dreaming_report_jobs(report: &Value) -> Result<()> {
+	let jobs = array_at(report, "/jobs")?;
+	let memory = find_by_field(jobs, "/job_id", "memory-summary-source-trace-001")?;
+	let daily = find_by_field(jobs, "/job_id", "proactive-daily-project-brief-001")?;
+	let private_brief =
+		find_by_field(jobs, "/job_id", "proactive-private-corpus-refresh-blocked-001")?;
+	let weekly = find_by_field(jobs, "/job_id", "scheduled-weekly-project-status-summary-001")?;
+	let private_scheduled =
+		find_by_field(jobs, "/job_id", "scheduled-private-provider-scheduler-blocked-001")?;
+
+	assert_eq!(memory.pointer("/status").and_then(Value::as_str), Some("pass"));
+	assert_eq!(daily.pointer("/status").and_then(Value::as_str), Some("pass"));
+	assert_eq!(weekly.pointer("/status").and_then(Value::as_str), Some("pass"));
+	assert_eq!(private_brief.pointer("/status").and_then(Value::as_str), Some("blocked"));
+	assert_eq!(private_scheduled.pointer("/status").and_then(Value::as_str), Some("blocked"));
+	assert!(!array_contains_str(memory, "/produced_evidence", "stale-summary-gap")?);
+	assert!(!array_contains_str(memory, "/produced_evidence", "summary-temporary-claim")?);
+	assert!(!array_contains_str(daily, "/produced_evidence", "daily-old-parity-trap")?);
+	assert!(!array_contains_str(
+		weekly,
+		"/produced_evidence",
+		"scheduled-weekly-hosted-parity-trap"
+	)?);
+
+	Ok(())
+}
+
+fn assert_service_native_dreaming_materialization(materialization: &Value) -> Result<()> {
+	assert_eq!(
+		materialization.pointer("/schema").and_then(Value::as_str),
+		Some("elf.real_world_live_adapter_materialization/v1")
+	);
+	assert_eq!(
+		materialization.pointer("/adapter_id").and_then(Value::as_str),
+		Some("elf_service_native_dreaming")
+	);
+	assert_eq!(materialization.pointer("/status").and_then(Value::as_str), Some("blocked"));
+
+	let jobs = array_at(materialization, "/jobs")?;
+	let memory = find_by_field(jobs, "/job_id", "memory-summary-source-trace-001")?;
+	let daily = find_by_field(jobs, "/job_id", "proactive-daily-project-brief-001")?;
+	let private_brief =
+		find_by_field(jobs, "/job_id", "proactive-private-corpus-refresh-blocked-001")?;
+
+	for job in jobs {
+		match job.pointer("/status").and_then(Value::as_str) {
+			Some("pass") => {
+				assert_eq!(
+					job.pointer("/dreaming_readback/runtime_path").and_then(Value::as_str),
+					Some("ElfService::add_note -> ElfService::list -> derived readback artifact")
+				);
+				assert!(array_at(job, "/dreaming_readback/missing_source_refs")?.is_empty());
+				assert_eq!(
+					job.pointer("/dreaming_readback/source_mutation_count").and_then(Value::as_u64),
+					Some(0)
+				);
+				assert_eq!(
+					job.pointer("/dreaming_readback/no_source_mutation_checked")
+						.and_then(Value::as_bool),
+					Some(true)
+				);
+			},
+			Some("blocked") => {
+				assert!(job.pointer("/dreaming_readback").is_none_or(Value::is_null));
+			},
+			status => {
+				return Err(eyre::eyre!(
+					"unexpected service-native materialization status: {status:?}"
+				));
+			},
+		}
+	}
+
+	assert!(array_contains_str(
+		memory,
+		"/dreaming_readback/selected_source_refs",
+		"stale-summary-gap"
+	)?);
+	assert!(!array_contains_str(memory, "/evidence_ids", "stale-summary-gap")?);
+	assert!(array_contains_str(
+		daily,
+		"/dreaming_readback/selected_source_refs",
+		"daily-old-parity-trap"
+	)?);
+	assert!(!array_contains_str(daily, "/evidence_ids", "daily-old-parity-trap")?);
+	assert!(private_brief.pointer("/dreaming_readback").is_none_or(Value::is_null));
+
+	Ok(())
+}
+
+fn assert_service_native_dreaming_docs(markdown: &str, benchmarking_index: &str, readme: &str) {
+	assert!(markdown.contains("9 pass"));
+	assert!(markdown.contains("0 wrong_result"));
+	assert!(markdown.contains("2 typed blocked"));
+	assert!(markdown.contains("ElfService::add_note -> ElfService::list"));
+	assert!(markdown.contains("Do not claim ELF broadly beats OpenAI Pulse"));
+	assert!(benchmarking_index.contains("2026-06-19-service-native-dreaming-readback-report.md"));
+	assert!(readme.contains("Service-native Dreaming readback after XY-986"));
+	assert!(readme.contains("real-world-memory-service-native-dreaming"));
+}
+
 fn assert_openviking_trajectory_materialization_summary(report: &Value) -> Result<()> {
 	assert_eq!(
 		report.pointer("/schema").and_then(Value::as_str),
diff --git a/docs/evidence/benchmarking/2026-06-19-service-native-dreaming-readback-report.md b/docs/evidence/benchmarking/2026-06-19-service-native-dreaming-readback-report.md
new file mode 100644
index 00000000..8af31dfc
--- /dev/null
+++ b/docs/evidence/benchmarking/2026-06-19-service-native-dreaming-readback-report.md
@@ -0,0 +1,128 @@
+---
+type: Evidence
+title: "Service-Native Dreaming Readback Report - June 19, 2026"
+description: "Checked-in benchmark evidence record: Service-Native Dreaming Readback Report - June 19, 2026."
+resource: docs/evidence/benchmarking/2026-06-19-service-native-dreaming-readback-report.md
+status: active
+authority: current_state
+owner: evidence
+last_verified: 2026-06-19
+tags:
+  - docs
+  - evidence
+  - benchmarking
+---
+# Service-Native Dreaming Readback Report - June 19, 2026
+
+Goal: Close XY-986 by moving the public/local Dreaming summary, proactive brief,
+and scheduled-memory readback slice from fixture-only artifacts into a reproducible
+ELF service-native materialization path.
+Read this when: You need to know whether ELF now materializes Dreaming-style
+derived outputs through `ElfService` before benchmark scoring.
+Inputs:
+`apps/elf-eval/fixtures/report_snapshots/2026-06-19-service-native-dreaming-readback-report.json`,
+`apps/elf-eval/fixtures/report_snapshots/2026-06-19-service-native-dreaming-readback-materialization.json`,
+`apps/elf-eval/fixtures/real_world_memory/memory_summary/`,
+`apps/elf-eval/fixtures/real_world_memory/proactive_brief/`, and
+`apps/elf-eval/fixtures/real_world_memory/scheduled_memory/`.
+Outputs: A Docker-contained service-native Dreaming benchmark command, a scored
+report snapshot, and a materialization snapshot proving readback through
+`ElfService::add_note -> ElfService::list -> derived readback artifact`.
+
+## Executive Judgment
+
+The service-native Dreaming readback follow-up strengthens the evidence authority
+of ELF's public/local Dreaming results, but it does not prove broad
+managed-memory product superiority.
+
+`cargo make real-world-memory-service-native-dreaming` runs inside the baseline
+Docker runner and publishes:
+
+- 11 jobs.
+- 9 pass.
+- 0 wrong_result.
+- 0 lifecycle_fail.
+- 0 incomplete.
+- 2 typed blocked.
+- 22/22 expected evidence coverage.
+- 22/22 source-ref coverage.
+- 22/22 quote coverage.
+
+The two blocked jobs are the existing XY-930 private/provider gates:
+`proactive-private-corpus-refresh-blocked-001` and
+`scheduled-private-provider-scheduler-blocked-001`. They remain blocked because no
+operator-owned private production corpus manifest, provider credentials, or hosted
+scheduler configuration is present.
+
+## What Changed
+
+- Added `cargo make real-world-memory-service-native-dreaming`.
+- Added `scripts/real-world-dreaming-service-native.sh`.
+- Added the `memory-service-native-dreaming` Docker runner profile.
+- Extended the ELF live adapter so `memory_summary`, `proactive_brief`, and
+  `scheduled_memory` jobs can materialize derived output artifacts from service
+  readback instead of fixture-only answer payloads.
+- Separated the full artifact source-ref audit from the scored evidence ids. The
+  materialization snapshot keeps stale, superseded, tombstoned, and dropped refs
+  visible for review, while the scored answer only exposes required non-trap refs.
+
+## Command Evidence
+
+| Command | Status | Artifact | Result |
+| --- | --- | --- | --- |
+| `cargo make real-world-memory-service-native-dreaming` | `pass` | `tmp/real-world-memory/service-native-dreaming/report.json`; `tmp/real-world-memory/service-native-dreaming/elf-materialization.json` | 11 jobs, 9 pass, 0 wrong_result, 2 blocked, 22/22 evidence/source-ref/quote coverage. |
+
+## Service Readback Evidence
+
+Every passing public/local Dreaming job records:
+
+- `runtime_path`: `ElfService::add_note -> ElfService::list -> derived readback artifact`.
+- `missing_source_refs`: `[]`.
+- `source_mutation_count`: `0`.
+- `no_source_mutation_checked`: `true`.
+
+The audit snapshot intentionally preserves stale and trap refs inside
+`dreaming_readback.selected_source_refs` when they appear in `source_trace`; the
+scored `evidence_ids` and benchmark `produced_evidence` exclude those trap refs so
+they are not treated as used evidence.
+
+## Improvement/Regression Readback
+
+| Bucket | Count | Meaning |
+| --- | --- | --- |
+| `improved` | 9 | Public/local Dreaming jobs now pass after service-native readback materialization. |
+| `regressed` | 0 | No checked public/local Dreaming job moved backward. |
+| `blocked` | 2 | Private corpus and provider/hosted scheduler gates remain blocked under XY-930. |
+
+Compared with the earlier fixture-backed Dreaming readiness evidence, this lane
+improves runtime authority and auditability: the benchmark now proves ELF can
+materialize reviewable summary, proactive brief, and scheduled-memory artifacts
+through its own service list/readback path. It does not add provider-backed private
+corpus coverage or hosted scheduler parity.
+
+## Claim Boundaries
+
+Allowed:
+
+- ELF has a reproducible service-native Dreaming readback benchmark for the checked
+  public/local `memory_summary`, `proactive_brief`, and `scheduled_memory` fixtures.
+- The current service-native slice scores 9 pass, 0 wrong_result, and 2 typed
+  blockers with full evidence/source-ref/quote coverage.
+- Passing jobs preserve source-readback audit metadata and record zero source
+  mutations.
+
+Not allowed:
+
+- Do not claim ELF broadly beats OpenAI Pulse, ChatGPT Tasks, Claude Dreams, or
+  hosted managed-memory products from this local service-native slice.
+- Do not claim private-corpus or provider-backed Dreaming readiness until XY-930
+  operator-owned inputs exist.
+- Do not treat stale/trap refs preserved in materialization audit metadata as used
+  benchmark evidence.
+
+## Next Optimization Direction
+
+The next useful lane is XY-930: run private-corpus, provider-backed, and hosted
+scheduler gates only when operator-owned inputs exist. Until then, optimization
+should focus on surfacing these derived artifacts in operator UI/review workflows
+without converting private/provider blockers into claimed wins.
diff --git a/docs/evidence/benchmarking/index.md b/docs/evidence/benchmarking/index.md
index 47a31d1a..6421ddeb 100644
--- a/docs/evidence/benchmarking/index.md
+++ b/docs/evidence/benchmarking/index.md
@@ -39,3 +39,4 @@ Routes to: Benchmarking evidence concepts under `docs/evidence/benchmarking/`.
 - `2026-06-19-letta-core-archive-export-readback-report.md`: Letta Core/Archive Export-Readback Report - June 19, 2026; adds a Docker-contained Letta materialization/report command while preserving all six core/archive comparison scenarios as typed blockers until exported core block JSON, archival readback/search JSON, and source ids exist.
 - `2026-06-19-openviking-trajectory-materialization-report.md`: OpenViking Trajectory Materialization Report - June 19, 2026; materializes the context-trajectory fixture slice through a dedicated repo task while preserving staged retrieval, hierarchy selection, and recursive/context expansion as typed blockers.
 - `2026-06-19-qmd-debug-ergonomics-dreaming-retest-report.md`: qmd Debug-Ergonomics Dreaming Retest Report - June 19, 2026; confirms qmd's default top-k/replay edge is unchanged while ELF keeps the narrow operator-debug trace/stage visibility wins.
+- `2026-06-19-service-native-dreaming-readback-report.md`: Service-Native Dreaming Readback Report - June 19, 2026; materializes memory summary, proactive brief, and scheduled-memory derived outputs through `ElfService` readback with 9 pass, 0 wrong_result, and 2 typed XY-930 blockers.
diff --git a/docs/log.md b/docs/log.md
index fa379d2a..b6f87575 100644
--- a/docs/log.md
+++ b/docs/log.md
@@ -45,3 +45,7 @@ logs.
   for XY-984, plus `cargo make smoke-letta-core-archive-export-readback`, preserving
   all six Letta comparison scenarios as typed blockers until exported core block JSON,
   archival readback/search JSON, and fixture source ids exist.
+- Added the service-native Dreaming readback report and snapshots for XY-986, plus
+  `cargo make real-world-memory-service-native-dreaming`, proving public/local
+  memory summary, proactive brief, and scheduled-memory artifacts can be materialized
+  through `ElfService` readback while preserving XY-930 private/provider blockers.
diff --git a/scripts/real-world-docker.sh b/scripts/real-world-docker.sh
index a6413839..ee7e9685 100755
--- a/scripts/real-world-docker.sh
+++ b/scripts/real-world-docker.sh
@@ -22,6 +22,12 @@ memory-live-consolidation)
 		-e ELF_CONSOLIDATION_LIVE_FIXTURES \
 		baseline-runner bash scripts/real-world-consolidation-live-adapter.sh
 	;;
+memory-service-native-dreaming)
+	docker compose -f docker-compose.baseline.yml run --build --rm \
+		-e ELF_DREAMING_SERVICE_NATIVE_REPORT_DIR \
+		-e ELF_DREAMING_SERVICE_NATIVE_FIXTURES \
+		baseline-runner bash scripts/real-world-dreaming-service-native.sh
+	;;
 memory-live-adapters)
 	lightrag_start="$(printenv ELF_LIGHTRAG_CONTEXT_START || true)"
 	graphiti_start="$(printenv ELF_GRAPHITI_ZEP_SMOKE_START || true)"
diff --git a/scripts/real-world-dreaming-service-native.sh b/scripts/real-world-dreaming-service-native.sh
new file mode 100755
index 00000000..f6592d39
--- /dev/null
+++ b/scripts/real-world-dreaming-service-native.sh
@@ -0,0 +1,88 @@
+#!/usr/bin/env bash
+set -euo pipefail
+# Service-native Dreaming readback: materialize fixtures, score them, publish reports.
+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+REPORT_DIR="${ELF_DREAMING_SERVICE_NATIVE_REPORT_DIR:-${ROOT_DIR}/tmp/real-world-memory/service-native-dreaming}"
+FIXTURE_ROOT="${ELF_DREAMING_SERVICE_NATIVE_FIXTURES:-${ROOT_DIR}/apps/elf-eval/fixtures/real_world_memory}"
+INPUT_FIXTURE_DIR="${REPORT_DIR}/input-fixtures"
+# Docker-only by default; ELF_DREAMING_SERVICE_NATIVE_ALLOW_HOST=1 permits a host run.
+if [[ ! -f "/.dockerenv" && "${ELF_DREAMING_SERVICE_NATIVE_ALLOW_HOST:-0}" != "1" ]]; then
+  echo "Refusing to run service-native Dreaming readback outside Docker. Use cargo make real-world-memory-service-native-dreaming." >&2
+  exit 1
+fi
+# Fail fast when a tool used below is not on PATH.
+for cmd in bash cargo jq; do
+  if ! command -v "${cmd}" >/dev/null 2>&1; then
+    echo "Missing ${cmd} in service-native Dreaming readback runner." >&2
+    exit 1
+  fi
+done
+# Remove artifacts left by a previous run before regenerating them.
+mkdir -p "${REPORT_DIR}"
+rm -rf "${INPUT_FIXTURE_DIR}" \
+  "${REPORT_DIR:?}/elf-fixtures" \
+  "${REPORT_DIR:?}/elf-materialization.json" \
+  "${REPORT_DIR:?}/report.json" \
+  "${REPORT_DIR:?}/report.md" \
+  "${REPORT_DIR:?}/summary.json"
+# Stage only the memory_summary, proactive_brief, and scheduled_memory suites as input.
+mkdir -p "${INPUT_FIXTURE_DIR}"
+cp -R "${FIXTURE_ROOT}/memory_summary" "${INPUT_FIXTURE_DIR}/memory_summary"
+cp -R "${FIXTURE_ROOT}/proactive_brief" "${INPUT_FIXTURE_DIR}/proactive_brief"
+cp -R "${FIXTURE_ROOT}/scheduled_memory" "${INPUT_FIXTURE_DIR}/scheduled_memory"
+
+cd "${ROOT_DIR}"
+# Step 1: materialize the staged fixtures through the ELF live adapter.
+cargo run -p elf-eval --bin real_world_live_adapter -- elf \
+  --fixtures "${INPUT_FIXTURE_DIR}" \
+  --out-fixtures "${REPORT_DIR}/elf-fixtures" \
+  --evidence-out "${REPORT_DIR}/elf-materialization.json" \
+  --config config/local/elf.docker.toml \
+  --adapter-id elf_service_native_dreaming
+# Step 2: score the materialized fixtures into report.json.
+cargo run -p elf-eval --bin real_world_job_benchmark -- run \
+  --fixtures "${REPORT_DIR}/elf-fixtures" \
+  --out "${REPORT_DIR}/report.json" \
+  --run-id real-world-memory-service-native-dreaming \
+  --adapter-id elf_service_native_dreaming \
+  --adapter-name "ELF service-native Dreaming readback adapter" \
+  --adapter-behavior service_native_dreaming_readback \
+  --adapter-storage-status pass \
+  --adapter-runtime-status pass \
+  --adapter-notes "Materialized through ElfService add_note/list/search readback for memory_summary, proactive_brief, and scheduled_memory fixtures. Private/provider blockers remain typed non-pass records under XY-930."
+# Step 3: render report.json as Markdown.
+cargo run -p elf-eval --bin real_world_job_benchmark -- publish \
+  --report "${REPORT_DIR}/report.json" \
+  --out "${REPORT_DIR}/report.md"
+# Step 4: write summary.json; report paths follow artifact_dir so an overridden report dir stays consistent.
+jq -n \
+  --slurpfile materialization "${REPORT_DIR}/elf-materialization.json" \
+  --slurpfile report "${REPORT_DIR}/report.json" \
+  '(env.ELF_DREAMING_SERVICE_NATIVE_REPORT_DIR // "tmp/real-world-memory/service-native-dreaming") as $artifact_dir | {
+    schema: "elf.service_native_dreaming_readback_sweep/v1",
+    generated_at: (now | todateiso8601),
+    fixture_dir: (env.ELF_DREAMING_SERVICE_NATIVE_FIXTURES // "apps/elf-eval/fixtures/real_world_memory"),
+    artifact_dir: $artifact_dir,
+    adapter: {
+      adapter_id: "elf_service_native_dreaming",
+      evidence_class: "service_native_readback",
+      materialization: $materialization[0],
+      report: {
+        json: ($artifact_dir + "/report.json"),
+        markdown: ($artifact_dir + "/report.md"),
+        summary: $report[0].summary,
+        suites: $report[0].suites
+      }
+    },
+    comparison_boundary: {
+      baseline: "XY-955 fixture-backed Dreaming outputs",
+      judgment_rule: "improved only when service-native readback scores source-linked artifacts without stale, tombstoned, unsupported, untraced, or source-mutation violations",
+      private_provider_boundary: "XY-930 remains blocked unless operator-owned manifest and explicit provider setup exist"
+    }
+  }' >"${REPORT_DIR}/summary.json"
+
+echo "Service-native Dreaming readback reports:"
+echo "  ${REPORT_DIR}/elf-materialization.json"
+echo "  ${REPORT_DIR}/report.json"
+echo "  ${REPORT_DIR}/report.md"
+echo "  ${REPORT_DIR}/summary.json"