Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions Makefile.toml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
# | real-world-memory-scheduled | composite | |
# | real-world-memory-scheduled-json | command | |
# | real-world-memory-scheduled-report | command | |
# | real-world-memory-service-native-dreaming | command | |
# | real-world-memory-summary | composite | |
# | real-world-memory-summary-json | command | |
# | real-world-memory-summary-report | command | |
Expand Down Expand Up @@ -865,6 +866,14 @@ args = [
"tmp/real-world-memory/scheduled/report.md",
]

# Invokes `bash scripts/real-world-docker.sh memory-service-native-dreaming`.
# The README describes this task as the Docker-contained ELF service readback for
# the `memory_summary`, `proactive_brief`, and `scheduled_memory` suites.
[tasks.real-world-memory-service-native-dreaming]
# NOTE(review): presumably stops cargo-make from fanning this task out to every
# workspace member so it runs once from the repository root — confirm.
workspace = false
command = "bash"
args = [
"scripts/real-world-docker.sh",
# Mode argument passed to the helper script; its handling lives in that script.
"memory-service-native-dreaming",
]

[tasks.real-world-memory-summary]
workspace = false
dependencies = [
Expand Down
16 changes: 13 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,14 @@ provider-backed ELF evidence was required.
competitive status unchanged: no ELF-over-Letta win, tie, or loss is allowed until
exported Letta core block JSON, archival readback/search JSON, and fixture source ids
are present.
- Service-native Dreaming readback after XY-986: the June 19 follow-up adds
`cargo make real-world-memory-service-native-dreaming`, a Docker-contained ELF
service readback command for `memory_summary`, `proactive_brief`, and
`scheduled_memory`. The slice scores 9 pass, 0 wrong_result, and 2 typed XY-930
private/provider blockers with 22/22 evidence, source-ref, and quote coverage.
This improves local Dreaming runtime authority and auditability, but it does not
prove Pulse, ChatGPT Tasks, Claude Dreams, hosted managed-memory, or private-corpus
parity.
- Full-suite live real-world adapter sweep after XY-926: ELF and qmd emit
Docker-isolated `live_real_world` records for all 55 checked-in jobs across 13 suites
through `cargo make real-world-memory-live-adapters`. Both keep the original
Expand Down Expand Up @@ -309,6 +317,7 @@ Detailed evidence and interpretation:
- [Dreaming Competitor-Strength Retest Report - June 17, 2026](docs/evidence/benchmarking/2026-06-17-dreaming-competitor-strength-retest-report.md)
- [qmd Debug-Ergonomics Dreaming Retest Report - June 19, 2026](docs/evidence/benchmarking/2026-06-19-qmd-debug-ergonomics-dreaming-retest-report.md)
- [OpenViking Trajectory Materialization Report - June 19, 2026](docs/evidence/benchmarking/2026-06-19-openviking-trajectory-materialization-report.md)
- [Service-Native Dreaming Readback Report - June 19, 2026](docs/evidence/benchmarking/2026-06-19-service-native-dreaming-readback-report.md)
- [Live Baseline Benchmark Runbook](docs/runbook/benchmarking/live_baseline_benchmark.md)
- [Single-User Production Runbook](docs/runbook/single_user_production.md)
- Benchmark contract:
Expand Down Expand Up @@ -406,9 +415,10 @@ Detailed comparison, mechanism-level analysis, and source map:
- [Dreaming Product Surface Follow-Up Research](docs/research/dreaming_product_surface_followup.md)

Latest real-world benchmark report: June 19, 2026. Latest external research refresh:
June 11, 2026; June 19 adds the qmd debug-ergonomics Dreaming retest after the June
17 competitor-strength closeout and the June 16 temporal reconciliation, live
consolidation self-check, proactive-brief, and scheduled-memory scoring evidence.
June 11, 2026; June 19 adds service-native Dreaming readback after the qmd
debug-ergonomics Dreaming retest, the June 17 competitor-strength closeout, and the
June 16 temporal reconciliation, live consolidation self-check, proactive-brief, and
scheduled-memory scoring evidence.

## Documentation

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,324 @@
{
"schema": "elf.real_world_live_adapter_materialization/v1",
"adapter_id": "elf_service_native_dreaming",
"adapter_kind": "elf_service_runtime",
"status": "blocked",
"fixtures": "/workspace/tmp/real-world-memory/service-native-dreaming/input-fixtures",
"generated_fixtures": "/workspace/tmp/real-world-memory/service-native-dreaming/elf-fixtures",
"command_evidence": [
{
"label": "elf_service_runtime",
"status": "blocked",
"command": "cargo run -p elf-eval --bin real_world_live_adapter -- elf",
"artifact": "/workspace/tmp/real-world-memory/service-native-dreaming/elf-materialization.json",
"reason": "ELF live adapter used ElfService, worker indexing, and search_raw."
}
],
"jobs": [
{
"job_id": "memory-summary-source-trace-001",
"suite": "memory_summary",
"title": "Read back a reviewable current memory summary with source trace",
"status": "pass",
"query": "Show the current memory summary surface and explain why stale, tombstoned, and unsupported derived memories are not top-of-mind current facts.",
"evidence_ids": [
"summary-contract-current",
"xy952-summary-contract",
"summary-ttl-tombstone",
"summary-contract-non-parity-boundary"
],
"returned_count": 5,
"latency_ms": 51.676775,
"trace_id": "2e80669d-2bcf-4238-b780-9b42aa72d2a2",
"failure": null,
"dreaming_readback": {
"artifact_kind": "elf.memory_summary/v1",
"runtime_path": "ElfService::add_note -> ElfService::list -> derived readback artifact",
"service_list_count": 9,
"trace_id": "2e80669d-2bcf-4238-b780-9b42aa72d2a2",
"generated_artifact_count": 1,
"selected_source_refs": [
"stale-summary-gap",
"summary-background-sot",
"summary-contract-current",
"summary-contract-non-parity-boundary",
"summary-temporary-claim",
"summary-ttl-tombstone",
"superseded-live-evolution-loss",
"xy952-summary-contract"
],
"missing_source_refs": [],
"source_mutation_count": 0,
"no_source_mutation_checked": true
}
},
{
"job_id": "proactive-daily-project-brief-001",
"suite": "proactive_brief",
"title": "Generate a daily project brief from current project memory",
"status": "pass",
"query": "Generate a daily project brief with only source-linked current recommendations.",
"evidence_ids": [
"daily-current-validation-gate",
"daily-current-ledger-update"
],
"returned_count": 3,
"latency_ms": 6.884306,
"trace_id": "fc854889-2ac4-436b-a885-b43053922cb9",
"failure": null,
"dreaming_readback": {
"artifact_kind": "elf.proactive_project_brief/v1",
"runtime_path": "ElfService::add_note -> ElfService::list -> derived readback artifact",
"service_list_count": 3,
"trace_id": "fc854889-2ac4-436b-a885-b43053922cb9",
"generated_artifact_count": 1,
"selected_source_refs": [
"daily-current-ledger-update",
"daily-current-validation-gate",
"daily-old-parity-trap"
],
"missing_source_refs": [],
"source_mutation_count": 0,
"no_source_mutation_checked": true
}
},
{
"job_id": "proactive-private-corpus-refresh-blocked-001",
"suite": "proactive_brief",
"title": "Block private-corpus refresh suggestions when no operator manifest exists",
"status": "blocked",
"query": "Suggest a private-corpus refresh when private inputs exist.",
"evidence_ids": [],
"returned_count": 0,
"latency_ms": 0.0,
"trace_id": null,
"failure": "No operator-owned private production corpus manifest is available; private-corpus refresh suggestions stay blocked under XY-930."
},
{
"job_id": "proactive-resume-work-brief-001",
"suite": "proactive_brief",
"title": "Generate a resume-work brief from current handoff memory",
"status": "pass",
"query": "Generate a resume-work brief that identifies the current next action and validation command.",
"evidence_ids": [
"resume-current-handoff",
"resume-current-validation"
],
"returned_count": 3,
"latency_ms": 7.336724,
"trace_id": "c77d3ddb-d0c0-4168-a528-a585adfc8a7f",
"failure": null,
"dreaming_readback": {
"artifact_kind": "elf.proactive_project_brief/v1",
"runtime_path": "ElfService::add_note -> ElfService::list -> derived readback artifact",
"service_list_count": 3,
"trace_id": "c77d3ddb-d0c0-4168-a528-a585adfc8a7f",
"generated_artifact_count": 1,
"selected_source_refs": [
"resume-current-handoff",
"resume-current-validation",
"resume-stale-validation"
],
"missing_source_refs": [],
"source_mutation_count": 0,
"no_source_mutation_checked": true
}
},
{
"job_id": "proactive-stale-decision-audit-001",
"suite": "proactive_brief",
"title": "Warn about a stale project decision before suggesting work",
"status": "pass",
"query": "Audit stale project decisions before generating proactive suggestions.",
"evidence_ids": [
"stale-decision-old-gate",
"stale-decision-new-gate"
],
"returned_count": 2,
"latency_ms": 9.269810999999999,
"trace_id": "d7decd9a-d635-41b5-9dcc-c6e3c5c44fb7",
"failure": null,
"dreaming_readback": {
"artifact_kind": "elf.proactive_project_brief/v1",
"runtime_path": "ElfService::add_note -> ElfService::list -> derived readback artifact",
"service_list_count": 2,
"trace_id": "d7decd9a-d635-41b5-9dcc-c6e3c5c44fb7",
"generated_artifact_count": 1,
"selected_source_refs": [
"stale-decision-new-gate",
"stale-decision-old-gate"
],
"missing_source_refs": [],
"source_mutation_count": 0,
"no_source_mutation_checked": true
}
},
{
"job_id": "proactive-stale-plan-preference-warning-001",
"suite": "proactive_brief",
"title": "Reject stale plan and preference suggestions after TTL invalidation",
"status": "pass",
"query": "Warn me about stale plans or preferences before making proactive suggestions.",
"evidence_ids": [
"stale-plan-ttl",
"current-preference-concise-brief"
],
"returned_count": 5,
"latency_ms": 7.991892,
"trace_id": "f2e795b5-7ac4-4f7d-ab49-75392f6ba8a8",
"failure": null,
"dreaming_readback": {
"artifact_kind": "elf.proactive_project_brief/v1",
"runtime_path": "ElfService::add_note -> ElfService::list -> derived readback artifact",
"service_list_count": 5,
"trace_id": "f2e795b5-7ac4-4f7d-ab49-75392f6ba8a8",
"generated_artifact_count": 1,
"selected_source_refs": [
"current-plan-run-gate",
"current-preference-concise-brief",
"old-preference-long-brief",
"stale-plan-old",
"stale-plan-ttl"
],
"missing_source_refs": [],
"source_mutation_count": 0,
"no_source_mutation_checked": true
}
},
{
"job_id": "scheduled-knowledge-page-refresh-suggestion-001",
"suite": "scheduled_memory",
"title": "Suggest a knowledge-page refresh from scheduled memory",
"status": "pass",
"query": "Run the scheduled knowledge-page refresh suggestion task.",
"evidence_ids": [
"scheduled-knowledge-page-stale-finding",
"scheduled-knowledge-reviewable-refresh"
],
"returned_count": 3,
"latency_ms": 6.31843,
"trace_id": "df5b34bc-b8bd-427c-a531-7c37ff2444c8",
"failure": null,
"dreaming_readback": {
"artifact_kind": "elf.scheduled_memory_task/v1",
"runtime_path": "ElfService::add_note -> ElfService::list -> derived readback artifact",
"service_list_count": 3,
"trace_id": "df5b34bc-b8bd-427c-a531-7c37ff2444c8",
"generated_artifact_count": 1,
"selected_source_refs": [
"scheduled-knowledge-page-stale-finding",
"scheduled-knowledge-reviewable-refresh",
"scheduled-knowledge-silent-rewrite-trap"
],
"missing_source_refs": [],
"source_mutation_count": 0,
"no_source_mutation_checked": true
}
},
{
"job_id": "scheduled-private-provider-scheduler-blocked-001",
"suite": "scheduled_memory",
"title": "Block private/provider scheduled tasks without operator inputs",
"status": "blocked",
"query": "Run private/provider scheduled memory tasks when operator inputs exist.",
"evidence_ids": [],
"returned_count": 0,
"latency_ms": 0.0,
"trace_id": null,
"failure": "No operator-owned private production corpus manifest, provider credentials, or hosted scheduler configuration is available; private/provider scheduled tasks stay blocked under XY-930."
},
{
"job_id": "scheduled-stale-decision-audit-001",
"suite": "scheduled_memory",
"title": "Audit a stale project decision during a scheduled task",
"status": "pass",
"query": "Run the scheduled stale decision audit.",
"evidence_ids": [
"scheduled-old-consolidation-only-decision",
"scheduled-current-direct-suite-decision"
],
"returned_count": 2,
"latency_ms": 5.7482619999999995,
"trace_id": "3ca5cf35-007e-4c15-9dce-3983a7053e9a",
"failure": null,
"dreaming_readback": {
"artifact_kind": "elf.scheduled_memory_task/v1",
"runtime_path": "ElfService::add_note -> ElfService::list -> derived readback artifact",
"service_list_count": 2,
"trace_id": "3ca5cf35-007e-4c15-9dce-3983a7053e9a",
"generated_artifact_count": 1,
"selected_source_refs": [
"scheduled-current-direct-suite-decision",
"scheduled-old-consolidation-only-decision"
],
"missing_source_refs": [],
"source_mutation_count": 0,
"no_source_mutation_checked": true
}
},
{
"job_id": "scheduled-stale-preference-plan-audit-001",
"suite": "scheduled_memory",
"title": "Audit stale preferences and plans during a scheduled task",
"status": "pass",
"query": "Run the scheduled stale preference and plan audit.",
"evidence_ids": [
"scheduled-stale-old-plan",
"scheduled-stale-plan-expired",
"scheduled-current-trace-plan",
"scheduled-current-reviewable-preference"
],
"returned_count": 5,
"latency_ms": 7.603808,
"trace_id": "8e5741df-c5d5-4e82-a32d-dc8606e8b876",
"failure": null,
"dreaming_readback": {
"artifact_kind": "elf.scheduled_memory_task/v1",
"runtime_path": "ElfService::add_note -> ElfService::list -> derived readback artifact",
"service_list_count": 5,
"trace_id": "8e5741df-c5d5-4e82-a32d-dc8606e8b876",
"generated_artifact_count": 1,
"selected_source_refs": [
"scheduled-current-reviewable-preference",
"scheduled-current-trace-plan",
"scheduled-old-silent-mutation-preference",
"scheduled-stale-old-plan",
"scheduled-stale-plan-expired"
],
"missing_source_refs": [],
"source_mutation_count": 0,
"no_source_mutation_checked": true
}
},
{
"job_id": "scheduled-weekly-project-status-summary-001",
"suite": "scheduled_memory",
"title": "Run a weekly project status summary from current memory",
"status": "pass",
"query": "Run the weekly project status summary scheduled task.",
"evidence_ids": [
"scheduled-weekly-current-gate",
"scheduled-weekly-ledger-update"
],
"returned_count": 3,
"latency_ms": 5.362345,
"trace_id": "12bcc69c-4971-4cd5-9f58-16ae45772e7f",
"failure": null,
"dreaming_readback": {
"artifact_kind": "elf.scheduled_memory_task/v1",
"runtime_path": "ElfService::add_note -> ElfService::list -> derived readback artifact",
"service_list_count": 3,
"trace_id": "12bcc69c-4971-4cd5-9f58-16ae45772e7f",
"generated_artifact_count": 1,
"selected_source_refs": [
"scheduled-weekly-current-gate",
"scheduled-weekly-hosted-parity-trap",
"scheduled-weekly-ledger-update"
],
"missing_source_refs": [],
"source_mutation_count": 0,
"no_source_mutation_checked": true
}
}
]
}
Loading