diff --git a/Makefile.toml b/Makefile.toml index 71381fc9..a45ec771 100644 --- a/Makefile.toml +++ b/Makefile.toml @@ -28,6 +28,7 @@ # | real-world-memory-p1-closeout | composite | | # | real-world-memory-p1-closeout-json | command | | # | real-world-memory-p1-closeout-report | command | | +# | real-world-memory-p2-knowledge-closeout | composite | | # | real-world-memory-core-archival | composite | | # | real-world-memory-core-archival-json | command | | # | real-world-memory-core-archival-report | command | | @@ -370,6 +371,13 @@ args = [ "tmp/real-world-memory/p1-closeout/report.md", ] +[tasks.real-world-memory-p2-knowledge-closeout] +workspace = false +dependencies = [ + "real-world-memory-source-library-report", + "real-world-memory-knowledge-report", +] + [tasks.real-world-memory-core-archival] workspace = false dependencies = [ diff --git a/README.md b/README.md index a2b31900..ef6536bc 100644 --- a/README.md +++ b/README.md @@ -262,6 +262,15 @@ provider-backed ELF evidence was required. correction/rollback, but remains fixture-backed and does not claim private-corpus, provider-backed, live-adapter, hosted-memory, or broad competitor parity. P2 queueing remains conditional on main-thread acceptance of the closeout. +- P2 Knowledge Workspace PageIndex/OpenKB closeout after XY-1066: the June 22 + closeout adds `cargo make real-world-memory-p2-knowledge-closeout`, a checked-in + same-corpus self-assessment report, and a changed-source watch/rebuild knowledge + fixture. The source-library slice remains 2 pass/0 wrong_result and the knowledge + slice is now 3 pass/0 wrong_result, covering long-document source refs, hydrated + excerpts, project/entity/concept/issue pages, stale lint, version diff, and + reviewable memory-candidate boundaries. VectifyAI PageIndex and OpenKB remain + `not_tested` reference-only rows until contained adapters emit comparable tree/wiki + artifacts; no P3 issue is queued by this closeout. 
- Operator-approved public-proxy addendum after XY-930: the June 19 follow-up runs `cargo make baseline-production-private-addendum` with a simulated/public-proxy production corpus manifest approved for this stage. The run records 12 documents, @@ -394,6 +403,7 @@ Detailed evidence and interpretation: - [Live Knowledge-Page Rebuild/Lint Report - June 20, 2026](docs/evidence/benchmarking/2026-06-20-live-knowledge-page-rebuild-lint-report.md) - [Agent Knowledge OS Closeout Benchmark Report - June 20, 2026](docs/evidence/benchmarking/2026-06-20-agent-knowledge-os-closeout-benchmark-report.md) - [P1 Memory Authority Closeout Report - June 22, 2026](docs/evidence/benchmarking/2026-06-22-p1-memory-authority-closeout-report.md) +- [P2 Knowledge Workspace PageIndex/OpenKB Closeout Report - June 22, 2026](docs/evidence/benchmarking/2026-06-22-p2-knowledge-workspace-pageindex-openkb-closeout-report.md) - [Live Baseline Benchmark Runbook](docs/runbook/benchmarking/live_baseline_benchmark.md) - [Single-User Production Runbook](docs/runbook/single_user_production.md) - Benchmark contract: @@ -484,6 +494,7 @@ Detailed comparison, mechanism-level analysis, and source map: - [OpenMemory UI/Export Product Readback Report - June 19, 2026](docs/evidence/benchmarking/2026-06-19-openmemory-ui-export-product-readback-report.md) - [Operator-Approved Public-Proxy Production-Private Addendum - June 19, 2026](docs/evidence/benchmarking/2026-06-19-operator-approved-public-proxy-production-private-addendum.md) - [P1 Memory Authority Closeout Report - June 22, 2026](docs/evidence/benchmarking/2026-06-22-p1-memory-authority-closeout-report.md) +- [P2 Knowledge Workspace PageIndex/OpenKB Closeout Report - June 22, 2026](docs/evidence/benchmarking/2026-06-22-p2-knowledge-workspace-pageindex-openkb-closeout-report.md) - [Live Baseline Benchmark Runbook](docs/runbook/benchmarking/live_baseline_benchmark.md) - [Real-World Agent Memory 
Benchmark](docs/runbook/benchmarking/real_world_agent_memory_benchmark.md) - [External Memory Improvement Plan](docs/evidence/external_memory/external_memory_improvement_plan.md) @@ -499,7 +510,8 @@ Latest real-world benchmark report: June 22, 2026. Latest external research refr June 11, 2026; June 20 adds the Agent Knowledge OS Closeout Benchmark Report, the Graph Topic-Map Report - June 20, 2026, Knowledge Workspace Version-Diff Report - June 20, 2026, and the Live Knowledge-Page Rebuild/Lint Report - June 20, -2026; June 22 adds the P1 Memory Authority Closeout Report after the June 19 +2026; June 22 adds the P1 Memory Authority Closeout Report and P2 Knowledge +Workspace PageIndex/OpenKB Closeout Report after the June 19 XY-930 operator-approved public-proxy production addendum and service-native Dreaming readback, the qmd debug-ergonomics Dreaming retest, the June 17 competitor-strength closeout, and the June 16 temporal reconciliation, live consolidation self-check, diff --git a/apps/elf-eval/fixtures/real_world_memory/knowledge/changed_source_watch_rebuild.json b/apps/elf-eval/fixtures/real_world_memory/knowledge/changed_source_watch_rebuild.json new file mode 100644 index 00000000..a8fe819a --- /dev/null +++ b/apps/elf-eval/fixtures/real_world_memory/knowledge/changed_source_watch_rebuild.json @@ -0,0 +1,368 @@ +{ + "schema": "elf.real_world_job/v1", + "job_id": "knowledge-watch-rebuild-003", + "suite": "knowledge_compilation", + "title": "Rebuild changed-source knowledge pages without mutating source memory", + "corpus": { + "corpus_id": "real-world-memory-knowledge-synthetic-2026-06-22", + "profile": "synthetic", + "items": [ + { + "evidence_id": "watch-source-original", + "kind": "source_library_record", + "text": "Original source: The Knowledge Workspace watch path tracks Source Library document pageindex-openkb-brief and stores normalized source refs for every cited page section.", + "source_ref": { + "schema": "source_ref/v1", + "resolver": 
"elf_doc_ext/v1", + "ref": { + "doc_id": "55555555-5555-4555-8555-555555555555", + "chunk_id": "66666666-6666-4666-8666-666666666666" + }, + "state": { + "content_hash": "watch-original-content-hash", + "chunk_hash": "watch-original-chunk-hash" + }, + "hashes": { + "content_hash": "watch-original-content-hash", + "chunk_hash": "watch-original-chunk-hash" + }, + "locator": { + "quote": { + "exact": "stores normalized source refs" + } + } + }, + "created_at": "2026-06-22T01:00:00Z" + }, + { + "evidence_id": "watch-source-updated", + "kind": "source_library_record", + "text": "Updated source: PageIndex-style long-document handling remains a reference expectation, while ELF proves Source Library source_ref hydration and Knowledge Workspace changed-source rebuild output on the same synthetic corpus.", + "source_ref": { + "schema": "source_ref/v1", + "resolver": "elf_doc_ext/v1", + "ref": { + "doc_id": "55555555-5555-4555-8555-555555555555", + "chunk_id": "77777777-7777-4777-8777-777777777777" + }, + "state": { + "content_hash": "watch-updated-content-hash", + "chunk_hash": "watch-updated-chunk-hash" + }, + "hashes": { + "content_hash": "watch-updated-content-hash", + "chunk_hash": "watch-updated-chunk-hash" + }, + "locator": { + "quote": { + "exact": "changed-source rebuild output" + } + } + }, + "created_at": "2026-06-22T01:10:00Z" + }, + { + "evidence_id": "watch-stale-page-snapshot", + "kind": "compiled_page", + "text": "Stale page snapshot: The old page says ELF has a contained PageIndex/OpenKB adapter pass and can queue P3 immediately without main-thread acceptance.", + "source_ref": { + "schema": "source_ref/v1", + "resolver": "real_world_memory_fixture/v1", + "ref": { + "fixture": "changed_source_watch_rebuild", + "evidence_id": "watch-stale-page-snapshot" + } + }, + "created_at": "2026-06-22T01:05:00Z" + }, + { + "evidence_id": "watch-lint-output", + "kind": "lint_finding", + "text": "Lint output: The contained PageIndex/OpenKB adapter-pass statement is stale 
and must not be presented as current truth until a contained adapter emits same-corpus artifacts.", + "source_ref": { + "schema": "source_ref/v1", + "resolver": "real_world_memory_fixture/v1", + "ref": { + "fixture": "changed_source_watch_rebuild", + "evidence_id": "watch-lint-output" + }, + "locator": { + "quote": { + "exact": "must not be presented as current truth" + } + } + }, + "created_at": "2026-06-22T01:11:00Z" + }, + { + "evidence_id": "watch-memory-candidate-proposal", + "kind": "consolidation_proposal", + "text": "Reviewable memory candidate: Knowledge page rebuild deltas may propose a memory candidate, but source documents and Memory Notes are not mutated by the watch/rebuild pass.", + "source_ref": { + "schema": "source_ref/v1", + "resolver": "real_world_memory_fixture/v1", + "ref": { + "fixture": "changed_source_watch_rebuild", + "evidence_id": "watch-memory-candidate-proposal" + }, + "locator": { + "quote": { + "exact": "source documents and Memory Notes are not mutated" + } + } + }, + "created_at": "2026-06-22T01:12:00Z" + } + ], + "adapter_response": { + "adapter_id": "fixture_knowledge", + "answer": { + "content": "The changed-source watch/rebuild page selects only pages citing the updated source ref, reports the stale PageIndex/OpenKB adapter claim as lint evidence, preserves a reviewed memory-candidate boundary, and leaves source documents plus Memory Notes unmodified.", + "claims": [ + { + "claim_id": "watch_rebuild_scope", + "text": "Changed-source rebuild selects only knowledge pages that cite the changed Source Library source refs.", + "evidence_ids": ["watch-source-original", "watch-source-updated"], + "confidence": "high" + }, + { + "claim_id": "stale_adapter_lint", + "text": "The old contained PageIndex/OpenKB adapter-pass claim is stale lint evidence, not current truth.", + "evidence_ids": ["watch-lint-output"], + "confidence": "high" + }, + { + "claim_id": "memory_candidate_boundary", + "text": "Knowledge rebuild deltas may propose 
reviewable memory candidates without mutating source documents or Memory Notes.", + "evidence_ids": ["watch-memory-candidate-proposal"], + "confidence": "high" + } + ], + "evidence_ids": [ + "watch-source-original", + "watch-source-updated", + "watch-lint-output", + "watch-memory-candidate-proposal" + ], + "pages": [ + { + "page_id": "project:knowledge-watch-rebuild", + "page_type": "project", + "title": "Knowledge Watch/Rebuild Changed-Source Page", + "path": "apps/elf-eval/fixtures/real_world_memory/knowledge/pages/watch_rebuild_changed_sources.md", + "sections": [ + { + "section_id": "changed-source-scope", + "heading": "Changed Source Scope", + "role": "current_truth", + "content": "Changed-source rebuild selects pages that already cite the updated Source Library source refs and leaves unrelated pages out of the rebuild.", + "evidence_ids": ["watch-source-original", "watch-source-updated"], + "timeline_event_ids": ["watch-source-updated-event"] + }, + { + "section_id": "stale-adapter-claim", + "heading": "Stale Adapter Claim", + "role": "lint", + "content": "The stale claim that ELF has a contained PageIndex/OpenKB adapter pass is retained only as lint evidence.", + "evidence_ids": ["watch-stale-page-snapshot"], + "timeline_event_ids": ["watch-stale-claim-detected"] + }, + { + "section_id": "memory-candidate-boundary", + "heading": "Memory Candidate Boundary", + "role": "review", + "content": "Watch/rebuild may propose a memory candidate from page deltas, but it must not mutate Source Library documents or Memory Notes directly.", + "evidence_ids": ["watch-memory-candidate-proposal"], + "timeline_event_ids": ["watch-memory-candidate-proposed"] + } + ], + "backlinks": [ + "project:elf-benchmark-suite", + "concept:derived-knowledge-pages" + ], + "lint_findings": [ + { + "finding_id": "lint-contained-adapter-pass-stale", + "finding_type": "stale_claim", + "severity": "error", + "text": "The contained PageIndex/OpenKB adapter-pass claim conflicts with the current 
reference-only comparison boundary.", + "evidence_ids": ["watch-stale-page-snapshot"], + "trap_id": "contained-adapter-pass-stale" + } + ], + "rebuild": { + "first_hash": "blake3:5d4ee9d4f1c0c6a1c7e8f9b1a2457777a41fb0a8d64ce0dd7989e6c5a8b6dd01", + "second_hash": "blake3:5d4ee9d4f1c0c6a1c7e8f9b1a2457777a41fb0a8d64ce0dd7989e6c5a8b6dd01", + "deterministic": true, + "allowed_variance": [] + }, + "page_version_diff": { + "schema": "elf.knowledge_page.version_diff/v1", + "available": true, + "previous_page_hash": "blake3:old-watch-page-hash", + "current_page_hash": "blake3:5d4ee9d4f1c0c6a1c7e8f9b1a2457777a41fb0a8d64ce0dd7989e6c5a8b6dd01", + "changed_sections": ["changed-source-scope", "stale-adapter-claim"], + "unchanged_sections": ["memory-candidate-boundary"] + } + } + ], + "latency_ms": 2.7, + "cost": { + "currency": "USD", + "amount": 0.0, + "input_tokens": 0, + "output_tokens": 0 + } + } + } + }, + "timeline": [ + { + "event_id": "watch-source-captured", + "ts": "2026-06-22T01:00:00Z", + "actor": "agent", + "action": "captured_source_ref_snapshot", + "evidence_ids": ["watch-source-original"], + "summary": "The original Source Library document snapshot was captured with normalized source refs." + }, + { + "event_id": "watch-source-updated-event", + "ts": "2026-06-22T01:10:00Z", + "actor": "agent", + "action": "changed_source_rebuild", + "evidence_ids": ["watch-source-updated"], + "summary": "A changed-source rebuild selected cited pages and produced current section output." + }, + { + "event_id": "watch-stale-claim-detected", + "ts": "2026-06-22T01:11:00Z", + "actor": "agent", + "action": "linted_stale_adapter_claim", + "evidence_ids": ["watch-lint-output"], + "summary": "The stale contained-adapter pass claim was retained as lint evidence." 
+ }, + { + "event_id": "watch-memory-candidate-proposed", + "ts": "2026-06-22T01:12:00Z", + "actor": "agent", + "action": "queued_reviewable_memory_candidate", + "evidence_ids": ["watch-memory-candidate-proposal"], + "summary": "The changed page delta created a reviewable memory candidate without mutating source documents or Memory Notes." + } + ], + "prompt": { + "role": "user", + "content": "Run the Knowledge Workspace changed-source watch/rebuild check for the PageIndex/OpenKB comparison slice and report stale claims plus memory-candidate boundaries.", + "job_mode": "compile", + "constraints": [ + "cite_source_refs", + "lint_stale_adapter_claims", + "preserve_memory_candidate_review_boundary", + "do_not_upgrade_reference_only_competitors" + ] + }, + "expected_answer": { + "must_include": [ + { + "claim_id": "watch_rebuild_scope", + "text": "Changed-source rebuild selects only knowledge pages that cite the changed Source Library source refs." + }, + { + "claim_id": "stale_adapter_lint", + "text": "The old contained PageIndex/OpenKB adapter-pass claim is stale lint evidence, not current truth." + }, + { + "claim_id": "memory_candidate_boundary", + "text": "Knowledge rebuild deltas may propose reviewable memory candidates without mutating source documents or Memory Notes." 
+ } + ], + "must_not_include": [ + "ELF has a contained PageIndex/OpenKB adapter pass", + "queue P3 immediately without main-thread acceptance", + "watch/rebuild mutates source documents directly" + ], + "evidence_links": { + "watch_rebuild_scope": ["watch-source-original", "watch-source-updated"], + "stale_adapter_lint": ["watch-lint-output"], + "memory_candidate_boundary": ["watch-memory-candidate-proposal"] + }, + "answer_type": "compiled_knowledge", + "accepted_alternates": [], + "requires_caveat": false, + "requires_refusal": false + }, + "required_evidence": [ + { + "evidence_id": "watch-source-updated", + "claim_id": "watch_rebuild_scope", + "requirement": "cite", + "quote": "changed-source rebuild output" + }, + { + "evidence_id": "watch-lint-output", + "claim_id": "stale_adapter_lint", + "requirement": "cite", + "quote": "must not be presented as current truth" + }, + { + "evidence_id": "watch-memory-candidate-proposal", + "claim_id": "memory_candidate_boundary", + "requirement": "cite", + "quote": "source documents and Memory Notes are not mutated" + } + ], + "negative_traps": [ + { + "trap_id": "contained-adapter-pass-stale", + "type": "stale_fact", + "evidence_ids": ["watch-stale-page-snapshot"], + "failure_if_used": true + } + ], + "scoring_rubric": { + "dimensions": { + "answer_correctness": { + "weight": 0.25, + "max_points": 1.0, + "criteria": "States changed-source rebuild scope and does not upgrade reference-only competitors." + }, + "evidence_grounding": { + "weight": 0.3, + "max_points": 1.0, + "criteria": "Cites changed source refs, stale lint evidence, and memory candidate proposal evidence." + }, + "trap_avoidance": { + "weight": 0.2, + "max_points": 1.0, + "criteria": "Treats the contained adapter pass claim as stale lint evidence." + }, + "workflow_helpfulness": { + "weight": 0.15, + "max_points": 1.0, + "criteria": "Reports changed, stale, and reviewable memory-candidate outputs." 
+ }, + "lifecycle_behavior": { + "weight": 0.1, + "max_points": 1.0, + "criteria": "Preserves source-of-truth and reviewed promotion boundaries." + } + }, + "pass_threshold": 0.8, + "hard_fail_rules": [ + "claiming PageIndex/OpenKB adapter pass without contained artifacts", + "claiming watch/rebuild directly mutates source documents or Memory Notes" + ] + }, + "allowed_uncertainty": { + "can_answer_unknown": false, + "acceptable_phrases": [], + "fallback_action": "state_blocker" + }, + "tags": [ + "synthetic", + "knowledge", + "watch_rebuild", + "pageindex_openkb_reference", + "benchmark_artifact" + ] +} diff --git a/apps/elf-eval/fixtures/real_world_memory/knowledge/pages/watch_rebuild_changed_sources.md b/apps/elf-eval/fixtures/real_world_memory/knowledge/pages/watch_rebuild_changed_sources.md new file mode 100644 index 00000000..42ec9e2d --- /dev/null +++ b/apps/elf-eval/fixtures/real_world_memory/knowledge/pages/watch_rebuild_changed_sources.md @@ -0,0 +1,37 @@ +# Knowledge Watch/Rebuild Changed-Source Page + +## Changed Source Scope + +Changed-source rebuild selects pages that already cite the updated Source Library +source refs and leaves unrelated pages out of the rebuild. + +Sources: `watch-source-original`, `watch-source-updated`, +`watch-source-updated-event`. + +## Stale Adapter Claim + +The stale claim that ELF has a contained PageIndex/OpenKB adapter pass is retained +only as lint evidence. + +Sources: `watch-stale-page-snapshot`, `watch-stale-claim-detected`. + +## Memory Candidate Boundary + +Watch/rebuild may propose a memory candidate from page deltas, but it must not +mutate Source Library documents or Memory Notes directly. + +Sources: `watch-memory-candidate-proposal`, +`watch-memory-candidate-proposed`. + +## Lint Findings + +- `lint-contained-adapter-pass-stale`: stale claim; the contained PageIndex/OpenKB + adapter-pass claim conflicts with the current reference-only comparison boundary. 
+ +## Version Diff + +Schema: `elf.knowledge_page.version_diff/v1`. + +Changed sections: `changed-source-scope`, `stale-adapter-claim`. + +Unchanged sections: `memory-candidate-boundary`. diff --git a/apps/elf-eval/fixtures/report_snapshots/2026-06-22-p2-knowledge-workspace-pageindex-openkb-closeout-report.json b/apps/elf-eval/fixtures/report_snapshots/2026-06-22-p2-knowledge-workspace-pageindex-openkb-closeout-report.json new file mode 100644 index 00000000..beae050d --- /dev/null +++ b/apps/elf-eval/fixtures/report_snapshots/2026-06-22-p2-knowledge-workspace-pageindex-openkb-closeout-report.json @@ -0,0 +1,156 @@ +{ + "schema": "elf.p2_knowledge_workspace_pageindex_openkb_closeout_report/v1", + "authority": "XY-1066", + "phase": "P2 Knowledge Workspace closeout", + "generated_at": "2026-06-22T00:00:00Z", + "report_markdown": "docs/evidence/benchmarking/2026-06-22-p2-knowledge-workspace-pageindex-openkb-closeout-report.md", + "commands": [ + { + "command": "cargo make real-world-memory-source-library", + "status": "pass", + "artifact_json": "tmp/real-world-memory/source-library-report.json", + "artifact_markdown": "tmp/real-world-memory/source-library-report.md", + "proves": [ + "long_document_source_handling", + "source_ref_hydration", + "source_only_capture_boundary" + ] + }, + { + "command": "cargo make real-world-memory-knowledge", + "status": "pass", + "artifact_json": "tmp/real-world-memory/knowledge-report.json", + "artifact_markdown": "tmp/real-world-memory/knowledge-report.md", + "proves": [ + "derived_project_entity_concept_issue_pages", + "stale_lint", + "changed_source_watch_rebuild", + "page_version_diff", + "reviewable_memory_candidate_boundary" + ] + }, + { + "command": "cargo make real-world-memory-p2-knowledge-closeout", + "status": "pass", + "artifact_json": "tmp/real-world-memory/source-library-report.json and tmp/real-world-memory/knowledge-report.json", + "artifact_markdown": "tmp/real-world-memory/source-library-report.md and 
tmp/real-world-memory/knowledge-report.md", + "proves": [ + "p2_closeout_fixture_bundle_runs" + ] + } + ], + "elf_same_corpus_results": [ + { + "suite": "source_library", + "status": "pass", + "jobs": 2, + "pass": 2, + "wrong_result": 0, + "incomplete": 0, + "blocked": 0, + "not_tested": 0, + "coverage": [ + "Saved long-form article keeps canonical source metadata.", + "Hydrated excerpt keeps source_ref/v1 and elf_doc_ext/v1 evidence.", + "Saved social thread keeps source metadata and explicit promotion boundary." + ], + "artifact": "tmp/real-world-memory/source-library-report.json" + }, + { + "suite": "knowledge_compilation", + "status": "pass", + "jobs": 3, + "pass": 3, + "wrong_result": 0, + "incomplete": 0, + "blocked": 0, + "not_tested": 0, + "coverage": [ + "Project page rebuild keeps source notes authoritative and flags unsupported sections.", + "Entity, concept, and issue timeline pages cite source evidence and lint stale claims.", + "Changed-source watch/rebuild reports changed, stale, and reviewable memory-candidate outputs without source mutation." + ], + "artifact": "tmp/real-world-memory/knowledge-report.json" + } + ], + "comparison_matrix": [ + { + "target": "ELF Source Library", + "status": "pass", + "evidence_class": "same_corpus_fixture", + "reference_expectation": "Long-document source capture with stable metadata and citable excerpts.", + "proven": "ELF source fixtures preserve metadata, source refs, hydrated excerpts, and no silent memory promotion.", + "remaining_untested": "PageIndex-style vectorless tree reasoning over long PDFs is not exercised." 
+ }, + { + "target": "ELF Knowledge Workspace", + "status": "pass", + "evidence_class": "same_corpus_fixture", + "reference_expectation": "Derived wiki/entity/concept pages, lint, watch/rebuild, and source refs.", + "proven": "ELF knowledge fixtures compile source-linked pages, detect stale claims, expose rebuild/version-diff metadata, and preserve reviewed memory-candidate boundaries.", + "remaining_untested": "OpenKB product compilation, saved explorations, and recompile workflow are not exercised through an OpenKB adapter." + }, + { + "target": "VectifyAI PageIndex", + "status": "not_tested", + "evidence_class": "reference_only", + "reference_expectation": "Vectorless long-document tree retrieval and PageIndex MCP ecosystem behavior.", + "proven": "No PageIndex contained adapter artifact is checked in for this closeout.", + "remaining_untested": "Same-corpus PageIndex tree artifacts, cited node paths, long-PDF traversal output, and MCP readback." + }, + { + "target": "VectifyAI OpenKB", + "status": "not_tested", + "evidence_class": "reference_only", + "reference_expectation": "Document-to-wiki compilation, concept/entity pages, lint, watch, and recompile workflow.", + "proven": "No OpenKB contained adapter artifact is checked in for this closeout.", + "remaining_untested": "Same-corpus OpenKB generated wiki pages, entity/concept index, lint output, watch/recompile trace, and source-ref mapping." + }, + { + "target": "P3 PageIndex/OpenKB adapter queue", + "status": "blocked", + "evidence_class": "process_boundary", + "reference_expectation": "Queue only after P2 self-assessment passes and main-thread acceptance selects the next P3 issue.", + "proven": "This report makes the P3 adapter work decision-ready but does not apply decodex:queued:elf.", + "remaining_untested": "Human-selected P3 issue scope and contained adapter implementation." 
+ } + ], + "typed_state_summary": { + "pass": 2, + "wrong_result": 0, + "incomplete": 0, + "blocked": 1, + "not_tested": 2 + }, + "self_assessment": { + "verdict": "pass_with_reference_only_competitor_boundary", + "what_elf_can_prove": [ + "ELF can prove source-only long-document capture, source_ref hydration, and explicit memory-promotion boundaries on the checked-in source_library corpus.", + "ELF can prove derived project/entity/concept/issue pages, stale lint, changed-source watch/rebuild, version-diff metadata, and reviewable memory-candidate boundaries on the checked-in knowledge corpus." + ], + "what_remains_untested": [ + "PageIndex vectorless tree retrieval, long-PDF node paths, and MCP product behavior.", + "OpenKB document-to-wiki compilation, saved exploration, watch/recompile product flow, and source mapping.", + "Any win/tie/loss claim against PageIndex or OpenKB." + ] + }, + "p3_queue_decision": { + "ready_to_queue_after_main_thread_acceptance": true, + "queued_in_this_lane": false, + "queued_label_applied": false, + "next_issue_shape": "one contained PageIndex/OpenKB adapter issue that emits same-corpus tree/wiki artifacts, source refs, lint/watch output, and typed benchmark states" + }, + "claim_boundaries": { + "allowed": [ + "ELF P2 Knowledge Workspace closeout passes its checked-in source_library and knowledge fixture self-assessment.", + "ELF can compare its outputs to PageIndex/OpenKB-style expectations only as reference expectations in this closeout.", + "P3 adapter work is decision-ready after main-thread acceptance." + ], + "not_allowed": [ + "Do not claim ELF beats PageIndex or OpenKB.", + "Do not treat reference-only PageIndex/OpenKB rows as pass evidence.", + "Do not queue a P3 issue in this lane.", + "Do not claim live private-corpus, hosted provider, or product UI quality from this fixture-backed closeout." 
+ ] + } +} diff --git a/apps/elf-eval/tests/real_world_job_benchmark.rs b/apps/elf-eval/tests/real_world_job_benchmark.rs index 389ccf58..43ab8fb5 100644 --- a/apps/elf-eval/tests/real_world_job_benchmark.rs +++ b/apps/elf-eval/tests/real_world_job_benchmark.rs @@ -275,6 +275,10 @@ fn agent_knowledge_os_closeout_benchmark_report_json_path() -> Result<PathBuf> { report_snapshot_path("2026-06-20-agent-knowledge-os-closeout-benchmark-report.json") } +fn p2_knowledge_workspace_pageindex_openkb_closeout_report_json_path() -> Result<PathBuf> { + report_snapshot_path("2026-06-22-p2-knowledge-workspace-pageindex-openkb-closeout-report.json") +} + fn openmemory_ui_export_product_readback_report_json_path() -> Result<PathBuf> { report_snapshot_path("2026-06-19-openmemory-ui-export-product-readback-report.json") } @@ -337,6 +341,14 @@ fn agent_knowledge_os_closeout_benchmark_report_markdown_path() -> Result<PathBuf> +fn p2_knowledge_workspace_pageindex_openkb_closeout_report_markdown_path() -> Result<PathBuf> { + Ok(workspace_root()? + .join("docs") + .join("evidence") + .join("benchmarking") + .join("2026-06-22-p2-knowledge-workspace-pageindex-openkb-closeout-report.md")) +} + fn openmemory_ui_export_product_readback_report_markdown_path() -> Result<PathBuf> { Ok(workspace_root()? 
.join("docs") @@ -2542,7 +2554,7 @@ fn assert_live_sweep_record(adapter: &Value, production_ops_status: &str) -> Res fn runner_discovers_nested_fixture_layout() -> Result<()> { let report = run_json_report_from(fixture_root())?; - assert_eq!(report.pointer("/summary/job_count").and_then(Value::as_u64), Some(66)); + assert_eq!(report.pointer("/summary/job_count").and_then(Value::as_u64), Some(67)); Ok(()) } @@ -2682,18 +2694,18 @@ fn consolidation_fixtures_report_reviewable_proposal_metrics() -> Result<()> { fn knowledge_fixtures_report_page_metrics() -> Result<()> { let report = run_json_report_from(knowledge_fixture_dir())?; - assert_eq!(report.pointer("/summary/job_count").and_then(Value::as_u64), Some(2)); - assert_eq!(report.pointer("/summary/pass").and_then(Value::as_u64), Some(2)); + assert_eq!(report.pointer("/summary/job_count").and_then(Value::as_u64), Some(3)); + assert_eq!(report.pointer("/summary/pass").and_then(Value::as_u64), Some(3)); assert_eq!(report.pointer("/summary/unsupported_claim_count").and_then(Value::as_u64), Some(0)); assert_eq!(report.pointer("/summary/wrong_result_count").and_then(Value::as_u64), Some(0)); - assert_eq!(report.pointer("/summary/knowledge/page_count").and_then(Value::as_u64), Some(4)); + assert_eq!(report.pointer("/summary/knowledge/page_count").and_then(Value::as_u64), Some(5)); assert_eq!( report.pointer("/summary/knowledge/section_count").and_then(Value::as_u64), - Some(10) + Some(13) ); assert_eq!( report.pointer("/summary/knowledge/citation_coverage").and_then(Value::as_f64), - Some(0.9) + Some(0.923) ); assert_eq!( report.pointer("/summary/knowledge/stale_claim_detection").and_then(Value::as_f64), @@ -2705,11 +2717,11 @@ fn knowledge_fixtures_report_page_metrics() -> Result<()> { ); assert_eq!( report.pointer("/summary/knowledge/backlink_count").and_then(Value::as_u64), - Some(9) + Some(11) ); assert_eq!( report.pointer("/summary/knowledge/pages_with_backlinks").and_then(Value::as_u64), - Some(4) + Some(5) ); 
assert_eq!( report.pointer("/summary/knowledge/backlink_coverage").and_then(Value::as_f64), @@ -2717,7 +2729,11 @@ fn knowledge_fixtures_report_page_metrics() -> Result<()> { ); assert_eq!( report.pointer("/summary/knowledge/page_usefulness").and_then(Value::as_f64), - Some(0.969) + Some(0.979) + ); + assert_eq!( + report.pointer("/summary/knowledge/pages_with_version_diff").and_then(Value::as_u64), + Some(1) ); assert_eq!( report.pointer("/summary/knowledge/unsupported_summary_count").and_then(Value::as_u64), @@ -2732,10 +2748,11 @@ fn knowledge_fixtures_report_page_metrics() -> Result<()> { let knowledge_suite = find_by_field(suites, "/suite_id", "knowledge_compilation")?; assert_eq!(knowledge_suite.pointer("/status").and_then(Value::as_str), Some("pass")); - assert_eq!(knowledge_suite.pointer("/encoded_job_count").and_then(Value::as_u64), Some(2)); + assert_eq!(knowledge_suite.pointer("/encoded_job_count").and_then(Value::as_u64), Some(3)); let jobs = array_at(&report, "/jobs")?; let project_page_job = find_by_field(jobs, "/job_id", "knowledge-project-page-001")?; + let watch_rebuild_job = find_by_field(jobs, "/job_id", "knowledge-watch-rebuild-003")?; assert_eq!( project_page_job.pointer("/knowledge/unsupported_summary_count").and_then(Value::as_u64), @@ -2745,6 +2762,18 @@ fn knowledge_fixtures_report_page_metrics() -> Result<()> { project_page_job.pointer("/knowledge/untraced_section_count").and_then(Value::as_u64), Some(0) ); + assert_eq!( + watch_rebuild_job.pointer("/knowledge/pages_with_version_diff").and_then(Value::as_u64), + Some(1) + ); + assert!( + watch_rebuild_job + .pointer("/produced_answer") + .and_then(Value::as_str) + .is_some_and(|answer| answer + .contains("PageIndex/OpenKB adapter claim as lint evidence") + && answer.contains("leaves source documents plus Memory Notes unmodified")) + ); Ok(()) } @@ -4024,6 +4053,103 @@ fn agent_knowledge_os_closeout_benchmark_wires_docs_and_optimization_queue() -> Ok(()) } +#[test] +fn 
p2_knowledge_workspace_closeout_preserves_pageindex_openkb_boundaries() -> Result<()> { + let report = serde_json::from_str::<Value>(&fs::read_to_string( + p2_knowledge_workspace_pageindex_openkb_closeout_report_json_path()?, + )?)?; + let markdown = fs::read_to_string( + p2_knowledge_workspace_pageindex_openkb_closeout_report_markdown_path()?, + )?; + let makefile = fs::read_to_string(workspace_root()?.join("Makefile.toml"))?; + let benchmarking_index = fs::read_to_string(benchmarking_index_path()?)?; + let readme = fs::read_to_string(readme_path()?)?; + let benchmark_runbook = fs::read_to_string( + workspace_root()? + .join("docs") + .join("runbook") + .join("benchmarking") + .join("real_world_agent_memory_benchmark.md"), + )?; + + assert_eq!( + report.pointer("/schema").and_then(Value::as_str), + Some("elf.p2_knowledge_workspace_pageindex_openkb_closeout_report/v1") + ); + assert_eq!(report.pointer("/authority").and_then(Value::as_str), Some("XY-1066")); + assert_eq!( + report.pointer("/self_assessment/verdict").and_then(Value::as_str), + Some("pass_with_reference_only_competitor_boundary") + ); + assert_eq!(report.pointer("/typed_state_summary/pass").and_then(Value::as_u64), Some(2)); + assert_eq!( + report.pointer("/typed_state_summary/wrong_result").and_then(Value::as_u64), + Some(0) + ); + assert_eq!(report.pointer("/typed_state_summary/incomplete").and_then(Value::as_u64), Some(0)); + assert_eq!(report.pointer("/typed_state_summary/blocked").and_then(Value::as_u64), Some(1)); + assert_eq!(report.pointer("/typed_state_summary/not_tested").and_then(Value::as_u64), Some(2)); + + let results = array_at(&report, "/elf_same_corpus_results")?; + let source_library = find_by_field(results, "/suite", "source_library")?; + let knowledge = find_by_field(results, "/suite", "knowledge_compilation")?; + + assert_eq!(source_library.pointer("/status").and_then(Value::as_str), Some("pass")); + assert_eq!(source_library.pointer("/jobs").and_then(Value::as_u64), Some(2)); + 
assert_eq!(knowledge.pointer("/status").and_then(Value::as_str), Some("pass")); + assert_eq!(knowledge.pointer("/jobs").and_then(Value::as_u64), Some(3)); + assert!(array_contains_str( + knowledge, + "/coverage", + "Changed-source watch/rebuild reports changed, stale, and reviewable memory-candidate outputs without source mutation." + )?); + + let matrix = array_at(&report, "/comparison_matrix")?; + let pageindex = find_by_field(matrix, "/target", "VectifyAI PageIndex")?; + let openkb = find_by_field(matrix, "/target", "VectifyAI OpenKB")?; + let p3 = find_by_field(matrix, "/target", "P3 PageIndex/OpenKB adapter queue")?; + + assert_eq!(pageindex.pointer("/status").and_then(Value::as_str), Some("not_tested")); + assert_eq!(openkb.pointer("/status").and_then(Value::as_str), Some("not_tested")); + assert_eq!(p3.pointer("/status").and_then(Value::as_str), Some("blocked")); + assert_eq!( + report + .pointer("/p3_queue_decision/ready_to_queue_after_main_thread_acceptance") + .and_then(Value::as_bool), + Some(true) + ); + assert_eq!( + report.pointer("/p3_queue_decision/queued_label_applied").and_then(Value::as_bool), + Some(false) + ); + assert!(array_contains_str( + &report, + "/claim_boundaries/not_allowed", + "Do not claim ELF beats PageIndex or OpenKB." + )?); + assert!(array_contains_str( + &report, + "/claim_boundaries/not_allowed", + "Do not queue a P3 issue in this lane." 
+ )?); + assert!(markdown.contains("P2 Knowledge Workspace PageIndex/OpenKB Closeout Report")); + assert!(markdown.contains("VectifyAI PageIndex")); + assert!(markdown.contains("VectifyAI OpenKB")); + assert!(markdown.contains("This report does not apply `decodex:queued:elf`")); + assert!(makefile.contains("[tasks.real-world-memory-p2-knowledge-closeout]")); + assert!(makefile.contains("\"real-world-memory-source-library-report\"")); + assert!(makefile.contains("\"real-world-memory-knowledge-report\"")); + assert!( + benchmarking_index + .contains("2026-06-22-p2-knowledge-workspace-pageindex-openkb-closeout-report.md") + ); + assert!(readme.contains("P2 Knowledge Workspace PageIndex/OpenKB closeout after XY-1066")); + assert!(readme.contains("real-world-memory-p2-knowledge-closeout")); + assert!(benchmark_runbook.contains("cargo make real-world-memory-p2-knowledge-closeout")); + + Ok(()) +} + #[test] fn operator_approved_public_proxy_private_addendum_preserves_boundary() -> Result<()> { let report = serde_json::from_str::<Value>(&fs::read_to_string( @@ -6147,10 +6273,11 @@ fn knowledge_json_report_renders_markdown_metrics() -> Result<()> { assert!(markdown.contains("Knowledge Page Metrics")); assert!(markdown.contains("Knowledge citation coverage")); - assert!(markdown.contains("Backlinks: `9` total")); + assert!(markdown.contains("Backlinks: `11` total")); assert!(markdown.contains("Unsupported summary count")); assert!(markdown.contains("knowledge-project-page-001")); assert!(markdown.contains("knowledge-entity-concept-002")); + assert!(markdown.contains("knowledge-watch-rebuild-003")); Ok(()) } @@ -7080,18 +7207,18 @@ fn context_trajectory_fixtures_report_blocked_openviking_gates() -> Result<()> { } fn assert_root_knowledge_summary(report: &Value) { - assert_eq!(report.pointer("/summary/knowledge/job_count").and_then(Value::as_u64), Some(2)); - assert_eq!(report.pointer("/summary/knowledge/page_count").and_then(Value::as_u64), Some(4)); + 
assert_eq!(report.pointer("/summary/knowledge/job_count").and_then(Value::as_u64), Some(3)); + assert_eq!(report.pointer("/summary/knowledge/page_count").and_then(Value::as_u64), Some(5)); assert_eq!( report.pointer("/summary/knowledge/page_usefulness").and_then(Value::as_f64), - Some(0.969) + Some(0.979) ); } fn assert_root_aggregate_summary(report: &Value) { - assert_eq!(report.pointer("/summary/job_count").and_then(Value::as_u64), Some(66)); + assert_eq!(report.pointer("/summary/job_count").and_then(Value::as_u64), Some(67)); assert_eq!(report.pointer("/summary/encoded_suite_count").and_then(Value::as_u64), Some(17)); - assert_eq!(report.pointer("/summary/pass").and_then(Value::as_u64), Some(59)); + assert_eq!(report.pointer("/summary/pass").and_then(Value::as_u64), Some(60)); assert_eq!(report.pointer("/summary/wrong_result").and_then(Value::as_u64), Some(0)); assert_eq!(report.pointer("/summary/incomplete").and_then(Value::as_u64), Some(0)); assert_eq!(report.pointer("/summary/blocked").and_then(Value::as_u64), Some(7)); @@ -7134,11 +7261,11 @@ fn assert_root_aggregate_summary(report: &Value) { ); assert_eq!( report.pointer("/summary/evidence_required_count").and_then(Value::as_u64), - Some(149) + Some(152) ); assert_eq!( report.pointer("/summary/evidence_covered_count").and_then(Value::as_u64), - Some(149) + Some(152) ); assert_eq!(report.pointer("/summary/evidence_coverage").and_then(Value::as_f64), Some(1.0)); assert_eq!(report.pointer("/summary/source_ref_coverage").and_then(Value::as_f64), Some(1.0)); diff --git a/docs/evidence/benchmarking/2026-06-22-p2-knowledge-workspace-pageindex-openkb-closeout-report.md b/docs/evidence/benchmarking/2026-06-22-p2-knowledge-workspace-pageindex-openkb-closeout-report.md new file mode 100644 index 00000000..11f3ec02 --- /dev/null +++ b/docs/evidence/benchmarking/2026-06-22-p2-knowledge-workspace-pageindex-openkb-closeout-report.md @@ -0,0 +1,156 @@ +--- +type: Evidence +title: "P2 Knowledge Workspace PageIndex/OpenKB 
Closeout Report - June 22, 2026" +description: "Self-assessment and benchmark evidence for the P2 Knowledge Workspace closeout against PageIndex/OpenKB-style strengths." +resource: docs/evidence/benchmarking/2026-06-22-p2-knowledge-workspace-pageindex-openkb-closeout-report.md +status: active +authority: evidence +owner: benchmarking +last_verified: 2026-06-22 +tags: + - docs + - evidence + - benchmarking + - knowledge-workspace +source_refs: + - apps/elf-eval/fixtures/report_snapshots/2026-06-22-p2-knowledge-workspace-pageindex-openkb-closeout-report.json +code_refs: + - Makefile.toml + - apps/elf-eval/fixtures/real_world_memory/source_library/ + - apps/elf-eval/fixtures/real_world_memory/knowledge/ +related: + - docs/spec/agent_memory_knowledge_system_v1.md + - docs/spec/system_knowledge_pages_v1.md + - docs/spec/real_world_agent_memory_benchmark_v1.md + - docs/runbook/benchmarking/real_world_agent_memory_benchmark.md +drift_watch: + - docs/evidence/benchmarking/2026-06-22-p2-knowledge-workspace-pageindex-openkb-closeout-report.md + - apps/elf-eval/fixtures/report_snapshots/2026-06-22-p2-knowledge-workspace-pageindex-openkb-closeout-report.json + - apps/elf-eval/fixtures/real_world_memory/source_library/ + - apps/elf-eval/fixtures/real_world_memory/knowledge/ + - Makefile.toml +--- +# P2 Knowledge Workspace PageIndex/OpenKB Closeout Report - June 22, 2026 + +Purpose: Close XY-1066 by measuring ELF Knowledge Workspace and Source Library +behavior against PageIndex/OpenKB-style strengths without converting reference-only +competitors into wins. +Status: evidence +Read this when: You need to decide what ELF can prove for long-document sources, +derived pages, lint, watch/rebuild, and source refs before queuing P3 adapter work. +Not this document: A contained PageIndex/OpenKB adapter result, live private-corpus +proof, or product UI readback. 
+Inputs: `apps/elf-eval/fixtures/real_world_memory/source_library/`, +`apps/elf-eval/fixtures/real_world_memory/knowledge/`, and +`apps/elf-eval/fixtures/report_snapshots/2026-06-22-p2-knowledge-workspace-pageindex-openkb-closeout-report.json`. + +## Command + +```sh +cargo make real-world-memory-p2-knowledge-closeout +``` + +The command runs the same-corpus ELF fixture slices and writes: + +- `tmp/real-world-memory/source-library-report.json` +- `tmp/real-world-memory/source-library-report.md` +- `tmp/real-world-memory/knowledge-report.json` +- `tmp/real-world-memory/knowledge-report.md` + +The checked-in JSON snapshot for this closeout is: + +- `apps/elf-eval/fixtures/report_snapshots/2026-06-22-p2-knowledge-workspace-pageindex-openkb-closeout-report.json` + +## Result + +The P2 closeout self-assessment passes for ELF-owned fixture evidence: + +| Slice | Status | Jobs | Pass | Wrong result | Incomplete | Blocked | Not tested | +| --- | --- | ---: | ---: | ---: | ---: | ---: | ---: | +| `source_library` | `pass` | 2 | 2 | 0 | 0 | 0 | 0 | +| `knowledge_compilation` | `pass` | 3 | 3 | 0 | 0 | 0 | 0 | +| VectifyAI PageIndex | `not_tested` | 0 | 0 | 0 | 0 | 0 | 1 | +| VectifyAI OpenKB | `not_tested` | 0 | 0 | 0 | 0 | 0 | 1 | +| P3 adapter queue | `blocked` | 0 | 0 | 0 | 0 | 1 | 0 | + +Typed state summary: 2 pass, 0 wrong_result, 0 incomplete, 1 blocked, and 2 +not_tested rows. + +## What ELF Can Prove + +- Long-document source handling: Source Library fixtures preserve canonical source + metadata, source refs, hydrated excerpts, and the source-only boundary before any + memory promotion. +- Derived pages: Knowledge fixtures compile project, entity, concept, and issue + timeline pages with citations, backlinks, unsupported-section flags, and stale lint. 
+- Watch/rebuild: The changed-source fixture selects cited pages, reports changed and + stale sections, emits previous-version diff metadata, and routes memory candidates + through review instead of mutating source records or Memory Notes. +- Source refs: Both slices require source refs and quote-backed evidence; generated + pages and source records stay pointer-backed benchmark artifacts. + +## PageIndex/OpenKB Boundary + +PageIndex remains a reference for vectorless long-document tree retrieval, +long-PDF traversal, cited node paths, and MCP product behavior. This closeout does +not run PageIndex and does not score PageIndex artifacts. + +OpenKB remains a reference for document-to-wiki compilation, concept/entity pages, +saved explorations, lint, watch, and recompile workflows. This closeout does not run +OpenKB and does not score OpenKB artifacts. + +Because no contained PageIndex/OpenKB adapter emits same-corpus artifacts here, both +reference projects remain `not_tested`. That is intentional: the report compares ELF +outputs to reference expectations, not to product outputs. + +## Self-Assessment + +Verdict: `pass_with_reference_only_competitor_boundary`. + +Improved: + +- The closeout now has a dedicated `cargo make real-world-memory-p2-knowledge-closeout` + command that reruns the source-library and knowledge fixture slices together. +- The knowledge slice now includes a changed-source watch/rebuild fixture with stale + lint, version diff, and reviewed memory-candidate boundaries. +- The report names PageIndex/OpenKB expectations without upgrading them to win/tie/loss + claims. + +Stayed bounded: + +- This is fixture-backed same-corpus ELF evidence, not a live external adapter run. +- PageIndex and OpenKB remain `not_tested` until contained adapters emit comparable + tree/wiki artifacts, source refs, lint/watch output, and typed benchmark states. +- This does not prove private-corpus, hosted-provider, or product UI quality. 
+ +Regressed: + +- No regression is detected in this closeout slice: source-library and knowledge + fixture rows are pass, with zero wrong_result and zero unsupported-claim states. + +## P3 Queue Decision + +P3 PageIndex/OpenKB adapter work is decision-ready after main-thread acceptance of +this closeout. The next issue should be one contained adapter task that emits +same-corpus PageIndex tree artifacts and OpenKB wiki artifacts with source refs, +lint/watch output, and typed pass/wrong_result/incomplete/blocked/not_tested states. + +This report does not apply `decodex:queued:elf` to any P3 issue. + +## Claim Boundary + +Allowed: + +- ELF P2 Knowledge Workspace closeout passes its checked-in source-library and + knowledge fixture self-assessment. +- ELF can compare its outputs to PageIndex/OpenKB-style expectations only as reference + expectations in this closeout. +- P3 adapter work is decision-ready after main-thread acceptance. + +Not allowed: + +- Do not claim ELF beats PageIndex or OpenKB. +- Do not treat reference-only PageIndex/OpenKB rows as pass evidence. +- Do not queue a P3 issue in this lane. +- Do not claim live private-corpus, hosted-provider, or product UI quality from this + fixture-backed closeout. diff --git a/docs/evidence/benchmarking/index.md b/docs/evidence/benchmarking/index.md index 0b26bae6..c23fc757 100644 --- a/docs/evidence/benchmarking/index.md +++ b/docs/evidence/benchmarking/index.md @@ -50,3 +50,4 @@ Routes to: Benchmarking evidence concepts under `docs/evidence/benchmarking/`. - `2026-06-20-recall-debug-panel-report.md`: Recall Debug Panel Report - June 20, 2026; adds `elf.recall_debug_panel/v1` as a typed cross-layer readback over memory traces, Source Library document candidates, Knowledge Workspace pages, graph facts, and Dreaming proposals while preserving not-requested and non-pass evidence classes. 
- `2026-06-20-agent-knowledge-os-closeout-benchmark-report.md`: Agent Knowledge OS Closeout Benchmark Report - June 20, 2026; publishes the XY-1023 full product/scenario matrix, names ELF as the strongest measured integrated product, preserves qmd/OpenViking/mem0/OpenMemory/Letta/graph-RAG/VectifyAI strengths, and turns material non-pass or reference-only deltas into optimization queue items. - `2026-06-22-p1-memory-authority-closeout-report.md`: P1 Memory Authority Closeout Report - June 22, 2026; adds `cargo make real-world-memory-p1-closeout`, scores the P1 Source Library -> Memory Candidate -> approved memory -> recall/debug -> correction/rollback chain as 4 pass, and keeps P2 queueing conditional on main-thread acceptance. +- `2026-06-22-p2-knowledge-workspace-pageindex-openkb-closeout-report.md`: P2 Knowledge Workspace PageIndex/OpenKB Closeout Report - June 22, 2026; adds `cargo make real-world-memory-p2-knowledge-closeout`, scores the Source Library and Knowledge Workspace fixture slices as pass, preserves PageIndex/OpenKB as `not_tested` reference-only rows, and keeps P3 adapter queueing behind main-thread acceptance. diff --git a/docs/log.md b/docs/log.md index fdae139f..1f86b05f 100644 --- a/docs/log.md +++ b/docs/log.md @@ -87,6 +87,10 @@ logs. `cargo make real-world-memory-p1-closeout`, preserving the Source Library -> Memory Candidate -> approved memory -> recall/debug -> correction/rollback authority chain and keeping P2 queueing conditional on main-thread acceptance. +- Added the P2 Knowledge Workspace PageIndex/OpenKB closeout report for XY-1066, + plus `cargo make real-world-memory-p2-knowledge-closeout` and a changed-source + watch/rebuild fixture, preserving PageIndex/OpenKB as reference-only `not_tested` + rows until contained adapters emit comparable artifacts. 
- Added the Knowledge Workspace changed-source watch/rebuild contract for XY-1065, plus a drift audit covering the new admin rebuild endpoint, changed/unchanged/ stale/blocked section output, stale-section/changed-claim/missing-citation/conflict diff --git a/docs/runbook/benchmarking/real_world_agent_memory_benchmark.md b/docs/runbook/benchmarking/real_world_agent_memory_benchmark.md index b1020e70..a8e1b308 100644 --- a/docs/runbook/benchmarking/real_world_agent_memory_benchmark.md +++ b/docs/runbook/benchmarking/real_world_agent_memory_benchmark.md @@ -226,6 +226,34 @@ source mutations. It is fixture-backed closeout evidence only; it does not claim live adapter sweep, private-corpus quality, provider-backed quality, or broad competitor wins. +Current checked-in P2 Knowledge Workspace closeout increment: + +```sh +cargo make real-world-memory-p2-knowledge-closeout +``` + +This runs the checked-in Source Library and Knowledge Workspace fixture slices: + +```text +tmp/real-world-memory/source-library-report.json +tmp/real-world-memory/source-library-report.md +tmp/real-world-memory/knowledge-report.json +tmp/real-world-memory/knowledge-report.md +``` + +The checked-in evidence report is +`docs/evidence/benchmarking/2026-06-22-p2-knowledge-workspace-pageindex-openkb-closeout-report.md`, +and the checked-in JSON snapshot is +`apps/elf-eval/fixtures/report_snapshots/2026-06-22-p2-knowledge-workspace-pageindex-openkb-closeout-report.json`. + +The increment scores the Source Library slice as 2 pass and the Knowledge Workspace +slice as 3 pass. It covers long-document source refs, hydrated excerpts, +project/entity/concept/issue pages, stale lint, changed-source watch/rebuild, +previous-version diff metadata, and reviewable memory-candidate boundaries. VectifyAI +PageIndex and OpenKB stay `not_tested` reference-only rows until contained adapters +emit comparable tree/wiki artifacts, source refs, lint/watch output, and typed +benchmark states. 
This closeout does not queue any P3 issue. + Current checked-in project-decisions increment: ```sh @@ -481,11 +509,12 @@ cargo make real-world-memory-knowledge This parses `apps/elf-eval/fixtures/real_world_memory/knowledge/`, writes `tmp/real-world-memory/knowledge-report.json`, and renders `tmp/real-world-memory/knowledge-report.md`. The fixtures include synthetic project, -entity, concept, and issue-timeline page artifacts. Generated pages are benchmark -artifacts only: every section must cite source evidence or timeline events, or it must -be explicitly flagged unsupported. The report publishes citation coverage, stale claim -detection, rebuild determinism, aggregate backlink counts and page coverage, page -usefulness, unsupported summary count, and untraced section count. +entity, concept, issue-timeline, and changed-source watch/rebuild page artifacts. +Generated pages are benchmark artifacts only: every section must cite source evidence +or timeline events, or it must be explicitly flagged unsupported. The report publishes +citation coverage, stale claim detection, rebuild determinism, aggregate backlink +counts and page coverage, previous-version diff coverage, page usefulness, +unsupported summary count, and untraced section count. Current live knowledge-page rebuild/lint increment: