diff --git a/Makefile.toml b/Makefile.toml index a45ec771..51b5e03d 100644 --- a/Makefile.toml +++ b/Makefile.toml @@ -48,6 +48,9 @@ # | real-world-memory-live-adapters | command | | # | real-world-memory-live-consolidation | command | | # | real-world-memory-live-knowledge | command | | +# | real-world-memory-pageindex-openkb | composite | | +# | real-world-memory-pageindex-openkb-json | command | | +# | real-world-memory-pageindex-openkb-report | command | | # | real-world-memory-proactive-brief | composite | | # | real-world-memory-proactive-brief-json | command | | # | real-world-memory-proactive-brief-report | command | | @@ -646,6 +649,63 @@ args = [ "tmp/real-world-memory/knowledge-report.md", ] +[tasks.real-world-memory-pageindex-openkb] +workspace = false +dependencies = [ + "real-world-memory-pageindex-openkb-report", +] + +[tasks.real-world-memory-pageindex-openkb-json] +workspace = false +command = "cargo" +args = [ + "run", + "-p", + "elf-eval", + "--bin", + "real_world_job_benchmark", + "--", + "run", + "--fixtures", + "apps/elf-eval/fixtures/real_world_external_adapters/pageindex_openkb", + "--out", + "tmp/real-world-memory/pageindex-openkb/report.json", + "--run-id", + "real-world-memory-pageindex-openkb", + "--adapter-id", + "fixture_pageindex_openkb_same_corpus", + "--adapter-name", + "PageIndex/OpenKB same-corpus adapter blockers", + "--adapter-behavior", + "typed_setup_blocker", + "--adapter-storage-status", + "blocked", + "--adapter-runtime-status", + "blocked", + "--adapter-notes", + "Offline fixtures preserve same-corpus PageIndex/OpenKB setup blockers until contained product artifacts map to ELF source ids.", +] + +[tasks.real-world-memory-pageindex-openkb-report] +workspace = false +dependencies = [ + "real-world-memory-pageindex-openkb-json", +] +command = "cargo" +args = [ + "run", + "-p", + "elf-eval", + "--bin", + "real_world_job_benchmark", + "--", + "publish", + "--report", + "tmp/real-world-memory/pageindex-openkb/report.json", + 
"--out", + "tmp/real-world-memory/pageindex-openkb/report.md", +] + [tasks.real-world-memory-source-library] workspace = false dependencies = [ diff --git a/README.md b/README.md index ef6536bc..8956304d 100644 --- a/README.md +++ b/README.md @@ -271,6 +271,14 @@ provider-backed ELF evidence was required. reviewable memory-candidate boundaries. VectifyAI PageIndex and OpenKB remain `not_tested` reference-only rows until contained adapters emit comparable tree/wiki artifacts; no P3 issue is queued by this closeout. +- PageIndex/OpenKB same-corpus adapter blockers after XY-1068: the June 22 follow-up + adds `cargo make real-world-memory-pageindex-openkb`, two checked-in typed setup + blocker fixtures, and a checked-in evidence report. PageIndex is blocked until + tree artifacts, cited node paths, traversal output, and MCP readback map to ELF + Source Library source ids. OpenKB is blocked until wiki pages, entity/concept + indexes, lint output, saved exploration state, and watch/recompile traces map to + ELF Knowledge Workspace source ids. The report makes no PageIndex/OpenKB parity, + win, tie, or loss claim. - Operator-approved public-proxy addendum after XY-930: the June 19 follow-up runs `cargo make baseline-production-private-addendum` with a simulated/public-proxy production corpus manifest approved for this stage. 
The run records 12 documents, @@ -404,6 +412,7 @@ Detailed evidence and interpretation: - [Agent Knowledge OS Closeout Benchmark Report - June 20, 2026](docs/evidence/benchmarking/2026-06-20-agent-knowledge-os-closeout-benchmark-report.md) - [P1 Memory Authority Closeout Report - June 22, 2026](docs/evidence/benchmarking/2026-06-22-p1-memory-authority-closeout-report.md) - [P2 Knowledge Workspace PageIndex/OpenKB Closeout Report - June 22, 2026](docs/evidence/benchmarking/2026-06-22-p2-knowledge-workspace-pageindex-openkb-closeout-report.md) +- [PageIndex/OpenKB Same-Corpus Adapter Report - June 22, 2026](docs/evidence/benchmarking/2026-06-22-pageindex-openkb-same-corpus-adapter-report.md) - [Live Baseline Benchmark Runbook](docs/runbook/benchmarking/live_baseline_benchmark.md) - [Single-User Production Runbook](docs/runbook/single_user_production.md) - Benchmark contract: @@ -510,8 +519,9 @@ Latest real-world benchmark report: June 22, 2026. Latest external research refr June 11, 2026; June 20 adds the Agent Knowledge OS Closeout Benchmark Report, the Graph Topic-Map Report - June 20, 2026, Knowledge Workspace Version-Diff Report - June 20, 2026, and the Live Knowledge-Page Rebuild/Lint Report - June 20, -2026; June 22 adds the P1 Memory Authority Closeout Report and P2 Knowledge -Workspace PageIndex/OpenKB Closeout Report after the June 19 +2026; June 22 adds the P1 Memory Authority Closeout Report, P2 Knowledge +Workspace PageIndex/OpenKB Closeout Report, and PageIndex/OpenKB Same-Corpus Adapter +Report after the June 19 XY-930 operator-approved public-proxy production addendum and service-native Dreaming readback, the qmd debug-ergonomics Dreaming retest, the June 17 competitor-strength closeout, and the June 16 temporal reconciliation, live consolidation self-check, diff --git a/apps/elf-eval/fixtures/real_world_external_adapters/pageindex_openkb/openkb_wiki_recompile_blocked.json 
b/apps/elf-eval/fixtures/real_world_external_adapters/pageindex_openkb/openkb_wiki_recompile_blocked.json new file mode 100644 index 00000000..7b6302c6 --- /dev/null +++ b/apps/elf-eval/fixtures/real_world_external_adapters/pageindex_openkb/openkb_wiki_recompile_blocked.json @@ -0,0 +1,307 @@ +{ + "schema": "elf.real_world_job/v1", + "job_id": "pageindex-openkb-openkb-wiki-blocked-001", + "suite": "knowledge_compilation", + "title": "Block OpenKB same-corpus wiki comparison until pages, lint, and recompile trace map to ELF source ids", + "encoding": { + "status": "blocked", + "reason": "VectifyAI OpenKB same-corpus scoring is blocked until a contained OpenKB run emits generated wiki pages, concept/entity indexes, lint output, saved exploration state, and watch/recompile traces mapped to the ELF Knowledge Workspace source ids.", + "follow_up": { + "title": "Run OpenKB same-corpus wiki and watch/recompile adapter", + "reason": "The fair comparison needs OpenKB wiki/entity/concept outputs and watch/recompile artifacts over the same knowledge corpus with source ids mapped to benchmark evidence ids." 
+ } + }, + "corpus": { + "corpus_id": "real-world-memory-knowledge-synthetic-2026-06-22", + "profile": "external_adapter", + "items": [ + { + "evidence_id": "elf-knowledge-project-page-output", + "kind": "elf_same_corpus_output", + "text": "ELF same-corpus knowledge output: knowledge job knowledge-project-page-001 materializes page project:elf-benchmark-suite at path apps/elf-eval/fixtures/real_world_memory/knowledge/pages/project_elf_benchmark_suite.md with source ids elf-knowledge-current-truth, elf-knowledge-history, and xy848-issue-timeline.", + "source_ref": { + "schema": "source_ref/v1", + "resolver": "real_world_job_fixture/v1", + "ref": { + "fixture": "project_page_rebuild", + "page_id": "project:elf-benchmark-suite" + }, + "locator": { + "quote": "project:elf-benchmark-suite" + } + }, + "created_at": "2026-06-09T01:10:00Z" + }, + { + "evidence_id": "elf-knowledge-entity-concept-output", + "kind": "elf_same_corpus_output", + "text": "ELF same-corpus knowledge output: knowledge job knowledge-entity-concept-002 materializes entity:qdrant-rebuild, concept:derived-knowledge-pages, and issue:xy848-knowledge-pages pages with source ids qdrant-rebuild-entity, derived-pages-concept, and xy848-current-timeline.", + "source_ref": { + "schema": "source_ref/v1", + "resolver": "real_world_job_fixture/v1", + "ref": { + "fixture": "entity_concept_issue_pages", + "page_ids": [ + "entity:qdrant-rebuild", + "concept:derived-knowledge-pages", + "issue:xy848-knowledge-pages" + ] + }, + "locator": { + "quote": "entity:qdrant-rebuild, concept:derived-knowledge-pages" + } + }, + "created_at": "2026-06-09T02:10:00Z" + }, + { + "evidence_id": "elf-knowledge-watch-rebuild-output", + "kind": "elf_same_corpus_output", + "text": "ELF same-corpus watch output: knowledge job knowledge-watch-rebuild-003 materializes changed-source watch/rebuild with source ids watch-source-original, watch-source-updated, watch-lint-output, and watch-memory-candidate-proposal.", + "source_ref": { + 
"schema": "source_ref/v1", + "resolver": "real_world_job_fixture/v1", + "ref": { + "fixture": "changed_source_watch_rebuild", + "page_id": "project:knowledge-watch-rebuild" + }, + "locator": { + "quote": "changed-source watch/rebuild" + } + }, + "created_at": "2026-06-22T01:12:00Z" + }, + { + "evidence_id": "openkb-adapter-setup-blocker", + "kind": "adapter_blocker", + "text": "OpenKB setup blocker: XY-1068 has no Docker-contained OpenKB product run, no generated wiki page export, no entity/concept index export, and no watch/recompile trace mapped back to the same corpus source ids.", + "source_ref": { + "schema": "source_ref/v1", + "resolver": "real_world_job_fixture/v1", + "ref": { + "fixture": "openkb_wiki_recompile_blocked", + "evidence_id": "openkb-adapter-setup-blocker" + }, + "locator": { + "quote": "no generated wiki page export" + } + }, + "created_at": "2026-06-22T03:05:00Z" + }, + { + "evidence_id": "openkb-required-materialized-output-contract", + "kind": "adapter_contract", + "text": "OpenKB artifact contract: a runnable same-corpus adapter must emit wiki pages, concept/entity indexes, lint output, saved exploration state, watch/recompile trace, source ids, and setup/runtime metadata before OpenKB can be scored.", + "source_ref": { + "schema": "source_ref/v1", + "resolver": "real_world_job_fixture/v1", + "ref": { + "fixture": "openkb_wiki_recompile_blocked", + "evidence_id": "openkb-required-materialized-output-contract" + }, + "locator": { + "quote": "wiki pages, concept/entity indexes, lint output" + } + }, + "created_at": "2026-06-22T03:06:00Z" + }, + { + "evidence_id": "openkb-parity-decoy", + "kind": "unsupported_claim", + "text": "Decoy: the ELF Knowledge Workspace page pass proves OpenKB document-to-wiki and recompile parity without running OpenKB.", + "source_ref": { + "schema": "source_ref/v1", + "resolver": "real_world_job_fixture/v1", + "ref": { + "fixture": "openkb_wiki_recompile_blocked", + "evidence_id": "openkb-parity-decoy" + } + }, + 
"created_at": "2026-06-22T03:04:00Z" + } + ], + "adapter_response": { + "adapter_id": "fixture_pageindex_openkb_same_corpus", + "answer": { + "content": "OpenKB remains blocked on the same knowledge corpus. ELF materializes project, entity, concept, issue, and changed-source watch/rebuild outputs with source ids including elf-knowledge-current-truth, qdrant-rebuild-entity, derived-pages-concept, xy848-current-timeline, watch-source-updated, watch-lint-output, and watch-memory-candidate-proposal. No contained OpenKB run has emitted wiki page export, entity/concept index export, saved exploration state, lint output, or watch/recompile trace mapped to those source ids. No OpenKB parity, win, tie, or loss is claimed.", + "claims": [ + { + "claim_id": "elf_knowledge_outputs_present", + "text": "ELF materializes same-corpus project, entity, concept, issue, and changed-source watch/rebuild knowledge outputs.", + "evidence_ids": [ + "elf-knowledge-project-page-output", + "elf-knowledge-entity-concept-output", + "elf-knowledge-watch-rebuild-output" + ], + "confidence": "high" + }, + { + "claim_id": "openkb_setup_blocked", + "text": "OpenKB scoring is blocked because no contained OpenKB product run, wiki export, entity/concept index export, or watch/recompile trace is checked in.", + "evidence_ids": ["openkb-adapter-setup-blocker"], + "confidence": "high" + }, + { + "claim_id": "openkb_artifact_contract", + "text": "A runnable OpenKB adapter must emit wiki pages, concept/entity indexes, lint output, saved exploration state, watch/recompile trace, source ids, and setup/runtime metadata before scoring.", + "evidence_ids": ["openkb-required-materialized-output-contract"], + "confidence": "high" + } + ], + "evidence_ids": [ + "elf-knowledge-project-page-output", + "elf-knowledge-entity-concept-output", + "elf-knowledge-watch-rebuild-output", + "openkb-adapter-setup-blocker", + "openkb-required-materialized-output-contract" + ], + "latency_ms": 1.0, + "cost": { + "currency": 
"USD", + "amount": 0.0, + "input_tokens": 0, + "output_tokens": 0 + } + } + } + }, + "timeline": [ + { + "event_id": "elf-knowledge-outputs-selected", + "ts": "2026-06-22T03:05:00Z", + "actor": "benchmark", + "action": "selected_same_corpus_knowledge_outputs", + "evidence_ids": [ + "elf-knowledge-project-page-output", + "elf-knowledge-entity-concept-output", + "elf-knowledge-watch-rebuild-output" + ], + "summary": "The OpenKB comparison uses the same project/entity/concept/issue and watch/rebuild corpus ids as the ELF knowledge fixtures." + }, + { + "event_id": "openkb-blocker-recorded", + "ts": "2026-06-22T03:06:00Z", + "actor": "benchmark", + "action": "recorded_openkb_setup_blocker", + "evidence_ids": [ + "openkb-adapter-setup-blocker", + "openkb-required-materialized-output-contract" + ], + "summary": "OpenKB remains blocked until wiki, lint, saved exploration, and watch/recompile artifacts map back to same-corpus source ids." + } + ], + "prompt": { + "role": "user", + "content": "Compare OpenKB document-to-wiki, concept/entity, lint, watch, and recompile behavior against ELF Knowledge Workspace only if OpenKB emits same-corpus artifacts.", + "job_mode": "compare", + "constraints": [ + "cite_same_corpus_source_ids", + "require_openkb_wiki_artifacts", + "typed_setup_blocker", + "no_openkb_parity_claim" + ] + }, + "expected_answer": { + "must_include": [ + { + "claim_id": "elf_knowledge_outputs_present", + "text": "ELF materializes same-corpus project, entity, concept, issue, and changed-source watch/rebuild knowledge outputs." + }, + { + "claim_id": "openkb_setup_blocked", + "text": "OpenKB scoring is blocked because no contained OpenKB product run, wiki export, entity/concept index export, or watch/recompile trace is checked in." 
+ }, + { + "claim_id": "openkb_artifact_contract", + "text": "A runnable OpenKB adapter must emit wiki pages, concept/entity indexes, lint output, saved exploration state, watch/recompile trace, source ids, and setup/runtime metadata before scoring." + } + ], + "must_not_include": [ + "ELF Knowledge Workspace page pass proves OpenKB document-to-wiki and recompile parity.", + "ELF beats OpenKB on wiki compilation." + ], + "evidence_links": { + "elf_knowledge_outputs_present": [ + "elf-knowledge-project-page-output", + "elf-knowledge-entity-concept-output", + "elf-knowledge-watch-rebuild-output" + ], + "openkb_setup_blocked": ["openkb-adapter-setup-blocker"], + "openkb_artifact_contract": ["openkb-required-materialized-output-contract"] + }, + "answer_type": "typed_blocker", + "accepted_alternates": [], + "requires_caveat": true, + "requires_refusal": false + }, + "required_evidence": [ + { + "evidence_id": "elf-knowledge-project-page-output", + "claim_id": "elf_knowledge_outputs_present", + "requirement": "cite", + "quote": "project:elf-benchmark-suite" + }, + { + "evidence_id": "openkb-adapter-setup-blocker", + "claim_id": "openkb_setup_blocked", + "requirement": "cite", + "quote": "no generated wiki page export" + }, + { + "evidence_id": "openkb-required-materialized-output-contract", + "claim_id": "openkb_artifact_contract", + "requirement": "cite", + "quote": "wiki pages, concept/entity indexes, lint output" + } + ], + "negative_traps": [ + { + "trap_id": "openkb-parity-without-run", + "type": "unsupported_claim", + "evidence_ids": ["openkb-parity-decoy"], + "failure_if_used": true + } + ], + "scoring_rubric": { + "dimensions": { + "uncertainty_handling": { + "weight": 0.35, + "max_points": 1.0, + "criteria": "Keeps OpenKB as blocked until contained wiki/recompile artifacts exist." + }, + "evidence_grounding": { + "weight": 0.3, + "max_points": 1.0, + "criteria": "Names the same-corpus ELF knowledge source ids and required OpenKB artifact ids." 
+ }, + "workflow_helpfulness": { + "weight": 0.2, + "max_points": 1.0, + "criteria": "Identifies the concrete OpenKB output needed for rerun." + }, + "trap_avoidance": { + "weight": 0.15, + "max_points": 1.0, + "criteria": "Does not promote ELF knowledge page evidence into OpenKB parity." + } + }, + "pass_threshold": 0.85, + "hard_fail_rules": [ + "unsupported high-confidence claim about OpenKB parity", + "use of a negative trap marked failure_if_used = true" + ] + }, + "allowed_uncertainty": { + "can_answer_unknown": false, + "acceptable_phrases": [], + "fallback_action": "state_blocker" + }, + "tags": [ + "external_adapter", + "openkb", + "knowledge_compilation", + "wiki", + "watch_recompile", + "typed_blocker", + "same_corpus" + ] +} diff --git a/apps/elf-eval/fixtures/real_world_external_adapters/pageindex_openkb/pageindex_long_document_tree_blocked.json b/apps/elf-eval/fixtures/real_world_external_adapters/pageindex_openkb/pageindex_long_document_tree_blocked.json new file mode 100644 index 00000000..5dbd514c --- /dev/null +++ b/apps/elf-eval/fixtures/real_world_external_adapters/pageindex_openkb/pageindex_long_document_tree_blocked.json @@ -0,0 +1,281 @@ +{ + "schema": "elf.real_world_job/v1", + "job_id": "pageindex-openkb-pageindex-tree-blocked-001", + "suite": "source_library", + "title": "Block PageIndex same-corpus tree retrieval until node paths map to ELF source ids", + "encoding": { + "status": "blocked", + "reason": "VectifyAI PageIndex same-corpus scoring is blocked until a contained run emits long-document tree artifacts, cited node paths, and PageIndex MCP readback mapped to the ELF Source Library source ids.", + "follow_up": { + "title": "Run PageIndex same-corpus long-document tree adapter", + "reason": "The fair comparison needs PageIndex tree nodes and traversal output over the source-library long-document corpus with source ids mapped to benchmark evidence ids." 
+ } + }, + "corpus": { + "corpus_id": "real-world-memory-source-library-2026-06-20", + "profile": "external_adapter", + "items": [ + { + "evidence_id": "elf-source-library-long-doc-source-record", + "kind": "elf_same_corpus_output", + "text": "ELF same-corpus source output: source_library job source-library-long-doc-001 materializes source id article-source-record with canonical_uri=https://example.com/research/agent-memory-os, source_kind=article, author=Example Research Group, captured_at=2026-06-20T01:10:00Z, and source_ref resolver elf_doc_ext/v1.", + "source_ref": { + "schema": "source_ref/v1", + "resolver": "real_world_job_fixture/v1", + "ref": { + "fixture": "long_document_source_library", + "evidence_id": "article-source-record" + }, + "locator": { + "quote": "canonical_uri=https://example.com/research/agent-memory-os" + } + }, + "created_at": "2026-06-20T01:10:00Z" + }, + { + "evidence_id": "elf-source-library-long-doc-hydrated-excerpt", + "kind": "elf_same_corpus_output", + "text": "ELF same-corpus source output: source_library job source-library-long-doc-001 materializes source id article-hydrated-excerpt with quote source libraries preserve long-form evidence, verified=true, content_hash=long-doc-content-hash, and excerpt_hash=long-doc-excerpt-hash.", + "source_ref": { + "schema": "source_ref/v1", + "resolver": "real_world_job_fixture/v1", + "ref": { + "fixture": "long_document_source_library", + "evidence_id": "article-hydrated-excerpt" + }, + "locator": { + "quote": "verified=true" + } + }, + "created_at": "2026-06-20T01:11:00Z" + }, + { + "evidence_id": "pageindex-adapter-setup-blocker", + "kind": "adapter_blocker", + "text": "PageIndex setup blocker: XY-1068 has no Docker-contained PageIndex installation, no PageIndex MCP readback, and no emitted tree artifact mapping node paths back to source ids article-source-record and article-hydrated-excerpt.", + "source_ref": { + "schema": "source_ref/v1", + "resolver": "real_world_job_fixture/v1", + "ref": { 
+ "fixture": "pageindex_long_document_tree_blocked", + "evidence_id": "pageindex-adapter-setup-blocker" + }, + "locator": { + "quote": "no emitted tree artifact mapping node paths" + } + }, + "created_at": "2026-06-22T03:00:00Z" + }, + { + "evidence_id": "pageindex-required-materialized-output-contract", + "kind": "adapter_contract", + "text": "PageIndex artifact contract: a runnable same-corpus adapter must emit tree_nodes.json or equivalent, cited node paths, long-document traversal output, source ids, and setup/runtime metadata before PageIndex can be scored.", + "source_ref": { + "schema": "source_ref/v1", + "resolver": "real_world_job_fixture/v1", + "ref": { + "fixture": "pageindex_long_document_tree_blocked", + "evidence_id": "pageindex-required-materialized-output-contract" + }, + "locator": { + "quote": "tree_nodes.json or equivalent" + } + }, + "created_at": "2026-06-22T03:01:00Z" + }, + { + "evidence_id": "pageindex-parity-decoy", + "kind": "unsupported_claim", + "text": "Decoy: the ELF Source Library long-document pass proves PageIndex parity without running PageIndex tree retrieval.", + "source_ref": { + "schema": "source_ref/v1", + "resolver": "real_world_job_fixture/v1", + "ref": { + "fixture": "pageindex_long_document_tree_blocked", + "evidence_id": "pageindex-parity-decoy" + } + }, + "created_at": "2026-06-22T02:59:00Z" + } + ], + "adapter_response": { + "adapter_id": "fixture_pageindex_openkb_same_corpus", + "answer": { + "content": "PageIndex remains blocked on the same source-library corpus. ELF materializes source ids article-source-record and article-hydrated-excerpt with source_ref hydration, but no contained PageIndex run has emitted tree_nodes.json, cited node paths, traversal output, or MCP readback mapped to those source ids. 
No PageIndex parity, win, tie, or loss is claimed.", + "claims": [ + { + "claim_id": "elf_source_outputs_present", + "text": "ELF materializes the same-corpus long-document source record and hydrated excerpt source ids.", + "evidence_ids": [ + "elf-source-library-long-doc-source-record", + "elf-source-library-long-doc-hydrated-excerpt" + ], + "confidence": "high" + }, + { + "claim_id": "pageindex_setup_blocked", + "text": "PageIndex scoring is blocked because no contained PageIndex installation, MCP readback, or tree artifact is checked in.", + "evidence_ids": ["pageindex-adapter-setup-blocker"], + "confidence": "high" + }, + { + "claim_id": "pageindex_artifact_contract", + "text": "A runnable PageIndex adapter must emit tree nodes, cited node paths, traversal output, source ids, and setup/runtime metadata before scoring.", + "evidence_ids": ["pageindex-required-materialized-output-contract"], + "confidence": "high" + } + ], + "evidence_ids": [ + "elf-source-library-long-doc-source-record", + "elf-source-library-long-doc-hydrated-excerpt", + "pageindex-adapter-setup-blocker", + "pageindex-required-materialized-output-contract" + ], + "latency_ms": 1.0, + "cost": { + "currency": "USD", + "amount": 0.0, + "input_tokens": 0, + "output_tokens": 0 + } + } + } + }, + "timeline": [ + { + "event_id": "elf-source-library-output-selected", + "ts": "2026-06-22T03:00:00Z", + "actor": "benchmark", + "action": "selected_same_corpus_source_outputs", + "evidence_ids": [ + "elf-source-library-long-doc-source-record", + "elf-source-library-long-doc-hydrated-excerpt" + ], + "summary": "The PageIndex comparison uses the same long-document Source Library corpus ids as the ELF source-library fixture." 
+ }, + { + "event_id": "pageindex-blocker-recorded", + "ts": "2026-06-22T03:01:00Z", + "actor": "benchmark", + "action": "recorded_pageindex_setup_blocker", + "evidence_ids": [ + "pageindex-adapter-setup-blocker", + "pageindex-required-materialized-output-contract" + ], + "summary": "PageIndex remains blocked until tree artifacts and MCP readback map back to same-corpus source ids." + } + ], + "prompt": { + "role": "user", + "content": "Compare PageIndex long-document tree retrieval against the ELF Source Library long-document corpus only if PageIndex emits cited tree artifacts.", + "job_mode": "compare", + "constraints": [ + "cite_same_corpus_source_ids", + "require_pageindex_tree_artifacts", + "typed_setup_blocker", + "no_pageindex_parity_claim" + ] + }, + "expected_answer": { + "must_include": [ + { + "claim_id": "elf_source_outputs_present", + "text": "ELF materializes the same-corpus long-document source record and hydrated excerpt source ids." + }, + { + "claim_id": "pageindex_setup_blocked", + "text": "PageIndex scoring is blocked because no contained PageIndex installation, MCP readback, or tree artifact is checked in." + }, + { + "claim_id": "pageindex_artifact_contract", + "text": "A runnable PageIndex adapter must emit tree nodes, cited node paths, traversal output, source ids, and setup/runtime metadata before scoring." + } + ], + "must_not_include": [ + "ELF Source Library long-document pass proves PageIndex parity.", + "ELF beats PageIndex on long-document tree retrieval." 
+ ], + "evidence_links": { + "elf_source_outputs_present": [ + "elf-source-library-long-doc-source-record", + "elf-source-library-long-doc-hydrated-excerpt" + ], + "pageindex_setup_blocked": ["pageindex-adapter-setup-blocker"], + "pageindex_artifact_contract": ["pageindex-required-materialized-output-contract"] + }, + "answer_type": "typed_blocker", + "accepted_alternates": [], + "requires_caveat": true, + "requires_refusal": false + }, + "required_evidence": [ + { + "evidence_id": "elf-source-library-long-doc-source-record", + "claim_id": "elf_source_outputs_present", + "requirement": "cite", + "quote": "canonical_uri=https://example.com/research/agent-memory-os" + }, + { + "evidence_id": "pageindex-adapter-setup-blocker", + "claim_id": "pageindex_setup_blocked", + "requirement": "cite", + "quote": "no emitted tree artifact mapping node paths" + }, + { + "evidence_id": "pageindex-required-materialized-output-contract", + "claim_id": "pageindex_artifact_contract", + "requirement": "cite", + "quote": "tree_nodes.json or equivalent" + } + ], + "negative_traps": [ + { + "trap_id": "pageindex-parity-without-run", + "type": "unsupported_claim", + "evidence_ids": ["pageindex-parity-decoy"], + "failure_if_used": true + } + ], + "scoring_rubric": { + "dimensions": { + "uncertainty_handling": { + "weight": 0.35, + "max_points": 1.0, + "criteria": "Keeps PageIndex as blocked until contained tree artifacts exist." + }, + "evidence_grounding": { + "weight": 0.3, + "max_points": 1.0, + "criteria": "Names the same-corpus ELF source ids and required PageIndex artifact ids." + }, + "workflow_helpfulness": { + "weight": 0.2, + "max_points": 1.0, + "criteria": "Identifies the concrete PageIndex output needed for rerun." + }, + "trap_avoidance": { + "weight": 0.15, + "max_points": 1.0, + "criteria": "Does not promote ELF long-document evidence into PageIndex parity." 
+ } + }, + "pass_threshold": 0.85, + "hard_fail_rules": [ + "unsupported high-confidence claim about PageIndex parity", + "use of a negative trap marked failure_if_used = true" + ] + }, + "allowed_uncertainty": { + "can_answer_unknown": false, + "acceptable_phrases": [], + "fallback_action": "state_blocker" + }, + "tags": [ + "external_adapter", + "pageindex", + "source_library", + "long_document", + "typed_blocker", + "same_corpus" + ] +} diff --git a/apps/elf-eval/fixtures/report_snapshots/2026-06-22-pageindex-openkb-same-corpus-adapter-report.json b/apps/elf-eval/fixtures/report_snapshots/2026-06-22-pageindex-openkb-same-corpus-adapter-report.json new file mode 100644 index 00000000..91a991be --- /dev/null +++ b/apps/elf-eval/fixtures/report_snapshots/2026-06-22-pageindex-openkb-same-corpus-adapter-report.json @@ -0,0 +1,119 @@ +{ + "schema": "elf.pageindex_openkb_same_corpus_adapter_report/v1", + "authority": "XY-1068", + "phase": "P3 PageIndex/OpenKB same-corpus adapter blocker", + "generated_at": "2026-06-22T00:00:00Z", + "report_markdown": "docs/evidence/benchmarking/2026-06-22-pageindex-openkb-same-corpus-adapter-report.md", + "command": { + "command": "cargo make real-world-memory-pageindex-openkb", + "status": "pass", + "artifact_json": "tmp/real-world-memory/pageindex-openkb/report.json", + "artifact_markdown": "tmp/real-world-memory/pageindex-openkb/report.md", + "proves": [ + "pageindex_same_corpus_typed_setup_blocker", + "openkb_same_corpus_typed_setup_blocker", + "no_pageindex_openkb_parity_claim_without_materialized_outputs" + ] + }, + "checked_in_artifacts": [ + { + "artifact": "apps/elf-eval/fixtures/real_world_external_adapters/pageindex_openkb/pageindex_long_document_tree_blocked.json", + "target": "VectifyAI PageIndex", + "status": "blocked" + }, + { + "artifact": "apps/elf-eval/fixtures/real_world_external_adapters/pageindex_openkb/openkb_wiki_recompile_blocked.json", + "target": "VectifyAI OpenKB", + "status": "blocked" + } + ], + 
"scored_status": { + "jobs": 2, + "pass": 0, + "wrong_result": 0, + "incomplete": 0, + "blocked": 2, + "not_encoded": 0, + "unsupported_claim": 0 + }, + "comparison_results": [ + { + "target": "VectifyAI PageIndex", + "suite": "source_library", + "status": "blocked", + "evidence_class": "same_corpus_typed_setup_blocker", + "same_corpus_basis": "real-world-memory-source-library-2026-06-20", + "elf_materialized_outputs": [ + { + "source_id": "article-source-record", + "fixture_evidence_id": "elf-source-library-long-doc-source-record", + "materialized_output": "ELF Source Library long-document source record with canonical URI, source kind, author, capture timestamp, and source_ref resolver." + }, + { + "source_id": "article-hydrated-excerpt", + "fixture_evidence_id": "elf-source-library-long-doc-hydrated-excerpt", + "materialized_output": "ELF Source Library hydrated excerpt with verified=true, content hash, excerpt hash, and source_ref hydration pointer." + } + ], + "required_adapter_outputs": [ + "tree_nodes.json or equivalent PageIndex tree artifact", + "cited node paths mapped to source ids article-source-record and article-hydrated-excerpt", + "long-document traversal output", + "PageIndex MCP readback", + "setup/runtime metadata" + ], + "blocker": "No contained PageIndex installation, MCP readback, or tree artifact is checked in for the same corpus." + }, + { + "target": "VectifyAI OpenKB", + "suite": "knowledge_compilation", + "status": "blocked", + "evidence_class": "same_corpus_typed_setup_blocker", + "same_corpus_basis": "real-world-memory-knowledge-synthetic-2026-06-22", + "elf_materialized_outputs": [ + { + "source_id": "project:elf-benchmark-suite", + "fixture_evidence_id": "elf-knowledge-project-page-output", + "materialized_output": "ELF Knowledge Workspace project page with source ids elf-knowledge-current-truth, elf-knowledge-history, and xy848-issue-timeline." 
+ }, + { + "source_id": "entity:qdrant-rebuild|concept:derived-knowledge-pages|issue:xy848-knowledge-pages", + "fixture_evidence_id": "elf-knowledge-entity-concept-output", + "materialized_output": "ELF Knowledge Workspace entity, concept, and issue pages with source ids qdrant-rebuild-entity, derived-pages-concept, and xy848-current-timeline." + }, + { + "source_id": "project:knowledge-watch-rebuild", + "fixture_evidence_id": "elf-knowledge-watch-rebuild-output", + "materialized_output": "ELF changed-source watch/rebuild page with source ids watch-source-original, watch-source-updated, watch-lint-output, and watch-memory-candidate-proposal." + } + ], + "required_adapter_outputs": [ + "OpenKB generated wiki page export", + "entity/concept index export", + "lint output", + "saved exploration state", + "watch/recompile trace mapped to same-corpus source ids", + "setup/runtime metadata" + ], + "blocker": "No contained OpenKB product run, wiki export, entity/concept index export, lint output, saved exploration state, or watch/recompile trace is checked in for the same corpus." + } + ], + "requirements_refinement": [ + "Source Library comparison jobs must name source ids and source_ref hydration outputs before asking an external long-document tree adapter to score.", + "Knowledge Workspace comparison jobs must name generated page ids, source ids, lint/watch outputs, and recompile traces before asking an external wiki adapter to score.", + "Reference-only PageIndex/OpenKB rows may become typed blocked adapter jobs only when the blocker records the exact missing materialized outputs and preserves no-parity claim boundaries." 
+ ], + "claim_boundaries": { + "allowed": [ + "The PageIndex/OpenKB P3 fixture slice emits two same-corpus typed setup blockers.", + "The blockers identify the ELF materialized source-library and knowledge outputs plus the exact PageIndex/OpenKB outputs required for future scoring.", + "The repo now has a repeatable cargo-make task for the PageIndex/OpenKB blocker slice." + ], + "not_allowed": [ + "Do not claim ELF beats PageIndex or OpenKB.", + "Do not claim PageIndex or OpenKB parity, win, tie, or loss.", + "Do not treat blocked PageIndex/OpenKB adapter jobs as weakness or strength evidence.", + "Do not claim PageIndex MCP or OpenKB product UI behavior was run." + ] + } +} diff --git a/docs/evidence/benchmarking/2026-06-22-pageindex-openkb-same-corpus-adapter-report.md b/docs/evidence/benchmarking/2026-06-22-pageindex-openkb-same-corpus-adapter-report.md new file mode 100644 index 00000000..47abf2c8 --- /dev/null +++ b/docs/evidence/benchmarking/2026-06-22-pageindex-openkb-same-corpus-adapter-report.md @@ -0,0 +1,127 @@ +--- +type: Evidence +title: "PageIndex/OpenKB Same-Corpus Adapter Report - June 22, 2026" +description: "Typed setup-blocker evidence for PageIndex/OpenKB same-corpus comparison against ELF Source Library and Knowledge Workspace outputs." 
+resource: docs/evidence/benchmarking/2026-06-22-pageindex-openkb-same-corpus-adapter-report.md +status: active +authority: evidence +owner: benchmarking +last_verified: 2026-06-22 +tags: + - docs + - evidence + - benchmarking + - pageindex + - openkb +source_refs: + - apps/elf-eval/fixtures/report_snapshots/2026-06-22-pageindex-openkb-same-corpus-adapter-report.json + - apps/elf-eval/fixtures/real_world_external_adapters/pageindex_openkb/ +code_refs: + - Makefile.toml +related: + - docs/evidence/benchmarking/2026-06-22-p2-knowledge-workspace-pageindex-openkb-closeout-report.md + - docs/spec/agent_memory_knowledge_system_v1.md + - docs/spec/real_world_agent_memory_benchmark_v1.md +drift_watch: + - docs/evidence/benchmarking/2026-06-22-pageindex-openkb-same-corpus-adapter-report.md + - apps/elf-eval/fixtures/report_snapshots/2026-06-22-pageindex-openkb-same-corpus-adapter-report.json + - apps/elf-eval/fixtures/real_world_external_adapters/pageindex_openkb/ + - Makefile.toml +--- +# PageIndex/OpenKB Same-Corpus Adapter Report - June 22, 2026 + +Purpose: Close XY-1068 by turning the P2 reference-only PageIndex/OpenKB rows into +same-corpus typed setup blockers with explicit source ids and required materialized +outputs. +Status: evidence +Read this when: You need to know what PageIndex/OpenKB comparison evidence exists +after the P2 Knowledge Workspace closeout. +Not this document: A PageIndex product run, OpenKB product run, parity result, or +win/tie/loss comparison. +Inputs: `apps/elf-eval/fixtures/real_world_external_adapters/pageindex_openkb/` +and `apps/elf-eval/fixtures/report_snapshots/2026-06-22-pageindex-openkb-same-corpus-adapter-report.json`. 
+ +## Command + +```sh +cargo make real-world-memory-pageindex-openkb +``` + +The command writes generated runner output to: + +- `tmp/real-world-memory/pageindex-openkb/report.json` +- `tmp/real-world-memory/pageindex-openkb/report.md` + +Checked-in evidence is: + +- `apps/elf-eval/fixtures/real_world_external_adapters/pageindex_openkb/pageindex_long_document_tree_blocked.json` +- `apps/elf-eval/fixtures/real_world_external_adapters/pageindex_openkb/openkb_wiki_recompile_blocked.json` +- `apps/elf-eval/fixtures/report_snapshots/2026-06-22-pageindex-openkb-same-corpus-adapter-report.json` + +## Result + +The same-corpus PageIndex/OpenKB slice is runnable and scores as typed blockers: + +| Target | Suite | Status | Jobs | Pass | Wrong result | Incomplete | Blocked | Not encoded | +| --- | --- | --- | ---: | ---: | ---: | ---: | ---: | ---: | +| VectifyAI PageIndex | `source_library` | `blocked` | 1 | 0 | 0 | 0 | 1 | 0 | +| VectifyAI OpenKB | `knowledge_compilation` | `blocked` | 1 | 0 | 0 | 0 | 1 | 0 | + +Typed state summary: 0 pass, 0 wrong_result, 0 incomplete, 2 blocked, and 0 +not_encoded rows for this two-job slice. The generated runner report still marks +unrelated suites as `not_encoded`, because this task intentionally runs only the +PageIndex/OpenKB fixtures. + +## Same-Corpus Outputs + +PageIndex comparison now points at the ELF Source Library long-document corpus: + +| Source id | Materialized ELF output | Required PageIndex output | +| --- | --- | --- | +| `article-source-record` | Long-document Source Library record with canonical URI, source kind, author, capture timestamp, and `elf_doc_ext/v1` source ref. | PageIndex tree node or path that maps back to this source id. | +| `article-hydrated-excerpt` | Hydrated excerpt with `verified=true`, content hash, excerpt hash, and source-ref hydration pointer. | Long-document traversal output with cited node path and excerpt/source-id mapping. 
| + +OpenKB comparison now points at the ELF Knowledge Workspace corpus: + +| Source/page id | Materialized ELF output | Required OpenKB output | +| --- | --- | --- | +| `project:elf-benchmark-suite` | Project page with source ids `elf-knowledge-current-truth`, `elf-knowledge-history`, and `xy848-issue-timeline`. | OpenKB wiki page export citing matching source ids. | +| `entity:qdrant-rebuild`, `concept:derived-knowledge-pages`, `issue:xy848-knowledge-pages` | Entity, concept, and issue pages with source ids `qdrant-rebuild-entity`, `derived-pages-concept`, and `xy848-current-timeline`. | OpenKB entity/concept index export with citations. | +| `project:knowledge-watch-rebuild` | Changed-source watch/rebuild output with `watch-source-original`, `watch-source-updated`, `watch-lint-output`, and `watch-memory-candidate-proposal`. | OpenKB lint output, saved exploration state, and watch/recompile trace mapped to those source ids. | + +## Blockers + +PageIndex remains `blocked` because no contained PageIndex installation, MCP +readback, tree artifact, cited node path output, or traversal report is checked in for +this corpus. + +OpenKB remains `blocked` because no contained OpenKB product run, wiki export, +entity/concept index export, lint output, saved exploration state, or watch/recompile +trace is checked in for this corpus. + +## Requirements Refinement + +- Source Library comparison jobs must name source ids and source-ref hydration outputs + before asking an external long-document tree adapter to score. +- Knowledge Workspace comparison jobs must name generated page ids, source ids, + lint/watch outputs, and recompile traces before asking an external wiki adapter to + score. +- Reference-only PageIndex/OpenKB rows may become typed blocked adapter jobs only when + the blocker records the exact missing materialized outputs and preserves no-parity + claim boundaries. 
+ +## Claim Boundary + +Allowed: + +- The PageIndex/OpenKB P3 fixture slice emits two same-corpus typed setup blockers. +- The blockers identify ELF materialized outputs and required PageIndex/OpenKB outputs + for future scoring. +- The repo has a repeatable `cargo make real-world-memory-pageindex-openkb` task. + +Not allowed: + +- Do not claim ELF beats PageIndex or OpenKB. +- Do not claim PageIndex or OpenKB parity, win, tie, or loss. +- Do not treat blocked PageIndex/OpenKB adapter jobs as weakness or strength evidence. +- Do not claim PageIndex MCP or OpenKB product UI behavior was run. diff --git a/docs/evidence/benchmarking/index.md b/docs/evidence/benchmarking/index.md index c23fc757..e541547f 100644 --- a/docs/evidence/benchmarking/index.md +++ b/docs/evidence/benchmarking/index.md @@ -51,3 +51,4 @@ Routes to: Benchmarking evidence concepts under `docs/evidence/benchmarking/`. - `2026-06-20-agent-knowledge-os-closeout-benchmark-report.md`: Agent Knowledge OS Closeout Benchmark Report - June 20, 2026; publishes the XY-1023 full product/scenario matrix, names ELF as the strongest measured integrated product, preserves qmd/OpenViking/mem0/OpenMemory/Letta/graph-RAG/VectifyAI strengths, and turns material non-pass or reference-only deltas into optimization queue items. - `2026-06-22-p1-memory-authority-closeout-report.md`: P1 Memory Authority Closeout Report - June 22, 2026; adds `cargo make real-world-memory-p1-closeout`, scores the P1 Source Library -> Memory Candidate -> approved memory -> recall/debug -> correction/rollback chain as 4 pass, and keeps P2 queueing conditional on main-thread acceptance. 
- `2026-06-22-p2-knowledge-workspace-pageindex-openkb-closeout-report.md`: P2 Knowledge Workspace PageIndex/OpenKB Closeout Report - June 22, 2026; adds `cargo make real-world-memory-p2-knowledge-closeout`, scores the Source Library and Knowledge Workspace fixture slices as pass, preserves PageIndex/OpenKB as `not_tested` reference-only rows, and keeps P3 adapter queueing behind main-thread acceptance. +- `2026-06-22-pageindex-openkb-same-corpus-adapter-report.md`: PageIndex/OpenKB Same-Corpus Adapter Report - June 22, 2026; adds `cargo make real-world-memory-pageindex-openkb`, emits checked-in same-corpus typed setup blockers for PageIndex and OpenKB, names source ids and required materialized outputs, and preserves no parity, win, tie, or loss claim. diff --git a/docs/spec/agent_memory_knowledge_system_v1.md b/docs/spec/agent_memory_knowledge_system_v1.md index 25fce9e3..f2404378 100644 --- a/docs/spec/agent_memory_knowledge_system_v1.md +++ b/docs/spec/agent_memory_knowledge_system_v1.md @@ -151,7 +151,7 @@ implement every item in a phase at once. | P0 | Product contract and phase gate | Codify this product boundary, roadmap, competitor absorption rules, validation expectations, and closeout checklist. | Docs are reviewed, repo docs validation passes, claim boundaries match the June 20 closeout evidence, and the main thread accepts the next phase. | | P1 | Memory Authority MVP loop | Deliver one source-backed memory-authority vertical slice: capture source evidence, create/review one proposal through a proposal inbox, record the authority ledger, apply/correct/rollback, recall through agent-facing tools, and debug stale/correction behavior. | The slice has service tests, provenance/history evidence, recall/debug readback, and at least one real-world stale/correction benchmark job. | | P2 | Knowledge Workspace | Promote source-linked project/entity/concept/issue/decision/author/timeline pages with rebuild, lint, watch, search, and version-diff readback. 
| Pages stay derived, every section is cited or explicitly unsupported, stale-source lint runs, and benchmark reports publish citation/staleness metrics. | -| P3 | Competitor-strength adapters | Add contained comparison adapters for qmd replay, PageIndex/OpenKB, mem0/OpenMemory, Letta, Graphiti/Zep, OpenViking, graph/RAG references, and other accepted deltas. | Each adapter preserves typed non-pass states and emits same-corpus evidence before any parity, win, tie, or loss claim. | +| P3 | Competitor-strength adapters | Add contained comparison adapters for qmd replay, PageIndex/OpenKB, mem0/OpenMemory, Letta, Graphiti/Zep, OpenViking, graph/RAG references, and other accepted deltas. | Each adapter preserves typed non-pass states and emits either same-corpus evidence or a concrete typed setup blocker; parity, win, tie, or loss claims require same-corpus evidence, never a blocker alone. | | P4 | Benchmark and quality hardening | Expand adversarial jobs, public comparison grammar, quality metrics, latency/cost/resource reporting, and unsupported-claim detection. | Reports preserve job/suite/project typed states, expected evidence recall, irrelevant context ratio, unsupported claims, and resource metrics. | | P5 | Productization | Improve local setup, agent recipes, operator UI, privacy/delete/export boundaries, and production-quality workflows. | Operator workflows have documented setup, privacy/delete/export semantics, and validation evidence without weakening source authority. | @@ -193,8 +193,8 @@ dependencies and are not automatic proof that ELF is weaker or stronger. | Competitor/reference | Strength to absorb | Claim boundary | | --- | --- | --- | | qmd | Transparent expansion, fusion, rerank, top-k, and compact replay ergonomics. | Preserve qmd's debug edge until ELF emits comparable replay artifacts. | -| VectifyAI PageIndex | Long-document tree retrieval and PageIndex MCP ecosystem direction.
| No win/tie/loss claim until a same-corpus adapter compares tree artifacts with ELF source refs and recall debug rows. | -| VectifyAI OpenKB | Compiled Markdown wiki, concept/entity pages, lint, watch, and recompile workflows. | Absorb into Knowledge Workspace without treating derived wiki pages as source memory. | +| VectifyAI PageIndex | Long-document tree retrieval and PageIndex MCP ecosystem direction. | No win/tie/loss claim until a same-corpus adapter compares tree artifacts, cited node paths, and MCP readback with ELF source ids and source refs. | +| VectifyAI OpenKB | Compiled Markdown wiki, concept/entity pages, lint, watch, and recompile workflows. | Absorb into Knowledge Workspace only through source-id-mapped wiki, index, lint, and watch/recompile artifacts; derived wiki pages must not become source memory. | | OpenViking | Filesystem-like context URIs, hierarchy selection, staged trajectory, and recursive expansion. | Keep trajectory/hierarchy claims blocked until same-corpus staged artifacts exist. | | mem0/OpenMemory | Entity-scoped history, hosted ecosystem, UI/export, and optional graph memory direction. | Separate local SDK history evidence from hosted, UI/export, and optional graph-memory parity. | | Letta | Core/archive memory split and export/readback model. | No core/archive parity claim until contained Letta export/readback artifacts include source ids. | diff --git a/docs/spec/real_world_agent_memory_benchmark_v1.md b/docs/spec/real_world_agent_memory_benchmark_v1.md index 2cac3834..3b48edbf 100644 --- a/docs/spec/real_world_agent_memory_benchmark_v1.md +++ b/docs/spec/real_world_agent_memory_benchmark_v1.md @@ -559,7 +559,8 @@ Suite ids are stable public names. Each suite MUST contain at least one | `memory_evolution` | Verify updates, deletes, expiry, supersession, contradiction handling, and history. | Apply a new preference; suppress a deleted memory; explain what superseded an old fact. 
| Before/after memory versions, ingest decision rows or adapter history, current timeline event. | lifecycle_behavior, answer_correctness, evidence_grounding, trap_avoidance. | mem0, ELF, Graphiti/Zep, Letta. | | `consolidation` | Test reviewable derived memory formation without hidden source mutation. | Produce a consolidation proposal; identify unsupported claims; discard stale synthesis. | Source inputs, derived proposal id, lineage, review state, conflict markers. | answer_correctness, evidence_grounding, uncertainty_handling, debuggability. | Claude Dreams, Gemini CLI Auto Memory, Always-On Memory Agent, ELF. | | `memory_summary` | Test reviewable top-of-mind, background, stale, superseded, tombstoned, and derived project-profile memory readback. | Produce a current memory summary; downgrade stale memory; expose a TTL tombstone; refuse an unsupported derived profile claim. | Summary entry source refs, freshness and validity markers, source trace, inclusion/downgrade/exclusion rationale, unsupported-claim flags. | answer_correctness, evidence_grounding, lifecycle_behavior, trap_avoidance, uncertainty_handling. | OpenAI Dreaming, Claude Dreams, Always-On Memory Agent, ELF. | -| `knowledge_compilation` | Compile evidence into maintained project/entity/concept pages while preserving provenance. | Build a project status page; answer from compiled truth plus timeline; lint a stale page section. | Page section sources, backlinks, timeline entries, lint evidence. | answer_correctness, evidence_grounding, workflow_helpfulness, trap_avoidance. | llm-wiki, gbrain, graphify, ELF. | +| `knowledge_compilation` | Compile evidence into maintained project/entity/concept pages while preserving provenance. | Build a project status page; answer from compiled truth plus timeline; lint a stale page section. | Page section sources, backlinks, timeline entries, lint evidence. | answer_correctness, evidence_grounding, workflow_helpfulness, trap_avoidance. 
| llm-wiki, gbrain, graphify, OpenKB, ELF. | +| `source_library` | Preserve long-form source records and citable excerpts without silently promoting them to memory. | Capture a long document; hydrate a source_ref excerpt; preserve a social/thread source boundary. | Source ids, canonical source metadata, source_ref hydration pointers, verified excerpts, explicit no-autopromotion boundary. | answer_correctness, evidence_grounding, lifecycle_behavior, trap_avoidance. | PageIndex, ELF. | | `operator_debugging_ux` | Show whether a wrong or ambiguous memory result can be debugged without raw store spelunking. | Explain why a result ranked first; inspect a trace; identify which stage dropped expected evidence. | Trace bundle, retrieval trajectory, candidate metrics, viewer or CLI readback. | debuggability, evidence_grounding, workflow_helpfulness, answer_correctness. | claude-mem, qmd, agentmemory, ELF. | | `capture_integration` | Evaluate how accurately work observations become usable memory across agents and tools. | Capture a session decision; exclude private spans; import external agent observations. | Hook/import logs, write policy audits, excluded spans, resulting note ids. | answer_correctness, evidence_grounding, trap_avoidance, lifecycle_behavior. | agentmemory, claude-mem, memsearch, mem0. | | `production_ops` | Prove safe operation under backup, restore, backfill, cold start, resource, and credential boundaries. | Resume interrupted import; restore from backup; report missing private manifest as bounded caveat. | Command/report artifacts, resource envelope, checkpoint state, failure guard evidence. | lifecycle_behavior, latency_resource, uncertainty_handling, evidence_grounding. | ELF, qmd, memsearch, LangGraph. |