Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 23 additions & 4 deletions apps/elf-api/src/routes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -691,6 +691,7 @@ pub fn router(state: AppState) -> Router {
.route("/v2/events/ingest", routing::post(events_ingest))
.route("/v2/core-blocks", routing::get(core_blocks_get))
.route("/v2/entity-memory", routing::get(entity_memory_get))
.route("/v2/recall-debug/panel", routing::post(recall_debug_panel))
.route("/v2/searches", routing::post(searches_create))
.route("/v2/searches/{search_id}", routing::get(searches_get))
.route("/v2/searches/{search_id}/timeline", routing::get(searches_timeline))
Expand Down Expand Up @@ -777,7 +778,7 @@ pub fn admin_router(state: AppState) -> Router {
routing::post(consolidation_proposal_review),
)
.route("/v2/admin/dreaming/review-queue", routing::get(dreaming_review_queue))
.route("/v2/admin/recall-debug/panel", routing::post(recall_debug_panel))
.route("/v2/admin/recall-debug/panel", routing::post(admin_recall_debug_panel))
.route("/v2/admin/knowledge/pages", routing::get(knowledge_pages_list))
.route("/v2/admin/knowledge/pages/rebuild", routing::post(knowledge_page_rebuild))
.route("/v2/admin/knowledge/pages/search", routing::post(knowledge_pages_search))
Expand Down Expand Up @@ -3190,21 +3191,38 @@ async fn dreaming_review_queue(

#[utoipa::path(
post,
path = "/v2/admin/recall-debug/panel",
path = "/v2/recall-debug/panel",
tag = "recall",
request_body = Value,
responses(
(status = 200, description = "Cross-layer recall/debug panel.", body = Value),
(status = 200, description = "Agent-facing cross-layer recall/debug panel.", body = Value),
(status = 400, description = "Invalid request.", body = ErrorBody),
(status = 401, description = "Authentication required.", body = ErrorBody),
(status = 403, description = "Admin access required.", body = ErrorBody),
(status = 403, description = "Scope denied.", body = ErrorBody),
(status = 500, description = "Internal error.", body = ErrorBody),
)
)]
async fn recall_debug_panel(
    State(state): State<AppState>,
    headers: HeaderMap,
    payload: Result<Json<RecallDebugPanelBody>, JsonRejection>,
) -> Result<Json<RecallDebugPanelResponse>, ApiError> {
    // Handler for the non-admin recall/debug panel route. It forwards the
    // extracted state, headers, and JSON payload unchanged to
    // `recall_debug_panel_inner`, with the project-trace-debug flag switched off.
    //
    // Plain `//` comments are used on purpose: `///` text on a handler annotated
    // with `#[utoipa::path]` is lifted into the generated OpenAPI description.
    let allow_project_trace_debug = false;

    recall_debug_panel_inner(state, headers, payload, allow_project_trace_debug).await
}

// Handler for the admin recall/debug panel route.
//
// Shares its implementation with `recall_debug_panel`; the only difference is
// that it calls `recall_debug_panel_inner` with the project-trace-debug flag
// switched on.
// NOTE(review): no `#[utoipa::path]` attribute is attached to this handler in the
// visible code — confirm whether the admin route is meant to appear in OpenAPI.
async fn admin_recall_debug_panel(
    State(state): State<AppState>,
    headers: HeaderMap,
    payload: Result<Json<RecallDebugPanelBody>, JsonRejection>,
) -> Result<Json<RecallDebugPanelResponse>, ApiError> {
    let allow_project_trace_debug = true;

    recall_debug_panel_inner(state, headers, payload, allow_project_trace_debug).await
}

async fn recall_debug_panel_inner(
state: AppState,
headers: HeaderMap,
payload: Result<Json<RecallDebugPanelBody>, JsonRejection>,
allow_project_trace_debug: bool,
) -> Result<Json<RecallDebugPanelResponse>, ApiError> {
let ctx = RequestContext::from_headers(&headers)?;
let read_profile = required_read_profile(&headers)?;
Expand All @@ -3228,6 +3246,7 @@ async fn recall_debug_panel(
graph_predicate: payload.graph_predicate,
include_dreaming: payload.include_dreaming,
limit: payload.limit,
allow_project_trace_debug,
})
.await?;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@
"generated_at": "2026-06-20T00:00:00Z",
"service_contract": {
"response_schema": "elf.recall_debug_panel/v1",
"trace_schema": "elf.recall_trace/v1",
"service_module": "packages/elf-service/src/recall_debug.rs",
"http_endpoint": "POST /v2/admin/recall-debug/panel",
"http_endpoint": "POST /v2/recall-debug/panel",
"admin_http_mirror": "POST /v2/admin/recall-debug/panel",
"mcp_tool": "elf_recall_debug_panel",
"spec": "docs/spec/system_recall_debug_panel_v1.md",
"read_model_only": true,
Expand Down Expand Up @@ -71,12 +73,19 @@
"freshness_state_required": true,
"stage_reason_required": true,
"source_refs_required": true,
"deterministic_recall_trace": true,
"stale_context_visible": true,
"replay_command_or_artifact_path_required_when_available": true,
"no_source_mutation": true,
"no_graph_mutation": true,
"no_proposal_review_mutation": true
},
"command_evidence": [
{
"command": "cargo test -p elf-service recall_trace --lib",
"status": "pass",
"purpose": "Unit-check deterministic recall_trace stale, dropped, blocked, and not_requested projection."
},
{
"command": "cargo test -p elf-service recall_debug -- --nocapture",
"status": "pass",
Expand Down
116 changes: 80 additions & 36 deletions apps/elf-eval/tests/real_world_job_benchmark.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,19 @@ use std::{
use color_eyre::{Result, eyre};
use serde_json::Value;

/// Borrowed texts inspected by `assert_recall_debug_source_contract`.
///
/// Every field is a string slice that the helper scans for literal contract
/// markers with `str::contains`. The struct only groups the borrows so the helper
/// takes one argument; how each text is loaded is decided by the caller.
struct RecallDebugSourceContract<'a> {
/// Scanned for service-level markers such as `ELF_RECALL_DEBUG_PANEL_SCHEMA_V1`.
service: &'a str,
/// Scanned for `pub mod recall_debug` and the response/trace type names.
service_lib: &'a str,
/// Scanned for the public and admin panel route paths and the handler name.
routes: &'a str,
/// Scanned for the `elf_recall_debug_panel` tool, its schema helper, and its path.
mcp: &'a str,
/// Scanned for the panel/trace schema ids and required spec wording.
recall_spec: &'a str,
/// Scanned for both `POST` endpoint listings and the panel spec file name.
service_spec: &'a str,
/// Scanned for the panel and trace schema ids.
version_registry: &'a str,
/// Scanned for the "Recall Debug Panel Report" wording and related phrases.
markdown: &'a str,
/// Scanned for the report file name `2026-06-20-recall-debug-panel-report.md`.
benchmarking_index: &'a str,
/// Scanned for the XY-1022 recall/debug panel mention and schema id.
readme: &'a str,
}

fn fixture_dir() -> PathBuf {
Path::new(env!("CARGO_MANIFEST_DIR"))
.join("fixtures")
Expand Down Expand Up @@ -3700,6 +3713,56 @@ fn dreaming_review_queue_report_wires_reviewable_policy_contract() -> Result<()>
Ok(())
}

/// Asserts that every recall/debug source text still carries its contract markers.
///
/// Each field of `sources` is scanned with `str::contains` for a fixed list of
/// literal markers. Fields are checked in the order written below, and markers in
/// list order, so the first missing marker is the one reported.
///
/// # Panics
///
/// Panics on the first missing marker. The message names the `sources` field and
/// quotes the marker, so a failure can be located without re-reading this list.
fn assert_recall_debug_source_contract(sources: &RecallDebugSourceContract<'_>) {
    /// Panics unless `text` contains every entry of `markers`.
    fn require_all(field: &str, text: &str, markers: &[&str]) {
        for &marker in markers {
            assert!(
                text.contains(marker),
                "recall debug source contract: `{field}` is missing {marker:?}",
            );
        }
    }

    require_all(
        "service",
        sources.service,
        &[
            "ELF_RECALL_DEBUG_PANEL_SCHEMA_V1",
            "ELF_RECALL_TRACE_SCHEMA_V1",
            "pub async fn recall_debug_panel",
            "build_recall_trace",
            "not_requested_layer",
            "blocked_layer",
            "public_error_class",
            "candidate_identity",
            "ORG_PROJECT_ID",
            "trace_bundle_get",
            "docs_search_l0",
            "knowledge_pages_search",
            "graph_report",
            "dreaming_review_queue",
        ],
    );
    require_all(
        "service_lib",
        sources.service_lib,
        &["pub mod recall_debug", "RecallDebugPanelResponse", "RecallTrace"],
    );
    require_all(
        "routes",
        sources.routes,
        &[
            "/v2/recall-debug/panel",
            "/v2/admin/recall-debug/panel",
            // The trailing `(` pins the public handler itself. Without it the
            // marker is also satisfied by `async fn recall_debug_panel_inner`,
            // so removing the public handler would go unnoticed.
            "async fn recall_debug_panel(",
            "RecallDebugPanelRequest",
        ],
    );
    require_all(
        "mcp",
        sources.mcp,
        &["elf_recall_debug_panel", "recall_debug_panel_schema", "/v2/recall-debug/panel"],
    );
    require_all(
        "recall_spec",
        sources.recall_spec,
        &[
            "elf.recall_debug_panel/v1",
            "elf.recall_trace/v1",
            "not_requested",
            "evidence_class = \"blocked\"",
            "effective `top_k` cap of 32",
            "context_state = \"stale\"",
            "selected`, `dropped`, `available`, or `reviewable`",
        ],
    );
    require_all(
        "service_spec",
        sources.service_spec,
        &[
            "POST /v2/recall-debug/panel",
            "POST /v2/admin/recall-debug/panel",
            "elf.recall_trace/v1",
            "system_recall_debug_panel_v1.md",
        ],
    );
    require_all(
        "version_registry",
        sources.version_registry,
        &["elf.recall_debug_panel/v1", "elf.recall_trace/v1"],
    );
    require_all(
        "markdown",
        sources.markdown,
        &[
            "Recall Debug Panel Report",
            "POST /v2/recall-debug/panel",
            "`elf.recall_trace/v1`",
            "Missing anchors stay visible as `not_requested`",
            "retained dropped replay candidates",
            "effective cap of 32 rows",
        ],
    );
    require_all(
        "benchmarking_index",
        sources.benchmarking_index,
        &["2026-06-20-recall-debug-panel-report.md"],
    );
    require_all(
        "readme",
        sources.readme,
        &[
            "Recall/debug panel after XY-1022",
            "elf.recall_debug_panel/v1",
            "retained dropped replay candidates",
        ],
    );
}

#[test]
fn recall_debug_panel_report_wires_cross_layer_debug_contract() -> Result<()> {
let report = serde_json::from_str::<Value>(&fs::read_to_string(
Expand Down Expand Up @@ -3729,6 +3792,10 @@ fn recall_debug_panel_report_wires_cross_layer_debug_contract() -> Result<()> {
report.pointer("/service_contract/response_schema").and_then(Value::as_str),
Some("elf.recall_debug_panel/v1")
);
assert_eq!(
report.pointer("/service_contract/trace_schema").and_then(Value::as_str),
Some("elf.recall_trace/v1")
);
assert_eq!(
report.pointer("/service_contract/read_model_only").and_then(Value::as_bool),
Some(true)
Expand Down Expand Up @@ -3781,42 +3848,19 @@ fn recall_debug_panel_report_wires_cross_layer_debug_contract() -> Result<()> {
report.pointer("/debug_invariants/no_source_mutation").and_then(Value::as_bool),
Some(true)
);
assert!(service.contains("ELF_RECALL_DEBUG_PANEL_SCHEMA_V1"));
assert!(service.contains("pub async fn recall_debug_panel"));
assert!(service.contains("not_requested_layer"));
assert!(service.contains("blocked_layer"));
assert!(service.contains("public_error_class"));
assert!(service.contains("candidate_identity"));
assert!(service.contains("ORG_PROJECT_ID"));
assert!(service.contains("trace_bundle_get"));
assert!(service.contains("docs_search_l0"));
assert!(service.contains("knowledge_pages_search"));
assert!(service.contains("graph_report"));
assert!(service.contains("dreaming_review_queue"));
assert!(service_lib.contains("pub mod recall_debug"));
assert!(service_lib.contains("RecallDebugPanelResponse"));
assert!(routes.contains("/v2/admin/recall-debug/panel"));
assert!(routes.contains("async fn recall_debug_panel"));
assert!(routes.contains("RecallDebugPanelRequest"));
assert!(mcp.contains("elf_recall_debug_panel"));
assert!(mcp.contains("recall_debug_panel_schema"));
assert!(mcp.contains("/v2/admin/recall-debug/panel"));
assert!(recall_spec.contains("elf.recall_debug_panel/v1"));
assert!(recall_spec.contains("not_requested"));
assert!(recall_spec.contains("evidence_class = \"blocked\""));
assert!(recall_spec.contains("effective `top_k` cap of 32"));
assert!(recall_spec.contains("selected`, `dropped`, `available`, or `reviewable`"));
assert!(service_spec.contains("POST /v2/admin/recall-debug/panel"));
assert!(service_spec.contains("system_recall_debug_panel_v1.md"));
assert!(version_registry.contains("elf.recall_debug_panel/v1"));
assert!(markdown.contains("Recall Debug Panel Report"));
assert!(markdown.contains("Missing anchors stay visible as `not_requested`"));
assert!(markdown.contains("retained dropped replay candidates"));
assert!(markdown.contains("effective cap of 32 rows"));
assert!(benchmarking_index.contains("2026-06-20-recall-debug-panel-report.md"));
assert!(readme.contains("Recall/debug panel after XY-1022"));
assert!(readme.contains("elf.recall_debug_panel/v1"));
assert!(readme.contains("retained dropped replay candidates"));

assert_recall_debug_source_contract(&RecallDebugSourceContract {
service: &service,
service_lib: &service_lib,
routes: &routes,
mcp: &mcp,
recall_spec: &recall_spec,
service_spec: &service_spec,
version_registry: &version_registry,
markdown: &markdown,
benchmarking_index: &benchmarking_index,
readme: &readme,
});

Ok(())
}
Expand Down
18 changes: 14 additions & 4 deletions apps/elf-mcp/src/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,7 @@ impl ElfMcp {

#[rmcp::tool(
name = "elf_recall_debug_panel",
description = "Build a cross-layer recall/debug panel over memory traces, source documents, knowledge pages, graph facts, and Dreaming proposals.",
description = "Build an agent-facing cross-layer recall/debug panel and deterministic recall_trace over memory traces, source documents, knowledge pages, graph facts, and Dreaming proposals.",
input_schema = recall_debug_panel_schema()
)]
async fn elf_recall_debug_panel(
Expand All @@ -384,7 +384,7 @@ impl ElfMcp {
) -> Result<CallToolResult, ErrorData> {
reject_context_override_params(&params)?;

self.forward(HttpMethod::Post, "/v2/admin/recall-debug/panel", params, None).await
self.forward(HttpMethod::Post, "/v2/recall-debug/panel", params, None).await
}

#[rmcp::tool(
Expand Down Expand Up @@ -1793,8 +1793,8 @@ mod tests {
ToolDefinition::new(
"elf_recall_debug_panel",
HttpMethod::Post,
"/v2/admin/recall-debug/panel",
"Build a cross-layer recall/debug panel over memory traces, source documents, knowledge pages, graph facts, and Dreaming proposals.",
"/v2/recall-debug/panel",
"Build an agent-facing cross-layer recall/debug panel and deterministic recall_trace over memory traces, source documents, knowledge pages, graph facts, and Dreaming proposals.",
),
ToolDefinition::new(
"elf_searches_get",
Expand Down Expand Up @@ -2122,6 +2122,16 @@ mod tests {
);
assert_eq!(mcp.api_base_for_path("/v2/admin/notes/abcd/history"), "http://127.0.0.1:9001");
assert_eq!(mcp.api_base_for_path("/v2/searches"), "http://127.0.0.1:9000");
assert_eq!(mcp.api_base_for_path("/v2/recall-debug/panel"), "http://127.0.0.1:9000");
}

#[test]
fn recall_debug_tool_uses_public_agent_route() {
    // Look the tool up by its registered name; a missing entry fails the test.
    let registry = build_tools();
    let panel_tool =
        registry.get("elf_recall_debug_panel").expect("Missing recall debug panel tool.");

    // The tool must target the non-admin route and mention `recall_trace`.
    assert_eq!(panel_tool.path, "/v2/recall-debug/panel");
    assert!(panel_tool.description.contains("recall_trace"));
}

#[test]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ resource: docs/evidence/benchmarking/2026-06-20-recall-debug-panel-report.md
status: active
authority: current_state
owner: evidence
last_verified: 2026-06-20
last_verified: 2026-06-22
tags:
- docs
- evidence
Expand All @@ -31,6 +31,11 @@ reviewable across the main Agent Knowledge OS layers. This is a product/debug su
over existing authority layers, not a new mutating worker and not a replacement for
the underlying trace, docs, graph, knowledge, or proposal APIs.

The agent-facing endpoint is `POST /v2/recall-debug/panel`. The local admin endpoint
`POST /v2/admin/recall-debug/panel` remains an operator mirror over the same service
read model. Responses include `elf.recall_trace/v1`, a compact deterministic
projection for selected, dropped, stale, blocked, and not-requested context.

## Layer Coverage

| Layer | Anchor | Selection states | Replay/readback |
Expand All @@ -45,6 +50,11 @@ Each row exposes item refs, authority layer, freshness state, source refs or sou
snapshots, score/rank when available, stage reason, evidence class, replay command,
and layer-specific debug artifacts.

The embedded `elf.recall_trace/v1` projection flattens these rows into stable
layer/row order for fixture and report assertions. It carries `context_state`,
`selection_state`, freshness, source refs, score/rank, policy reason, replay command,
and evidence class without requiring raw database inspection.

The panel-level `limit` is a per-layer request cap, but the Source Library layer
inherits the docs-search effective cap of 32 rows and reports requested/effective
limits in document row debug artifacts.
Expand All @@ -53,6 +63,7 @@ limits in document row debug artifacts.

| Command | Status | Purpose |
| --- | --- | --- |
| `cargo test -p elf-service recall_trace --lib` | pass | Unit-check deterministic `recall_trace` stale, dropped, blocked, and not-requested projection. |
| `cargo test -p elf-service recall_debug -- --nocapture` | pass | Unit-check panel summary counters and `not_requested` layer behavior. |
| `cargo test -p elf-mcp registers_all_tools -- --nocapture` | pass | Guard MCP tool registration for `elf_recall_debug_panel`. |
| `cargo test -p elf-eval --test real_world_job_benchmark recall_debug_panel_report_wires_cross_layer_debug_contract -- --nocapture` | pass | Guard service, API, MCP, docs, README, and snapshot coverage for XY-1022. |
Expand All @@ -66,6 +77,8 @@ Allowed:
through trace bundles when candidate capture/retention preserved them.
- Source documents, knowledge pages, graph facts, and Dreaming proposals can be
inspected from one panel response when their anchors are supplied.
- The agent-facing panel response includes a deterministic `elf.recall_trace/v1`
projection for selected, dropped, stale, blocked, and not-requested context.
- Missing anchors stay visible as `not_requested` layers instead of hidden pass
claims.
- Requested layer readback failures stay visible as `blocked` layers instead of
Expand Down
12 changes: 9 additions & 3 deletions docs/spec/system_elf_memory_service_v2.md
Original file line number Diff line number Diff line change
Expand Up @@ -1157,16 +1157,22 @@ Behavior:
- Normal recall remains active-only; `deprecated` and `deleted` notes are visible
through provenance/history or explicit non-active list filters, not ordinary search.

Admin recall/debug panel:
Recall/debug panel:
- POST /v2/recall-debug/panel
- POST /v2/admin/recall-debug/panel

Behavior:
- The endpoint returns `elf.recall_debug_panel/v1`, a read-only cross-layer panel
- The endpoints return `elf.recall_debug_panel/v1`, a read-only cross-layer panel
over Memory Note trace bundles, Source Library document search, Knowledge Workspace
page search, graph reports, and Dreaming review queue proposals.
- The public route is the agent-facing recall/debug API. The admin route is an
operator mirror over the same service read model.
- Each row must expose selection state, authority layer, freshness state, source refs
or source snapshots, score/rank where available, stage reason, evidence class, and
replay command or deterministic artifact path when available.
- Responses must include `recall_trace` with schema `elf.recall_trace/v1`: a compact
deterministic projection over selected, dropped, stale, blocked, and not-requested
context for agent and fixture/report assertions.
- Missing anchors must be represented as `not_requested` layers. The panel must not
collapse not-requested, incomplete, blocked, or wrong-result layers into a broad
pass claim.
Expand Down Expand Up @@ -2457,7 +2463,7 @@ Original query:
- elf_admin_trajectory_get -> GET /v2/admin/trajectories/{trace_id}
- elf_admin_trace_item_get -> GET /v2/admin/trace-items/{item_id}
- elf_admin_trace_bundle_get -> GET /v2/admin/traces/{trace_id}/bundle
- elf_recall_debug_panel -> POST /v2/admin/recall-debug/panel
- elf_recall_debug_panel -> POST /v2/recall-debug/panel
- elf_admin_note_provenance_get -> GET /v2/admin/notes/{note_id}/provenance
- elf_admin_memory_history_get -> GET /v2/admin/notes/{note_id}/history
- The MCP server must contain zero business logic or policy.
Expand Down
Loading