diff --git a/apps/elf-api/src/routes.rs b/apps/elf-api/src/routes.rs index e97fb83f..76a8401a 100644 --- a/apps/elf-api/src/routes.rs +++ b/apps/elf-api/src/routes.rs @@ -691,6 +691,7 @@ pub fn router(state: AppState) -> Router { .route("/v2/events/ingest", routing::post(events_ingest)) .route("/v2/core-blocks", routing::get(core_blocks_get)) .route("/v2/entity-memory", routing::get(entity_memory_get)) + .route("/v2/recall-debug/panel", routing::post(recall_debug_panel)) .route("/v2/searches", routing::post(searches_create)) .route("/v2/searches/{search_id}", routing::get(searches_get)) .route("/v2/searches/{search_id}/timeline", routing::get(searches_timeline)) @@ -777,7 +778,7 @@ pub fn admin_router(state: AppState) -> Router { routing::post(consolidation_proposal_review), ) .route("/v2/admin/dreaming/review-queue", routing::get(dreaming_review_queue)) - .route("/v2/admin/recall-debug/panel", routing::post(recall_debug_panel)) + .route("/v2/admin/recall-debug/panel", routing::post(admin_recall_debug_panel)) .route("/v2/admin/knowledge/pages", routing::get(knowledge_pages_list)) .route("/v2/admin/knowledge/pages/rebuild", routing::post(knowledge_page_rebuild)) .route("/v2/admin/knowledge/pages/search", routing::post(knowledge_pages_search)) @@ -3190,14 +3191,14 @@ async fn dreaming_review_queue( #[utoipa::path( post, - path = "/v2/admin/recall-debug/panel", + path = "/v2/recall-debug/panel", tag = "recall", request_body = Value, responses( - (status = 200, description = "Cross-layer recall/debug panel.", body = Value), + (status = 200, description = "Agent-facing cross-layer recall/debug panel.", body = Value), (status = 400, description = "Invalid request.", body = ErrorBody), (status = 401, description = "Authentication required.", body = ErrorBody), - (status = 403, description = "Admin access required.", body = ErrorBody), + (status = 403, description = "Scope denied.", body = ErrorBody), (status = 500, description = "Internal error.", body = ErrorBody), ) )] 
@@ -3205,6 +3206,23 @@ async fn recall_debug_panel( State(state): State, headers: HeaderMap, payload: Result, JsonRejection>, +) -> Result, ApiError> { + recall_debug_panel_inner(state, headers, payload, false).await +} + +async fn admin_recall_debug_panel( + State(state): State, + headers: HeaderMap, + payload: Result, JsonRejection>, +) -> Result, ApiError> { + recall_debug_panel_inner(state, headers, payload, true).await +} + +async fn recall_debug_panel_inner( + state: AppState, + headers: HeaderMap, + payload: Result, JsonRejection>, + allow_project_trace_debug: bool, ) -> Result, ApiError> { let ctx = RequestContext::from_headers(&headers)?; let read_profile = required_read_profile(&headers)?; @@ -3228,6 +3246,7 @@ async fn recall_debug_panel( graph_predicate: payload.graph_predicate, include_dreaming: payload.include_dreaming, limit: payload.limit, + allow_project_trace_debug, }) .await?; diff --git a/apps/elf-eval/fixtures/report_snapshots/2026-06-20-recall-debug-panel-report.json b/apps/elf-eval/fixtures/report_snapshots/2026-06-20-recall-debug-panel-report.json index da62fd38..bf458106 100644 --- a/apps/elf-eval/fixtures/report_snapshots/2026-06-20-recall-debug-panel-report.json +++ b/apps/elf-eval/fixtures/report_snapshots/2026-06-20-recall-debug-panel-report.json @@ -4,8 +4,10 @@ "generated_at": "2026-06-20T00:00:00Z", "service_contract": { "response_schema": "elf.recall_debug_panel/v1", + "trace_schema": "elf.recall_trace/v1", "service_module": "packages/elf-service/src/recall_debug.rs", - "http_endpoint": "POST /v2/admin/recall-debug/panel", + "http_endpoint": "POST /v2/recall-debug/panel", + "admin_http_mirror": "POST /v2/admin/recall-debug/panel", "mcp_tool": "elf_recall_debug_panel", "spec": "docs/spec/system_recall_debug_panel_v1.md", "read_model_only": true, @@ -71,12 +73,19 @@ "freshness_state_required": true, "stage_reason_required": true, "source_refs_required": true, + "deterministic_recall_trace": true, + "stale_context_visible": true, 
"replay_command_or_artifact_path_required_when_available": true, "no_source_mutation": true, "no_graph_mutation": true, "no_proposal_review_mutation": true }, "command_evidence": [ + { + "command": "cargo test -p elf-service recall_trace --lib", + "status": "pass", + "purpose": "Unit-check deterministic recall_trace stale, dropped, blocked, and not_requested projection." + }, { "command": "cargo test -p elf-service recall_debug -- --nocapture", "status": "pass", diff --git a/apps/elf-eval/tests/real_world_job_benchmark.rs b/apps/elf-eval/tests/real_world_job_benchmark.rs index d57f838d..c8637300 100644 --- a/apps/elf-eval/tests/real_world_job_benchmark.rs +++ b/apps/elf-eval/tests/real_world_job_benchmark.rs @@ -11,6 +11,19 @@ use std::{ use color_eyre::{Result, eyre}; use serde_json::Value; +struct RecallDebugSourceContract<'a> { + service: &'a str, + service_lib: &'a str, + routes: &'a str, + mcp: &'a str, + recall_spec: &'a str, + service_spec: &'a str, + version_registry: &'a str, + markdown: &'a str, + benchmarking_index: &'a str, + readme: &'a str, +} + fn fixture_dir() -> PathBuf { Path::new(env!("CARGO_MANIFEST_DIR")) .join("fixtures") @@ -3700,6 +3713,56 @@ fn dreaming_review_queue_report_wires_reviewable_policy_contract() -> Result<()> Ok(()) } +fn assert_recall_debug_source_contract(sources: &RecallDebugSourceContract<'_>) { + assert!(sources.service.contains("ELF_RECALL_DEBUG_PANEL_SCHEMA_V1")); + assert!(sources.service.contains("ELF_RECALL_TRACE_SCHEMA_V1")); + assert!(sources.service.contains("pub async fn recall_debug_panel")); + assert!(sources.service.contains("build_recall_trace")); + assert!(sources.service.contains("not_requested_layer")); + assert!(sources.service.contains("blocked_layer")); + assert!(sources.service.contains("public_error_class")); + assert!(sources.service.contains("candidate_identity")); + assert!(sources.service.contains("ORG_PROJECT_ID")); + assert!(sources.service.contains("trace_bundle_get")); + 
assert!(sources.service.contains("docs_search_l0")); + assert!(sources.service.contains("knowledge_pages_search")); + assert!(sources.service.contains("graph_report")); + assert!(sources.service.contains("dreaming_review_queue")); + assert!(sources.service_lib.contains("pub mod recall_debug")); + assert!(sources.service_lib.contains("RecallDebugPanelResponse")); + assert!(sources.service_lib.contains("RecallTrace")); + assert!(sources.routes.contains("/v2/recall-debug/panel")); + assert!(sources.routes.contains("/v2/admin/recall-debug/panel")); + assert!(sources.routes.contains("async fn recall_debug_panel")); + assert!(sources.routes.contains("RecallDebugPanelRequest")); + assert!(sources.mcp.contains("elf_recall_debug_panel")); + assert!(sources.mcp.contains("recall_debug_panel_schema")); + assert!(sources.mcp.contains("/v2/recall-debug/panel")); + assert!(sources.recall_spec.contains("elf.recall_debug_panel/v1")); + assert!(sources.recall_spec.contains("elf.recall_trace/v1")); + assert!(sources.recall_spec.contains("not_requested")); + assert!(sources.recall_spec.contains("evidence_class = \"blocked\"")); + assert!(sources.recall_spec.contains("effective `top_k` cap of 32")); + assert!(sources.recall_spec.contains("context_state = \"stale\"")); + assert!(sources.recall_spec.contains("selected`, `dropped`, `available`, or `reviewable`")); + assert!(sources.service_spec.contains("POST /v2/recall-debug/panel")); + assert!(sources.service_spec.contains("POST /v2/admin/recall-debug/panel")); + assert!(sources.service_spec.contains("elf.recall_trace/v1")); + assert!(sources.service_spec.contains("system_recall_debug_panel_v1.md")); + assert!(sources.version_registry.contains("elf.recall_debug_panel/v1")); + assert!(sources.version_registry.contains("elf.recall_trace/v1")); + assert!(sources.markdown.contains("Recall Debug Panel Report")); + assert!(sources.markdown.contains("POST /v2/recall-debug/panel")); + assert!(sources.markdown.contains("`elf.recall_trace/v1`")); 
+ assert!(sources.markdown.contains("Missing anchors stay visible as `not_requested`")); + assert!(sources.markdown.contains("retained dropped replay candidates")); + assert!(sources.markdown.contains("effective cap of 32 rows")); + assert!(sources.benchmarking_index.contains("2026-06-20-recall-debug-panel-report.md")); + assert!(sources.readme.contains("Recall/debug panel after XY-1022")); + assert!(sources.readme.contains("elf.recall_debug_panel/v1")); + assert!(sources.readme.contains("retained dropped replay candidates")); +} + #[test] fn recall_debug_panel_report_wires_cross_layer_debug_contract() -> Result<()> { let report = serde_json::from_str::(&fs::read_to_string( @@ -3729,6 +3792,10 @@ fn recall_debug_panel_report_wires_cross_layer_debug_contract() -> Result<()> { report.pointer("/service_contract/response_schema").and_then(Value::as_str), Some("elf.recall_debug_panel/v1") ); + assert_eq!( + report.pointer("/service_contract/trace_schema").and_then(Value::as_str), + Some("elf.recall_trace/v1") + ); assert_eq!( report.pointer("/service_contract/read_model_only").and_then(Value::as_bool), Some(true) @@ -3781,42 +3848,19 @@ fn recall_debug_panel_report_wires_cross_layer_debug_contract() -> Result<()> { report.pointer("/debug_invariants/no_source_mutation").and_then(Value::as_bool), Some(true) ); - assert!(service.contains("ELF_RECALL_DEBUG_PANEL_SCHEMA_V1")); - assert!(service.contains("pub async fn recall_debug_panel")); - assert!(service.contains("not_requested_layer")); - assert!(service.contains("blocked_layer")); - assert!(service.contains("public_error_class")); - assert!(service.contains("candidate_identity")); - assert!(service.contains("ORG_PROJECT_ID")); - assert!(service.contains("trace_bundle_get")); - assert!(service.contains("docs_search_l0")); - assert!(service.contains("knowledge_pages_search")); - assert!(service.contains("graph_report")); - assert!(service.contains("dreaming_review_queue")); - assert!(service_lib.contains("pub mod 
recall_debug")); - assert!(service_lib.contains("RecallDebugPanelResponse")); - assert!(routes.contains("/v2/admin/recall-debug/panel")); - assert!(routes.contains("async fn recall_debug_panel")); - assert!(routes.contains("RecallDebugPanelRequest")); - assert!(mcp.contains("elf_recall_debug_panel")); - assert!(mcp.contains("recall_debug_panel_schema")); - assert!(mcp.contains("/v2/admin/recall-debug/panel")); - assert!(recall_spec.contains("elf.recall_debug_panel/v1")); - assert!(recall_spec.contains("not_requested")); - assert!(recall_spec.contains("evidence_class = \"blocked\"")); - assert!(recall_spec.contains("effective `top_k` cap of 32")); - assert!(recall_spec.contains("selected`, `dropped`, `available`, or `reviewable`")); - assert!(service_spec.contains("POST /v2/admin/recall-debug/panel")); - assert!(service_spec.contains("system_recall_debug_panel_v1.md")); - assert!(version_registry.contains("elf.recall_debug_panel/v1")); - assert!(markdown.contains("Recall Debug Panel Report")); - assert!(markdown.contains("Missing anchors stay visible as `not_requested`")); - assert!(markdown.contains("retained dropped replay candidates")); - assert!(markdown.contains("effective cap of 32 rows")); - assert!(benchmarking_index.contains("2026-06-20-recall-debug-panel-report.md")); - assert!(readme.contains("Recall/debug panel after XY-1022")); - assert!(readme.contains("elf.recall_debug_panel/v1")); - assert!(readme.contains("retained dropped replay candidates")); + + assert_recall_debug_source_contract(&RecallDebugSourceContract { + service: &service, + service_lib: &service_lib, + routes: &routes, + mcp: &mcp, + recall_spec: &recall_spec, + service_spec: &service_spec, + version_registry: &version_registry, + markdown: &markdown, + benchmarking_index: &benchmarking_index, + readme: &readme, + }); Ok(()) } diff --git a/apps/elf-mcp/src/server.rs b/apps/elf-mcp/src/server.rs index 51693a84..8e98e4a6 100644 --- a/apps/elf-mcp/src/server.rs +++ 
b/apps/elf-mcp/src/server.rs @@ -375,7 +375,7 @@ impl ElfMcp { #[rmcp::tool( name = "elf_recall_debug_panel", - description = "Build a cross-layer recall/debug panel over memory traces, source documents, knowledge pages, graph facts, and Dreaming proposals.", + description = "Build an agent-facing cross-layer recall/debug panel and deterministic recall_trace over memory traces, source documents, knowledge pages, graph facts, and Dreaming proposals.", input_schema = recall_debug_panel_schema() )] async fn elf_recall_debug_panel( @@ -384,7 +384,7 @@ impl ElfMcp { ) -> Result { reject_context_override_params(&params)?; - self.forward(HttpMethod::Post, "/v2/admin/recall-debug/panel", params, None).await + self.forward(HttpMethod::Post, "/v2/recall-debug/panel", params, None).await } #[rmcp::tool( @@ -1793,8 +1793,8 @@ mod tests { ToolDefinition::new( "elf_recall_debug_panel", HttpMethod::Post, - "/v2/admin/recall-debug/panel", - "Build a cross-layer recall/debug panel over memory traces, source documents, knowledge pages, graph facts, and Dreaming proposals.", + "/v2/recall-debug/panel", + "Build an agent-facing cross-layer recall/debug panel and deterministic recall_trace over memory traces, source documents, knowledge pages, graph facts, and Dreaming proposals.", ), ToolDefinition::new( "elf_searches_get", @@ -2122,6 +2122,16 @@ mod tests { ); assert_eq!(mcp.api_base_for_path("/v2/admin/notes/abcd/history"), "http://127.0.0.1:9001"); assert_eq!(mcp.api_base_for_path("/v2/searches"), "http://127.0.0.1:9000"); + assert_eq!(mcp.api_base_for_path("/v2/recall-debug/panel"), "http://127.0.0.1:9000"); + } + + #[test] + fn recall_debug_tool_uses_public_agent_route() { + let tools = build_tools(); + let tool = tools.get("elf_recall_debug_panel").expect("Missing recall debug panel tool."); + + assert_eq!(tool.path, "/v2/recall-debug/panel"); + assert!(tool.description.contains("recall_trace")); } #[test] diff --git 
a/docs/evidence/benchmarking/2026-06-20-recall-debug-panel-report.md b/docs/evidence/benchmarking/2026-06-20-recall-debug-panel-report.md index 2b6dfd5a..358ab401 100644 --- a/docs/evidence/benchmarking/2026-06-20-recall-debug-panel-report.md +++ b/docs/evidence/benchmarking/2026-06-20-recall-debug-panel-report.md @@ -6,7 +6,7 @@ resource: docs/evidence/benchmarking/2026-06-20-recall-debug-panel-report.md status: active authority: current_state owner: evidence -last_verified: 2026-06-20 +last_verified: 2026-06-22 tags: - docs - evidence @@ -31,6 +31,11 @@ reviewable across the main Agent Knowledge OS layers. This is a product/debug su over existing authority layers, not a new mutating worker and not a replacement for the underlying trace, docs, graph, knowledge, or proposal APIs. +The agent-facing endpoint is `POST /v2/recall-debug/panel`. The local admin endpoint +`POST /v2/admin/recall-debug/panel` remains an operator mirror over the same service +read model. Responses include `elf.recall_trace/v1`, a compact deterministic +projection for selected, dropped, stale, blocked, and not-requested context. + ## Layer Coverage | Layer | Anchor | Selection states | Replay/readback | @@ -45,6 +50,11 @@ Each row exposes item refs, authority layer, freshness state, source refs or sou snapshots, score/rank when available, stage reason, evidence class, replay command, and layer-specific debug artifacts. +The embedded `elf.recall_trace/v1` projection flattens these rows into stable +layer/row order for fixture and report assertions. It carries `context_state`, +`selection_state`, freshness, source refs, score/rank, policy reason, replay command, +and evidence class without requiring raw database inspection. + The panel-level `limit` is a per-layer request cap, but the Source Library layer inherits the docs-search effective cap of 32 rows and reports requested/effective limits in document row debug artifacts. @@ -53,6 +63,7 @@ limits in document row debug artifacts. 
| Command | Status | Purpose | | --- | --- | --- | +| `cargo test -p elf-service recall_trace --lib` | pass | Unit-check deterministic `recall_trace` stale, dropped, blocked, and not-requested projection. | | `cargo test -p elf-service recall_debug -- --nocapture` | pass | Unit-check panel summary counters and `not_requested` layer behavior. | | `cargo test -p elf-mcp registers_all_tools -- --nocapture` | pass | Guard MCP tool registration for `elf_recall_debug_panel`. | | `cargo test -p elf-eval --test real_world_job_benchmark recall_debug_panel_report_wires_cross_layer_debug_contract -- --nocapture` | pass | Guard service, API, MCP, docs, README, and snapshot coverage for XY-1022. | @@ -66,6 +77,8 @@ Allowed: through trace bundles when candidate capture/retention preserved them. - Source documents, knowledge pages, graph facts, and Dreaming proposals can be inspected from one panel response when their anchors are supplied. +- The agent-facing panel response includes a deterministic `elf.recall_trace/v1` + projection for selected, dropped, stale, blocked, and not-requested context. - Missing anchors stay visible as `not_requested` layers instead of hidden pass claims. - Requested layer readback failures stay visible as `blocked` layers instead of diff --git a/docs/spec/system_elf_memory_service_v2.md b/docs/spec/system_elf_memory_service_v2.md index aaae9204..ad04760c 100644 --- a/docs/spec/system_elf_memory_service_v2.md +++ b/docs/spec/system_elf_memory_service_v2.md @@ -1157,16 +1157,22 @@ Behavior: - Normal recall remains active-only; `deprecated` and `deleted` notes are visible through provenance/history or explicit non-active list filters, not ordinary search. 
-Admin recall/debug panel: +Recall/debug panel: +- POST /v2/recall-debug/panel - POST /v2/admin/recall-debug/panel Behavior: -- The endpoint returns `elf.recall_debug_panel/v1`, a read-only cross-layer panel +- The endpoints return `elf.recall_debug_panel/v1`, a read-only cross-layer panel over Memory Note trace bundles, Source Library document search, Knowledge Workspace page search, graph reports, and Dreaming review queue proposals. +- The public route is the agent-facing recall/debug API. The admin route is an + operator mirror over the same service read model. - Each row must expose selection state, authority layer, freshness state, source refs or source snapshots, score/rank where available, stage reason, evidence class, and replay command or deterministic artifact path when available. +- Responses must include `recall_trace` with schema `elf.recall_trace/v1`: a compact + deterministic projection over selected, dropped, stale, blocked, and not-requested + context for agent and fixture/report assertions. - Missing anchors must be represented as `not_requested` layers. The panel must not collapse not-requested, incomplete, blocked, or wrong-result layers into a broad pass claim. @@ -2457,7 +2463,7 @@ Original query: - elf_admin_trajectory_get -> GET /v2/admin/trajectories/{trace_id} - elf_admin_trace_item_get -> GET /v2/admin/trace-items/{item_id} - elf_admin_trace_bundle_get -> GET /v2/admin/traces/{trace_id}/bundle - - elf_recall_debug_panel -> POST /v2/admin/recall-debug/panel + - elf_recall_debug_panel -> POST /v2/recall-debug/panel - elf_admin_note_provenance_get -> GET /v2/admin/notes/{note_id}/provenance - elf_admin_memory_history_get -> GET /v2/admin/notes/{note_id}/history - The MCP server must contain zero business logic or policy. 
diff --git a/docs/spec/system_recall_debug_panel_v1.md b/docs/spec/system_recall_debug_panel_v1.md index f04e5e02..ea7cee75 100644 --- a/docs/spec/system_recall_debug_panel_v1.md +++ b/docs/spec/system_recall_debug_panel_v1.md @@ -6,7 +6,7 @@ resource: docs/spec/system_recall_debug_panel_v1.md status: active authority: normative owner: memory-service -last_verified: 2026-06-20 +last_verified: 2026-06-22 tags: - spec - recall @@ -40,6 +40,18 @@ authority/freshness fields for the recall/debug panel. The response schema is `elf.recall_debug_panel/v1`. +Agent-facing endpoint: + +- `POST /v2/recall-debug/panel` + +Operator mirror: + +- `POST /v2/admin/recall-debug/panel` + +Both routes use the same service read model. The admin route is a mirror for local +operator tooling; the public route is the agent-facing recall/debug API and remains +read-only. + The panel is a read model over existing authoritative surfaces: - Memory Notes: search traces, trace items, trajectory stages, replay candidates, and @@ -52,6 +64,13 @@ The panel is a read model over existing authoritative surfaces: The panel MUST NOT mutate notes, documents, pages, graph facts, or proposals. +The response includes: + +- `summary`: aggregate layer counters. +- `recall_trace`: deterministic `elf.recall_trace/v1` projection for agent use, + fixture assertions, and compact debug readback. +- `layers`: full layer rows for detailed operator inspection. + ## Request Anchors `RecallDebugPanelRequest` requires tenant, project, agent, and read profile from the @@ -108,6 +127,39 @@ Allowed layer evidence classes are: The panel summary MUST preserve evidence class counts. Aggregate success MUST NOT hide `not_requested`, `incomplete`, `blocked`, or `wrong_result` layers. +## Recall Trace + +`recall_trace` is a compact, deterministic projection over the returned layers. It +MUST be stable in layer order and row order for the same persisted trace and backing +readback inputs. It MUST NOT include a generation timestamp. 
+ +Each trace entry MUST include: + +- `layer` +- `context_state`: `selected`, `dropped`, `available`, `reviewable`, `stale`, + `blocked`, `not_requested`, `incomplete`, or `wrong_result`. +- `selection_state`: the original row selection state or layer evidence class. +- `authority_layer` +- `freshness_state` +- `item_ref` +- `source_refs` +- `score` and `rank` when available. +- `policy_reason`: compact stage, drop, lint, temporal, review, blocked, or + not-requested reason. +- `replay_command` when available. +- `evidence_class` +- `raw_sql_needed` + +Rows with stale or non-current freshness such as `stale`, `deprecated`, `deleted`, +`superseded`, `tombstoned`, `historical`, `archived`, `lint_warning`, or `lint_error` +MUST appear in the trace with `context_state = "stale"` while preserving their +original `selection_state`. + +Layers without rows but with `blocked`, `not_requested`, `incomplete`, or +`wrong_result` evidence MUST still contribute a trace entry carrying the layer summary +as `policy_reason`. This lets agents and reports distinguish absent anchors, +blocked readback, and actual empty pass results without raw database inspection. + ## Replay Boundary The panel may return replay commands such as `elf_admin_trace_bundle_get`, diff --git a/docs/spec/system_version_registry.md b/docs/spec/system_version_registry.md index e7e6a6aa..9f02b849 100644 --- a/docs/spec/system_version_registry.md +++ b/docs/spec/system_version_registry.md @@ -180,10 +180,23 @@ This document is normative. When a new versioned identifier is introduced, it mu (`ELF_RECALL_DEBUG_PANEL_SCHEMA_V1`) and `docs/spec/system_recall_debug_panel_v1.md`. - Consumers: `POST /v2/admin/recall-debug/panel` API response, `apps/elf-api`, - `apps/elf-mcp`, operator debugging workflows, and benchmark closeout reports. + `POST /v2/recall-debug/panel` API response, `apps/elf-mcp`, operator debugging + workflows, and benchmark closeout reports. 
- Bump rule: Introduce a new identifier only if layer names, selection states, evidence-class semantics, replay fields, or required row keys become incompatible. +### Recall trace schema + +- Identifier: `elf.recall_trace/v1`. +- Type: Deterministic compact recall trace projection embedded in + `elf.recall_debug_panel/v1`. +- Defined in: `packages/elf-service/src/recall_debug.rs` + (`ELF_RECALL_TRACE_SCHEMA_V1`) and `docs/spec/system_recall_debug_panel_v1.md`. +- Consumers: `POST /v2/recall-debug/panel`, `POST /v2/admin/recall-debug/panel`, + `apps/elf-api`, `apps/elf-mcp`, fixture assertions, and benchmark reports. +- Bump rule: Introduce a new identifier only if trace entry states, source-ref fields, + policy-reason semantics, or deterministic ordering become incompatible. + ### Search filter expression schema - Identifier: `search_filter_expr/v1`. diff --git a/packages/elf-service/src/lib.rs b/packages/elf-service/src/lib.rs index 012faf48..50e36220 100644 --- a/packages/elf-service/src/lib.rs +++ b/packages/elf-service/src/lib.rs @@ -119,9 +119,9 @@ pub use self::{ NoteProvenanceRecentTrace, }, recall_debug::{ - ELF_RECALL_DEBUG_PANEL_SCHEMA_V1, RecallDebugLayer, RecallDebugPanelRequest, - RecallDebugPanelRequestEcho, RecallDebugPanelResponse, RecallDebugPanelSummary, - RecallDebugRow, + ELF_RECALL_DEBUG_PANEL_SCHEMA_V1, ELF_RECALL_TRACE_SCHEMA_V1, RecallDebugLayer, + RecallDebugPanelRequest, RecallDebugPanelRequestEcho, RecallDebugPanelResponse, + RecallDebugPanelSummary, RecallDebugRow, RecallTrace, RecallTraceEntry, RecallTraceSummary, }, search::{ BlendRankingOverride, BlendSegmentOverride, PayloadLevel, QueryPlan, QueryPlanBlendSegment, diff --git a/packages/elf-service/src/recall_debug.rs b/packages/elf-service/src/recall_debug.rs index 9c10b593..ebed00d5 100644 --- a/packages/elf-service/src/recall_debug.rs +++ b/packages/elf-service/src/recall_debug.rs @@ -1,10 +1,9 @@ //! Cross-layer recall/debug panel readback. 
-use std::collections::{BTreeMap, BTreeSet}; +use std::collections::{BTreeMap, BTreeSet, HashSet}; use serde::{Deserialize, Serialize}; use serde_json::Value; -use sqlx::FromRow; use time::OffsetDateTime; use uuid::Uuid; @@ -13,12 +12,15 @@ use crate::{ GraphQueryPredicateRef, GraphReportRequest, KnowledgePageSearchItem, KnowledgePageSearchRequest, Result, SearchExplainItem, SearchTrajectoryStage, TraceBundleGetRequest, - access::ORG_PROJECT_ID, - search::{TraceBundleMode, TraceReplayCandidate}, + access::{self, ORG_PROJECT_ID, SharedSpaceGrantKey}, + search::{self, TraceBundleMode, TraceReplayCandidate}, }; +use elf_storage::models::MemoryNote; /// Schema identifier for recall/debug panel responses. pub const ELF_RECALL_DEBUG_PANEL_SCHEMA_V1: &str = "elf.recall_debug_panel/v1"; +/// Schema identifier for deterministic recall trace projections. +pub const ELF_RECALL_TRACE_SCHEMA_V1: &str = "elf.recall_trace/v1"; const DEFAULT_RECALL_DEBUG_LIMIT: u32 = 25; const MAX_RECALL_DEBUG_LIMIT: u32 = 100; @@ -51,6 +53,9 @@ pub struct RecallDebugPanelRequest { pub include_dreaming: Option, /// Maximum rows per layer. pub limit: Option, + #[serde(skip)] + /// Whether project-scoped trace anchors are allowed for an admin mirror request. + pub allow_project_trace_debug: bool, } /// Cross-layer recall/debug panel response. @@ -65,10 +70,75 @@ pub struct RecallDebugPanelResponse { pub request: RecallDebugPanelRequestEcho, /// Aggregate panel summary. pub summary: RecallDebugPanelSummary, + /// Deterministic flat trace projection for agents and fixture assertions. + pub recall_trace: RecallTrace, /// Cross-layer rows grouped by source layer. pub layers: Vec, } +/// Deterministic flat recall trace over all requested layers. +#[derive(Clone, Debug, Serialize)] +pub struct RecallTrace { + /// Trace schema identifier. + pub schema: String, + /// Aggregate trace counters. + pub summary: RecallTraceSummary, + /// Stable trace entries in layer and row order. 
+ pub entries: Vec, +} + +/// Aggregate counters for a recall trace. +#[derive(Clone, Debug, Default, Serialize)] +pub struct RecallTraceSummary { + /// Number of trace entries. + pub entry_count: usize, + /// Entries whose row selection state is selected. + pub selected_count: usize, + /// Entries whose row selection state is dropped. + pub dropped_count: usize, + /// Entries whose freshness state indicates stale or non-current evidence. + pub stale_count: usize, + /// Entries representing blocked layers. + pub blocked_count: usize, + /// Entries representing layers that were not requested. + pub not_requested_count: usize, + /// Entries that require raw SQL for diagnosis. + pub raw_sql_needed_count: usize, + /// Entries with a replay command or deterministic artifact path. + pub replay_command_count: usize, +} + +/// One compact recall trace entry. +#[derive(Clone, Debug, Serialize)] +pub struct RecallTraceEntry { + /// Layer identifier. + pub layer: String, + /// Primary trace state for compact assertions. + pub context_state: String, + /// Original row selection state or layer evidence class. + pub selection_state: String, + /// Authority layer that owns the context. + pub authority_layer: String, + /// Freshness or temporal state. + pub freshness_state: String, + /// Stable identifiers for replay or hydration. + pub item_ref: Value, + /// Source refs or source snapshots supporting the context. + pub source_refs: Value, + /// Optional score. + pub score: Option, + /// Optional rank. + pub rank: Option, + /// Compact policy or stage reason for the state. + pub policy_reason: Option, + /// Replay command or deterministic artifact path. + pub replay_command: Option, + /// Layer or row evidence class. + pub evidence_class: String, + /// Whether raw SQL is required to diagnose this entry. + pub raw_sql_needed: bool, +} + /// Stable request echo for panel responses. 
#[derive(Clone, Debug, Serialize)] pub struct RecallDebugPanelRequestEcho { @@ -169,9 +239,8 @@ pub struct RecallDebugRow { pub debug_artifacts: Value, } -#[derive(Clone, Debug, FromRow)] +#[derive(Clone, Debug)] struct NoteDebugSourceRow { - note_id: Uuid, status: String, source_ref: Value, updated_at: OffsetDateTime, @@ -252,6 +321,7 @@ impl ElfService { ); let summary = summarize_layers(&layers); + let recall_trace = build_recall_trace(&layers); Ok(RecallDebugPanelResponse { schema: ELF_RECALL_DEBUG_PANEL_SCHEMA_V1.to_string(), @@ -265,6 +335,7 @@ impl ElfService { limit, }, summary, + recall_trace, layers, }) } @@ -280,6 +351,11 @@ impl ElfService { "Supply trace_id to show selected and dropped Memory Note candidates.", )); }; + + if !req.allow_project_trace_debug { + self.ensure_public_recall_trace_allowed(req, trace_id).await?; + } + let bundle = self .trace_bundle_get(TraceBundleGetRequest { tenant_id: req.tenant_id.clone(), @@ -303,12 +379,18 @@ impl ElfService { .load_memory_note_debug_sources(req, all_note_ids.iter().copied().collect()) .await?; let replay_command = format!("elf_admin_trace_bundle_get trace_id={trace_id} mode=bounded"); + let visible_items = bundle + .items + .iter() + .filter(|item| source_refs.contains_key(&item.note_id)) + .collect::>(); let dropped_candidates = bundle .candidates .as_deref() .unwrap_or_default() .iter() .filter(|candidate| !candidate_is_selected(&selected_candidate_keys, candidate)) + .filter(|candidate| source_refs.contains_key(&candidate.note_id)) .collect::>(); let selected_cap = if !dropped_candidates.is_empty() && limit > 1 { limit as usize - 1 @@ -317,7 +399,7 @@ impl ElfService { }; let mut rows = Vec::new(); - for item in bundle.items.iter().take(selected_cap) { + for item in visible_items.iter().take(selected_cap) { let source = source_refs.get(&item.note_id); rows.push(RecallDebugRow { @@ -366,6 +448,38 @@ impl ElfService { )) } + async fn ensure_public_recall_trace_allowed( + &self, + req: 
&RecallDebugPanelRequest, + trace_id: Uuid, + ) -> Result<()> { + let row: Option<(i64,)> = sqlx::query_as( + "\ +SELECT 1 +FROM search_traces +WHERE trace_id = $1 + AND tenant_id = $2 + AND project_id = $3 + AND agent_id = $4 + AND read_profile = $5", + ) + .bind(trace_id) + .bind(req.tenant_id.trim()) + .bind(req.project_id.trim()) + .bind(req.agent_id.trim()) + .bind(req.read_profile.trim()) + .fetch_optional(&self.db.pool) + .await?; + + if row.is_some() { + Ok(()) + } else { + Err(Error::InvalidRequest { + message: "Unknown trace_id for this recall context.".to_string(), + }) + } + } + async fn recall_docs_layer( &self, req: &RecallDebugPanelRequest, @@ -681,9 +795,9 @@ impl ElfService { return Ok(BTreeMap::new()); } - let rows = sqlx::query_as::<_, NoteDebugSourceRow>( + let rows = sqlx::query_as::<_, MemoryNote>( "\ -SELECT note_id, status, source_ref, updated_at +SELECT * FROM memory_notes WHERE tenant_id = $1 AND note_id = ANY($3::uuid[]) @@ -699,10 +813,68 @@ FROM memory_notes .fetch_all(&self.db.pool) .await?; - Ok(rows.into_iter().map(|row| (row.note_id, row)).collect()) + if req.allow_project_trace_debug { + return Ok(rows.into_iter().map(note_debug_source_pair).collect()); + } + + let allowed_scopes = + search::resolve_read_profile_scopes(&self.cfg, req.read_profile.trim())?; + let org_shared_allowed = allowed_scopes.iter().any(|scope| scope == "org_shared"); + let shared_grants = access::load_shared_read_grants_with_org_shared( + &self.db.pool, + req.tenant_id.trim(), + req.project_id.trim(), + req.agent_id.trim(), + org_shared_allowed, + ) + .await?; + + Ok(rows + .into_iter() + .filter(|note| { + note_debug_read_allowed(note, req.agent_id.trim(), &allowed_scopes, &shared_grants) + }) + .map(note_debug_source_pair) + .collect()) } } +fn note_debug_source_pair(note: MemoryNote) -> (Uuid, NoteDebugSourceRow) { + ( + note.note_id, + NoteDebugSourceRow { + status: note.status, + source_ref: note.source_ref, + updated_at: note.updated_at, + }, + ) +} + 
+fn note_debug_read_allowed( + note: &MemoryNote, + requester_agent_id: &str, + allowed_scopes: &[String], + shared_grants: &HashSet, +) -> bool { + if !allowed_scopes.iter().any(|scope| scope == ¬e.scope) { + return false; + } + if note.scope == "agent_private" { + return note.agent_id == requester_agent_id; + } + if !matches!(note.scope.as_str(), "project_shared" | "org_shared") { + return false; + } + if note.agent_id == requester_agent_id { + return true; + } + + shared_grants.contains(&SharedSpaceGrantKey { + scope: note.scope.clone(), + space_owner_agent_id: note.agent_id.clone(), + }) +} + fn candidate_debug_row( trace_id: Uuid, candidate: &TraceReplayCandidate, @@ -782,6 +954,123 @@ fn summarize_layers(layers: &[RecallDebugLayer]) -> RecallDebugPanelSummary { summary } +fn build_recall_trace(layers: &[RecallDebugLayer]) -> RecallTrace { + let mut entries = Vec::new(); + + for layer in layers { + if layer.rows.is_empty() { + if matches!( + layer.evidence_class.as_str(), + "blocked" | "not_requested" | "incomplete" | "wrong_result" + ) { + entries.push(layer_trace_entry(layer)); + } + + continue; + } + + entries.extend(layer.rows.iter().map(row_trace_entry)); + } + + let summary = summarize_trace_entries(&entries); + + RecallTrace { schema: ELF_RECALL_TRACE_SCHEMA_V1.to_string(), summary, entries } +} + +fn summarize_trace_entries(entries: &[RecallTraceEntry]) -> RecallTraceSummary { + let mut summary = RecallTraceSummary { entry_count: entries.len(), ..Default::default() }; + + for entry in entries { + match entry.selection_state.as_str() { + "selected" => summary.selected_count += 1, + "dropped" => summary.dropped_count += 1, + "blocked" => summary.blocked_count += 1, + "not_requested" => summary.not_requested_count += 1, + _ => {}, + } + + if entry.context_state == "stale" || stale_freshness_state(&entry.freshness_state) { + summary.stale_count += 1; + } + if entry.raw_sql_needed { + summary.raw_sql_needed_count += 1; + } + if 
entry.replay_command.as_ref().is_some_and(|value| !value.is_empty()) { + summary.replay_command_count += 1; + } + } + + summary +} + +fn layer_trace_entry(layer: &RecallDebugLayer) -> RecallTraceEntry { + let context_state = match layer.evidence_class.as_str() { + "not_requested" => "not_requested", + "blocked" => "blocked", + "incomplete" => "incomplete", + "wrong_result" => "wrong_result", + _ => "available", + }; + + RecallTraceEntry { + layer: layer.layer.clone(), + context_state: context_state.to_string(), + selection_state: layer.evidence_class.clone(), + authority_layer: layer.layer.clone(), + freshness_state: layer.evidence_class.clone(), + item_ref: serde_json::json!({ + "layer": layer.layer.clone(), + "anchor": layer.anchor.clone(), + }), + source_refs: serde_json::json!([]), + score: None, + rank: None, + policy_reason: Some(layer.summary.clone()), + replay_command: None, + evidence_class: layer.evidence_class.clone(), + raw_sql_needed: layer.raw_sql_needed, + } +} + +fn row_trace_entry(row: &RecallDebugRow) -> RecallTraceEntry { + let context_state = if stale_freshness_state(&row.freshness_state) { + "stale" + } else { + row.selection_state.as_str() + }; + + RecallTraceEntry { + layer: row.layer.clone(), + context_state: context_state.to_string(), + selection_state: row.selection_state.clone(), + authority_layer: row.authority_layer.clone(), + freshness_state: row.freshness_state.clone(), + item_ref: row.item_ref.clone(), + source_refs: row.source_refs.clone(), + score: row.score, + rank: row.rank, + policy_reason: row.stage_reason.clone().or_else(|| row.rationale.clone()), + replay_command: row.replay_command.clone(), + evidence_class: row.evidence_class.clone(), + raw_sql_needed: false, + } +} + +fn stale_freshness_state(freshness_state: &str) -> bool { + matches!( + freshness_state, + "stale" + | "deprecated" + | "deleted" + | "superseded" + | "tombstoned" + | "historical" + | "archived" + | "lint_warning" + | "lint_error" + ) +} + fn 
layer_from_rows( layer: &str, evidence_class: &str, @@ -927,10 +1216,16 @@ fn graph_temporal_status(status: crate::RelationTemporalStatus) -> String { #[cfg(test)] mod tests { + use std::collections::HashSet; + + use time::OffsetDateTime; + use crate::{ RecallDebugRow, + access::SharedSpaceGrantKey, recall_debug::{self, BTreeSet, Error, Uuid}, }; + use elf_storage::models::MemoryNote; #[test] fn summary_preserves_not_requested_and_replay_counts() { @@ -1037,4 +1332,156 @@ mod tests { assert!(selected.contains(&recall_debug::candidate_identity(note_id, selected_chunk_id))); assert!(!selected.contains(&recall_debug::candidate_identity(note_id, dropped_chunk_id))); } + + #[test] + fn debug_note_readability_preserves_stale_owner_context_only() { + let allowed_scopes = vec!["agent_private".to_string(), "project_shared".to_string()]; + let shared_grants = HashSet::new(); + let mut note = note_for_debug_visibility("owner-agent", "agent_private", "deprecated"); + + assert!(recall_debug::note_debug_read_allowed( + ¬e, + "owner-agent", + &allowed_scopes, + &shared_grants + )); + assert!(!recall_debug::note_debug_read_allowed( + ¬e, + "other-agent", + &allowed_scopes, + &shared_grants + )); + + note.scope = "project_shared".to_string(); + + assert!(!recall_debug::note_debug_read_allowed( + ¬e, + "other-agent", + &allowed_scopes, + &shared_grants + )); + + let shared_grants = HashSet::from([SharedSpaceGrantKey { + scope: "project_shared".to_string(), + space_owner_agent_id: "owner-agent".to_string(), + }]); + + assert!(recall_debug::note_debug_read_allowed( + ¬e, + "other-agent", + &allowed_scopes, + &shared_grants + )); + } + + #[test] + fn recall_trace_flattens_stale_and_dropped_context() { + let layers = vec![ + recall_debug::layer_from_rows( + "memory_notes", + "pass", + Some("trace".to_string()), + "trace rows", + vec![ + RecallDebugRow { + layer: "memory_notes".to_string(), + item_ref: serde_json::json!({"note_id": "selected-stale"}), + selection_state: 
"selected".to_string(), + authority_layer: "memory_note".to_string(), + freshness_state: "deprecated".to_string(), + source_refs: serde_json::json!([{"schema": "source_ref/v1"}]), + score: Some(0.9), + rank: Some(1), + rationale: Some("selected but stale".to_string()), + stage_reason: Some("status=deprecated".to_string()), + replay_command: Some("elf_trace".to_string()), + evidence_class: "pass".to_string(), + debug_artifacts: serde_json::json!({}), + }, + RecallDebugRow { + layer: "memory_notes".to_string(), + item_ref: serde_json::json!({"note_id": "dropped"}), + selection_state: "dropped".to_string(), + authority_layer: "memory_note".to_string(), + freshness_state: "active".to_string(), + source_refs: serde_json::json!([]), + score: Some(0.4), + rank: Some(4), + rationale: Some("candidate not narrated".to_string()), + stage_reason: Some("not_in_final_top_k".to_string()), + replay_command: Some("elf_trace".to_string()), + evidence_class: "pass".to_string(), + debug_artifacts: serde_json::json!({}), + }, + ], + ), + recall_debug::not_requested_layer("graph_facts", "missing graph subject"), + ]; + let trace = recall_debug::build_recall_trace(&layers); + + assert_eq!(trace.schema, "elf.recall_trace/v1"); + assert_eq!(trace.summary.entry_count, 3); + assert_eq!(trace.summary.selected_count, 1); + assert_eq!(trace.summary.dropped_count, 1); + assert_eq!(trace.summary.stale_count, 1); + assert_eq!(trace.summary.not_requested_count, 1); + assert_eq!(trace.summary.replay_command_count, 2); + assert_eq!(trace.entries[0].context_state, "stale"); + assert_eq!(trace.entries[0].policy_reason.as_deref(), Some("status=deprecated")); + assert_eq!(trace.entries[1].context_state, "dropped"); + assert_eq!(trace.entries[1].policy_reason.as_deref(), Some("not_in_final_top_k")); + assert_eq!(trace.entries[2].context_state, "not_requested"); + } + + #[test] + fn recall_trace_counts_blocked_layers_without_backend_details() { + let layer = recall_debug::blocked_layer( + 
"source_documents", + Some("alpha".to_string()), + "docs search failed", + &Error::Storage { message: "password=secret host=db.internal".to_string() }, + ); + let trace = recall_debug::build_recall_trace(&[layer]); + + assert_eq!(trace.summary.blocked_count, 1); + assert_eq!(trace.entries[0].context_state, "blocked"); + assert_eq!(trace.entries[0].selection_state, "blocked"); + assert!( + trace.entries[0] + .policy_reason + .as_deref() + .is_some_and(|reason| reason.contains("error_class=storage_unavailable")) + ); + assert!( + trace.entries[0] + .policy_reason + .as_deref() + .is_some_and(|reason| !reason.contains("password=secret")) + ); + } + + fn note_for_debug_visibility(agent_id: &str, scope: &str, status: &str) -> MemoryNote { + let now = OffsetDateTime::now_utc(); + + MemoryNote { + note_id: Uuid::new_v4(), + tenant_id: "tenant-a".to_string(), + project_id: "project-a".to_string(), + agent_id: agent_id.to_string(), + scope: scope.to_string(), + r#type: "fact".to_string(), + key: None, + text: "Fact: debug visibility test note.".to_string(), + importance: 0.7, + confidence: 0.9, + status: status.to_string(), + created_at: now, + updated_at: now, + expires_at: None, + embedding_version: "test:v1".to_string(), + source_ref: serde_json::json!({"schema": "source_ref/v1"}), + hit_count: 0, + last_hit_at: None, + } + } }