diff --git a/apps/elf-api/src/routes.rs b/apps/elf-api/src/routes.rs index 24e0b0d..9ccf782 100644 --- a/apps/elf-api/src/routes.rs +++ b/apps/elf-api/src/routes.rs @@ -50,24 +50,25 @@ use elf_service::{ ConsolidationRunsListRequest, ConsolidationRunsListResponse, CoreBlockAttachRequest, CoreBlockAttachResponse, CoreBlockDetachRequest, CoreBlockDetachResponse, CoreBlockUpsertRequest, CoreBlockUpsertResponse, CoreBlocksGetRequest, CoreBlocksResponse, - DeleteRequest, DeleteResponse, DocType, DocsExcerptResponse, DocsExcerptsGetRequest, - DocsGetRequest, DocsGetResponse, DocsPutRequest, DocsPutResponse, DocsSearchL0Request, - DocsSearchL0Response, DreamingReviewQueueRequest, DreamingReviewQueueResponse, - EntityMemoryViewRequest, EntityMemoryViewResponse, Error, EventMessage, GranteeKind, - GraphQueryEntityRef, GraphQueryPredicateRef, GraphQueryRequest, GraphQueryResponse, - GraphReportRequest, GraphReportResponse, IngestionProfileSelector, KnowledgePageChangedSource, - KnowledgePageGetRequest, KnowledgePageLintRequest, KnowledgePageLintResponse, - KnowledgePageRebuildRequest, KnowledgePageRebuildResponse, KnowledgePageResponse, - KnowledgePageSearchRequest, KnowledgePageSearchResponse, KnowledgePageWatchRebuildRequest, - KnowledgePageWatchRebuildResponse, KnowledgePagesListRequest, KnowledgePagesListResponse, - ListRequest, ListResponse, MemoryCorrectionAction, MemoryCorrectionRequest, - MemoryCorrectionResponse, MemoryHistoryGetRequest, MemoryHistoryResponse, NoteFetchRequest, - NoteFetchResponse, NoteProvenanceBundleResponse, NoteProvenanceGetRequest, PayloadLevel, - PublishNoteRequest, QueryPlan, RankingRequestOverride, RebuildReport, RecallDebugPanelRequest, - RecallDebugPanelResponse, SearchDetailsRequest, SearchDetailsResult, SearchExplainRequest, - SearchExplainResponse, SearchIndexItem, SearchRequest, SearchResponse, SearchSessionGetRequest, - SearchTimelineGroup, SearchTimelineRequest, SearchTrajectoryResponse, SearchTrajectorySummary, - ShareScope, 
SpaceGrantRevokeRequest, SpaceGrantRevokeResponse, SpaceGrantUpsertRequest, + DeleteRequest, DeleteResponse, DocType, DocsDeleteRequest, DocsDeleteResponse, + DocsExcerptResponse, DocsExcerptsGetRequest, DocsGetRequest, DocsGetResponse, DocsPutRequest, + DocsPutResponse, DocsSearchL0Request, DocsSearchL0Response, DreamingReviewQueueRequest, + DreamingReviewQueueResponse, EntityMemoryViewRequest, EntityMemoryViewResponse, Error, + EventMessage, GranteeKind, GraphQueryEntityRef, GraphQueryPredicateRef, GraphQueryRequest, + GraphQueryResponse, GraphReportRequest, GraphReportResponse, IngestionProfileSelector, + KnowledgePageChangedSource, KnowledgePageGetRequest, KnowledgePageLintRequest, + KnowledgePageLintResponse, KnowledgePageRebuildRequest, KnowledgePageRebuildResponse, + KnowledgePageResponse, KnowledgePageSearchRequest, KnowledgePageSearchResponse, + KnowledgePageWatchRebuildRequest, KnowledgePageWatchRebuildResponse, KnowledgePagesListRequest, + KnowledgePagesListResponse, ListRequest, ListResponse, MemoryCorrectionAction, + MemoryCorrectionRequest, MemoryCorrectionResponse, MemoryHistoryGetRequest, + MemoryHistoryResponse, NoteFetchRequest, NoteFetchResponse, NoteProvenanceBundleResponse, + NoteProvenanceGetRequest, PayloadLevel, PublishNoteRequest, QueryPlan, RankingRequestOverride, + RebuildReport, RecallDebugPanelRequest, RecallDebugPanelResponse, SearchDetailsRequest, + SearchDetailsResult, SearchExplainRequest, SearchExplainResponse, SearchIndexItem, + SearchRequest, SearchResponse, SearchSessionGetRequest, SearchTimelineGroup, + SearchTimelineRequest, SearchTrajectoryResponse, SearchTrajectorySummary, ShareScope, + SpaceGrantRevokeRequest, SpaceGrantRevokeResponse, SpaceGrantUpsertRequest, SpaceGrantsListRequest, TextPositionSelector, TextQuoteSelector, TraceBundleGetRequest, TraceBundleResponse, TraceGetRequest, TraceGetResponse, TraceRecentListRequest, TraceRecentListResponse, TraceTrajectoryGetRequest, UnpublishNoteRequest, UpdateRequest, @@ -116,6 
+117,7 @@ const VIEWER_HTML: &str = include_str!("../static/viewer.html"); events_ingest, docs_put, docs_get, + docs_delete, docs_search_l0, docs_excerpts_get, core_blocks_get, @@ -737,7 +739,7 @@ pub fn router(state: AppState) -> Router { .layer(DefaultBodyLimit::max(MAX_REQUEST_BYTES)); let docs_router = Router::new() .route("/v2/docs", routing::post(docs_put)) - .route("/v2/docs/{doc_id}", routing::get(docs_get)) + .route("/v2/docs/{doc_id}", routing::get(docs_get).delete(docs_delete)) .route("/v2/docs/search/l0", routing::post(docs_search_l0)) .route("/v2/docs/excerpts", routing::post(docs_excerpts_get)) .with_state(state) @@ -1756,6 +1758,39 @@ async fn docs_get_inner( Ok(Json(response)) } +#[utoipa::path( + delete, + path = "/v2/docs/{doc_id}", + tag = "docs", + params(("doc_id" = Uuid, Path, description = "Document ID.")), + responses( + (status = 200, description = "Document was deleted.", body = Value), + (status = 400, description = "Invalid request.", body = ErrorBody), + (status = 401, description = "Authentication required.", body = ErrorBody), + (status = 403, description = "Scope denied.", body = ErrorBody), + (status = 404, description = "Document was not found.", body = ErrorBody), + (status = 500, description = "Internal error.", body = ErrorBody), + ) +)] +async fn docs_delete( + State(state): State<AppState>, + headers: HeaderMap, + Path(doc_id): Path<Uuid>, +) -> Result<Json<DocsDeleteResponse>, ApiError> { + let ctx = RequestContext::from_headers(&headers)?; + let response = state + .service + .docs_delete(DocsDeleteRequest { + tenant_id: ctx.tenant_id, + project_id: ctx.project_id, + agent_id: ctx.agent_id, + doc_id, + }) + .await?; + + Ok(Json(response)) +} + #[utoipa::path( post, path = "/v2/docs/search/l0", @@ -3532,6 +3567,7 @@ async fn knowledge_pages_search( payload: Result, JsonRejection>, ) -> Result, ApiError> { let ctx = RequestContext::from_headers(&headers)?; + let read_profile = required_read_profile(&headers)?; let Json(payload) = payload.map_err(|err| {
tracing::warn!(error = %err, "Invalid request payload."); @@ -3542,6 +3578,8 @@ async fn knowledge_pages_search( .knowledge_pages_search(KnowledgePageSearchRequest { tenant_id: ctx.tenant_id, project_id: ctx.project_id, + agent_id: ctx.agent_id, + read_profile, query: payload.query, page_kind: payload.page_kind, limit: payload.limit, diff --git a/apps/elf-eval/src/bin/real_world_live_adapter.rs b/apps/elf-eval/src/bin/real_world_live_adapter.rs index 0e6f6f3..b7b92c3 100644 --- a/apps/elf-eval/src/bin/real_world_live_adapter.rs +++ b/apps/elf-eval/src/bin/real_world_live_adapter.rs @@ -4681,6 +4681,8 @@ async fn materialize_elf_knowledge( .knowledge_pages_search(KnowledgePageSearchRequest { tenant_id: TENANT_ID.to_string(), project_id, + agent_id: AGENT_ID.to_string(), + read_profile: "private_only".to_string(), query: "source notes".to_string(), page_kind: Some(KnowledgePageKind::Project), limit: Some(10), diff --git a/apps/elf-mcp/src/server.rs b/apps/elf-mcp/src/server.rs index 9cce54f..c68c7a9 100644 --- a/apps/elf-mcp/src/server.rs +++ b/apps/elf-mcp/src/server.rs @@ -307,6 +307,18 @@ impl ElfMcp { self.forward(HttpMethod::Get, &path, JsonObject::new(), None).await } + #[rmcp::tool( + name = "elf_docs_delete", + description = "Delete a Source Library document by doc_id and enqueue derived doc-vector removal.", + input_schema = docs_get_schema() + )] + async fn elf_docs_delete(&self, mut params: JsonObject) -> Result { + let doc_id = take_required_string(&mut params, "doc_id")?; + let path = format!("/v2/docs/{doc_id}"); + + self.forward(HttpMethod::Delete, &path, JsonObject::new(), None).await + } + #[rmcp::tool( name = "elf_docs_search_l0", description = "Run a minimal Doc search (L0): chunk-level results with short snippets.", diff --git a/docs/evidence/2026-06-23-privacy-delete-export-boundaries-drift-audit.md b/docs/evidence/2026-06-23-privacy-delete-export-boundaries-drift-audit.md new file mode 100644 index 0000000..af253d8 --- /dev/null +++ 
b/docs/evidence/2026-06-23-privacy-delete-export-boundaries-drift-audit.md @@ -0,0 +1,127 @@ +--- +type: Drift Audit +title: "Privacy, Delete, Export, and Retention Boundaries Drift Audit" +description: "Drift audit for current-recall suppression across Source Library, Knowledge Workspace, graph-lite facts, and relation context." +resource: docs/evidence/2026-06-23-privacy-delete-export-boundaries-drift-audit.md +status: active +authority: evidence +owner: evidence +last_verified: 2026-06-23 +tags: + - docs + - evidence + - privacy + - retention +source_refs: + - docs/runbook/privacy_delete_export.md +code_refs: + - apps/elf-api/src/routes.rs + - apps/elf-mcp/src/server.rs + - packages/elf-service/src/docs.rs + - packages/elf-service/src/graph_query.rs + - packages/elf-service/src/graph_report.rs + - packages/elf-service/src/knowledge.rs + - packages/elf-service/src/search.rs + - packages/elf-storage/src/docs.rs + - packages/elf-storage/src/knowledge.rs + - packages/elf-service/tests/acceptance/docs_extension_v1.rs + - packages/elf-service/tests/acceptance/graph_ingestion.rs + - packages/elf-service/tests/acceptance/knowledge_pages.rs +related: + - docs/spec/system_doc_source_ref_v1.md + - docs/spec/system_elf_memory_service_v2.md + - docs/spec/system_graph_memory_postgres_v1.md + - docs/spec/system_knowledge_pages_v1.md +drift_watch: + - docs/runbook/privacy_delete_export.md + - docs/spec/system_doc_source_ref_v1.md + - docs/spec/system_elf_memory_service_v2.md + - docs/spec/system_graph_memory_postgres_v1.md + - docs/spec/system_knowledge_pages_v1.md + - apps/elf-api/src/routes.rs + - apps/elf-mcp/src/server.rs + - packages/elf-service/src/docs.rs + - packages/elf-service/src/graph_query.rs + - packages/elf-service/src/graph_report.rs + - packages/elf-service/src/knowledge.rs + - packages/elf-service/src/search.rs + - packages/elf-storage/src/docs.rs + - packages/elf-storage/src/knowledge.rs +--- +# Privacy, Delete, Export, and Retention Boundaries Drift Audit + 
+Purpose: Record the code and test evidence behind the privacy/delete/export boundary +docs added for XY-1078. +Read this when: You need to verify whether docs for source deletion, private spans, +graph evidence suppression, and export boundaries match current code. +Not this document: A legal compliance assessment, provider terms review, or raw +benchmark report. +Evidence for: `docs/runbook/privacy_delete_export.md` and the related Source +Library, Knowledge Workspace, graph memory, and core service specs. + +## Claims Checked + +- Source Library direct and derived readback uses current active source rows for + recallable snippets. +- Source Library delete has an explicit public HTTP and MCP path that marks the + source non-active and enqueues derived doc-vector deletion. +- Knowledge Workspace page search suppresses snippets whose normalized source refs + are deleted, expired, unreadable, ignored, rejected, unapplied, or contain + non-captured spans. +- Graph query, graph report, and search relation context return facts only when + current readable evidence notes exist and omit deleted or unreadable evidence ids. +- Delete and forget docs distinguish current-recall suppression from retained + provenance, history, trace, and benchmark evidence. +- Export docs route through authorized read APIs and do not describe a bypass around + scope, payload level, or write-policy spans. + +## Implementation Evidence + +- `apps/elf-api/src/routes.rs` exposes `DELETE /v2/docs/{doc_id}` through the public + docs router and OpenAPI path list. +- `apps/elf-mcp/src/server.rs` exposes `elf_docs_delete` as a thin MCP forwarding + tool with no policy logic. +- `packages/elf-service/src/docs.rs` marks owned Source Library documents deleted and + enqueues one doc-index `DELETE` outbox job per persisted chunk. +- `packages/elf-storage/src/docs.rs` provides the status update used by the service + delete path. 
+- `packages/elf-storage/src/knowledge.rs` now resolves Knowledge Workspace note, + event, relation, document, and chunk sources through active/readable source rows. +- `packages/elf-service/src/knowledge.rs` resolves current source keys before page + search and suppresses sections with non-recallable source refs or non-captured + spans. +- `packages/elf-service/src/graph_query.rs` and + `packages/elf-service/src/graph_report.rs` require active, unexpired, readable + graph evidence notes for fact readback. +- `packages/elf-service/src/search.rs` filters relation-context evidence notes to + active, unexpired, readable notes and drops malformed relation rows with no + evidence ids. + +## Test Evidence + +- `packages/elf-service/src/knowledge.rs` has pure coverage for deleted, ignored, + missing, and non-captured source refs. +- `packages/elf-service/src/graph_query.rs` has pure coverage for suppressing graph + rows without readable evidence. +- `packages/elf-service/src/graph_report.rs` has pure coverage for suppressing graph + report facts without readable evidence. +- `packages/elf-service/src/search.rs` has pure coverage for suppressing relation + context rows without evidence. +- `packages/elf-service/tests/acceptance/docs_extension_v1.rs` adds an ignored + integration case for Source Library delete marking the doc deleted, enqueueing + doc-vector deletion, suppressing direct/search readback, and removing Qdrant doc + points. +- `packages/elf-service/tests/acceptance/knowledge_pages.rs` adds an ignored + integration case for Source Library document deletion suppressing page search. +- `packages/elf-service/tests/acceptance/graph_ingestion.rs` adds an ignored + integration case for memory-note delete suppressing graph query readback. + +## Residual Boundaries + +- Provenance, note history, recall traces, and checked benchmark artifacts are audit + evidence, not current recall. 
They may retain historical ids or snippets until + their own retention or purge path runs. +- Provider retention remains outside ELF control once content is sent to external + embedding, rerank, or LLM extractor providers. +- The runbook does not claim a universal public erase endpoint. Operators must act on + the explicit authority surface and verify derived projections. diff --git a/docs/evidence/index.md b/docs/evidence/index.md index b87855d..d5c3f31 100644 --- a/docs/evidence/index.md +++ b/docs/evidence/index.md @@ -22,5 +22,8 @@ Routes to: Drift audits and evidence concepts under `docs/evidence/`. proposal contract. - `2026-06-23-local-agent-loop-drift-audit.md`: Drift audit for the one-command local setup and agent integration recipes. +- `2026-06-23-privacy-delete-export-boundaries-drift-audit.md`: Drift audit for + privacy, delete/forget, export, retention, source visibility, and graph evidence + suppression boundaries. - `external_memory_pattern_radar_latest.md`: Latest weekly external memory pattern radar summary. diff --git a/docs/index.md b/docs/index.md index 5816802..2ef5de6 100644 --- a/docs/index.md +++ b/docs/index.md @@ -36,6 +36,8 @@ The split below is by question type, not by human-versus-agent audience. - Need one-command local setup, the minimal memory+knowledge demo loop, or Codex/Claude/Cursor/MCP/CLI agent integration recipes -> `docs/runbook/agent-setup.md` +- Need privacy, delete/forget, export, retention, connected-source, provider, or + private-span operating boundaries -> `docs/runbook/privacy_delete_export.md` - Need the single-user production backup, restore, and Qdrant rebuild path -> `docs/runbook/single_user_production.md` - Need benchmark commands or interpretation steps -> `docs/runbook/benchmarking/` diff --git a/docs/log.md b/docs/log.md index 48cacaf..d72d188 100644 --- a/docs/log.md +++ b/docs/log.md @@ -117,3 +117,7 @@ logs. 
The new drift audit anchors the deterministic source import -> proposal approval -> recall/debug -> correction/rollback route to current HTTP, MCP, config, and task surfaces. +- Added the privacy, delete, export, and retention boundary runbook for XY-1078, + plus a drift audit and spec updates for Source Library span suppression, Knowledge + Workspace source visibility, graph evidence readback, and relation-context + evidence filtering. diff --git a/docs/runbook/index.md b/docs/runbook/index.md index 63b8dd8..4afc59b 100644 --- a/docs/runbook/index.md +++ b/docs/runbook/index.md @@ -14,6 +14,8 @@ Routes to: Runbook concepts under `docs/runbook/`. Qdrant rebuild, rollback, and cleanup. - `agent-setup.md`: agent-oriented local installation flow, one-command local memory+knowledge loop, and Codex/Claude/Cursor/MCP/CLI recipes. +- `privacy_delete_export.md`: privacy, delete/forget, export, retention, and source + visibility operating boundaries. - `evaluation.md`: retrieval evaluation commands and interpretation flow. - `integration-testing.md`: integration and E2E test workflow. - `testing.md`: test names, scopes, and matching commands. diff --git a/docs/runbook/privacy_delete_export.md b/docs/runbook/privacy_delete_export.md new file mode 100644 index 0000000..f5c044e --- /dev/null +++ b/docs/runbook/privacy_delete_export.md @@ -0,0 +1,178 @@ +--- +type: Runbook +title: "Privacy, Delete, Export, and Retention Boundaries" +description: "Operate ELF memory and knowledge surfaces without confusing current recall, audit retention, export, and provider boundaries." 
+resource: docs/runbook/privacy_delete_export.md +status: active +authority: procedural +owner: runbook +last_verified: 2026-06-23 +tags: + - docs + - runbook + - privacy + - retention +source_refs: [] +code_refs: + - apps/elf-api/src/routes.rs + - apps/elf-mcp/src/server.rs + - packages/elf-service/src/delete.rs + - packages/elf-service/src/docs.rs + - packages/elf-service/src/graph_query.rs + - packages/elf-service/src/graph_report.rs + - packages/elf-service/src/knowledge.rs + - packages/elf-service/src/search.rs + - packages/elf-storage/src/docs.rs + - packages/elf-storage/src/knowledge.rs + - apps/elf-worker/src/worker.rs +related: + - docs/spec/system_doc_source_ref_v1.md + - docs/spec/system_elf_memory_service_v2.md + - docs/spec/system_graph_memory_postgres_v1.md + - docs/spec/system_knowledge_pages_v1.md +drift_watch: + - docs/runbook/privacy_delete_export.md + - apps/elf-api/src/routes.rs + - apps/elf-mcp/src/server.rs + - packages/elf-service/src/delete.rs + - packages/elf-service/src/docs.rs + - packages/elf-service/src/graph_query.rs + - packages/elf-service/src/graph_report.rs + - packages/elf-service/src/knowledge.rs + - packages/elf-service/src/search.rs + - packages/elf-storage/src/docs.rs + - packages/elf-storage/src/knowledge.rs +--- +# Privacy, Delete, Export, and Retention Boundaries + +Purpose: Operate ELF memory and knowledge surfaces without confusing current recall, +audit retention, export, and provider boundaries. +Read this when: You connect sources, ingest private chats, apply delete/forget, +export readback, or validate that derived projections no longer recall a source. +Not this document: Legal compliance policy, provider-specific data-processing terms, +or schema definitions. +Depends on: `docs/spec/system_elf_memory_service_v2.md`, +`docs/spec/system_doc_source_ref_v1.md`, +`docs/spec/system_knowledge_pages_v1.md`, and +`docs/spec/system_graph_memory_postgres_v1.md`. 
+Verification: Deleted, expired, unreadable, ignored, rejected, and excluded source +spans are absent from normal recall surfaces and derived search results while audit +surfaces remain clearly labeled. + +## Authority Map + +- Source Library: authoritative long-form source records in `doc_documents` and + `doc_chunks`. Qdrant doc vectors are derived and rebuildable. +- Memory Ledger: authoritative approved memory notes in `memory_notes`, note + versions, ingest decisions, and correction history. +- Knowledge Workspace: derived pages, sections, citations, lint findings, and + rebuild metadata. Knowledge pages are rebuildable projections, not source truth. +- Graph-lite facts: structured `graph_facts` rows with evidence links to memory + notes. Graph readback is valid only when evidence is still readable for the caller. +- Recall traces: bounded debug evidence for a search. They explain a historical + retrieval and are not canonical current recall. +- Benchmark artifacts: checked reports and snapshots. They are public-safe evidence + records, not private-corpus storage. + +## Delete And Forget + +Memory-note delete sets a note to `deleted`, writes a version row, and enqueues an +indexing delete. Ordinary search, search relation context, graph query, graph report, +and Knowledge Workspace page search must treat deleted or expired notes as +non-recallable. Provenance and history endpoints may still show deleted, deprecated, +or restored rows as audit evidence until lifecycle purge policy removes them. + +Source Library delete uses `DELETE /v2/docs/{doc_id}` or the MCP +`elf_docs_delete` tool. It marks source documents non-active and enqueues +per-chunk doc-index `DELETE` work so the worker removes derived doc vectors. Direct +document reads, L0 search, excerpt hydration, and derived Knowledge Workspace page +search must resolve only active source rows. 
A stored page may still exist after its +source is deleted, but page search must suppress snippets whose normalized source +refs no longer resolve to current sources readable under the caller's read profile +and shared-scope grants. + +Applied consolidation proposals are not a shortcut around source visibility. If a +Knowledge Workspace page cites a proposal, normal page search/export may expose only +bounded proposal metadata. Raw proposal `source_refs`, nested source snapshots, +lineage, diffs, markers, target refs, and proposed payload bodies stay in retained +review/audit surfaces, and nested non-captured spans suppress the page snippet. + +Graph facts are not hard-deleted merely because one evidence note is deleted. Graph +read APIs must require at least one active, unexpired, readable evidence note before +returning a fact, and must omit deleted or unreadable evidence note ids. Facts with +only deleted, expired, or private evidence are retained as stored rows but are not +normal recall results. + +Forget is stronger than ordinary delete only when the operator also removes or purges +the authoritative source rows and retained artifacts under the applicable lifecycle +policy. ELF does not provide a broad public "erase everything everywhere" endpoint in +this contract; use the explicit authority surface and verify each derived projection. + +## Private And Excluded Spans + +Connected chat, search, repo, and web sources can include private or irrelevant +material. Use request-level write policy exclusions and redactions before storing a +Source Library document or event-derived note. Source capture records policy spans as +`excluded` or `redacted` with reason codes; only `captured` spans are eligible for +derived page search readback. + +Private scope remains caller-bound. `agent_private` notes, docs, graph facts, and +derived source refs are readable only by the owning agent under a read profile that +allows private scope. 
Project and org shared rows still require the relevant scope to +be present in the read profile plus an owner-or-grant match where shared grants apply. + +Do not ingest secrets, tokens, private keys, seed phrases, passwords, bank ids, or +personal addresses. The write gate rejects detected secrets for memory notes, but +operators should treat connected-source capture as a pre-ingest trust boundary rather +than relying on downstream cleanup. + +## Export + +Export means reading through the public or admin API for a specific authority surface +and read profile. It does not bypass scope, payload level, source visibility, or +write-policy suppression. + +Use payload levels deliberately: + +- `l0`: compact recall and no source_ref payload. +- `l1`: structured summary without full source_ref payload. +- `l2`: full text and source_ref for callers authorized to inspect evidence. + +Benchmark reports and checked evidence under `docs/evidence/` must stay public-safe. +Do not commit private corpora, raw private chat logs, secrets, provider credentials, +or unsanitized source exports. Prefer fixture ids, bounded quotes, redaction markers, +and typed blockers when private/provider evidence cannot be published. + +## Provider And Local Storage Boundaries + +Postgres is the source of truth for notes, docs, graph facts, derived pages, audit +history, and source refs. Qdrant is a derived retrieval index and can be rebuilt or +dropped without changing source truth. + +Embedding, rerank, and LLM extractor providers may retain request data according to +their own terms. ELF can prevent recall from local derived projections after delete, +but it cannot retract bytes already sent to an external provider. For private chats, +regulated content, or operator-owned corpora, use local providers or disable the +provider-backed path until provider retention is acceptable. + +Recall traces and LLM cache rows are local audit/debug data with configured retention. 
+They are not source truth, but they can contain snippets or identifiers. Keep admin +binds local, avoid exposing trace bundles publicly, and purge local artifacts when an +operator requires stronger cleanup than normal current-recall suppression. +Public recall-debug panels must hydrate memory-note source refs only for active, +unexpired, readable notes; deleted, deprecated, expired, or unreadable notes may +remain in retained trace audit data but must not expose stored `source_ref` payloads +through normal agent-facing recall-debug. + +## Verification Checklist + +- Delete a memory note and verify ordinary search no longer returns it. +- Query graph facts for the deleted note's entity and verify facts without active + readable evidence are absent from graph query/report and relation context. +- Delete a Source Library document through `DELETE /v2/docs/{doc_id}` or + `elf_docs_delete` and verify direct doc reads, L0 search, excerpts, doc-vector + points, and Knowledge Workspace page search no longer surface its spans. +- Verify Knowledge Workspace lint or changed-source rebuild reports stale or missing + source refs instead of treating stale derived text as current authority. +- Verify exported reports and benchmark artifacts contain only public-safe ids, + bounded quotes, redactions, or typed blockers. 
diff --git a/docs/spec/system_doc_source_ref_v1.md b/docs/spec/system_doc_source_ref_v1.md index 0173807..19aec54 100644 --- a/docs/spec/system_doc_source_ref_v1.md +++ b/docs/spec/system_doc_source_ref_v1.md @@ -6,7 +6,7 @@ resource: docs/spec/system_doc_source_ref_v1.md status: active authority: normative owner: spec -last_verified: 2026-06-22 +last_verified: 2026-06-23 tags: - docs - spec @@ -14,10 +14,14 @@ source_refs: [] code_refs: - apps/elf-mcp/src/server.rs - packages/elf-service/src/docs.rs + - packages/elf-service/src/knowledge.rs - packages/elf-storage/src/docs.rs -related: [] +related: + - docs/runbook/privacy_delete_export.md drift_watch: - docs/spec/system_doc_source_ref_v1.md + - packages/elf-service/src/docs.rs + - packages/elf-service/src/knowledge.rs --- # System: `doc_source_ref/v1` for `docs_put` @@ -257,6 +261,23 @@ Persisted normalized `source_ref`: - Normalized capture fields are evidence metadata only. They MUST NOT promote a source record into approved Memory Authority. +Delete, export, and private-span boundary: + +- Source Library direct reads, L0 search, excerpt hydration, and derived + Knowledge Workspace search MUST resolve only active source documents and chunks + readable under the caller's scope context. +- Deleting or deactivating a Source Library document makes its document and chunk + refs non-recallable. Derived pages may retain stored stale text until rebuild, but + page search MUST suppress snippets whose source refs no longer resolve to active + readable document or chunk rows. +- `doc_source_span/v1` entries with `status = "excluded"` or `status = "redacted"` + are audit evidence for write-policy handling. They MUST NOT be treated as captured + source evidence for derived page search, memory promotion, graph facts, or export + payloads that claim to contain current recallable source text. +- Export of Source Library material is an authorized API readback of current + source rows and payload levels. 
It MUST NOT bypass scope, document status, + write-policy spans, or source visibility rules. + ================================================== 6) Examples ================================================== diff --git a/docs/spec/system_elf_memory_service_v2.md b/docs/spec/system_elf_memory_service_v2.md index ec7586e..a2b3abd 100644 --- a/docs/spec/system_elf_memory_service_v2.md +++ b/docs/spec/system_elf_memory_service_v2.md @@ -6,7 +6,7 @@ resource: docs/spec/system_elf_memory_service_v2.md status: active authority: normative owner: spec -last_verified: 2026-06-22 +last_verified: 2026-06-23 tags: - docs - spec @@ -1187,6 +1187,10 @@ Behavior: - Each row must expose selection state, authority layer, freshness state, source refs or source snapshots, score/rank where available, stage reason, evidence class, and replay command or deterministic artifact path when available. +- Public recall-debug memory-note source refs must resolve through current active, + unexpired, readable `memory_notes` at read time. Historical trace items for + deleted, deprecated, expired, or unreadable notes may remain retained audit data, + but public recall-debug must not hydrate their stored `source_ref` payloads. - Responses must include `recall_trace` with schema `elf.recall_trace/v1`: a compact deterministic projection over selected, dropped, stale, blocked, and not-requested context for agent and fixture/report assertions. @@ -1222,6 +1226,9 @@ Behavior: traces, or source pointers. - Page snippets are not authoritative note search hits and must be labeled as derived knowledge page snippets wherever surfaced. +- Page search must use `X-ELF-Read-Profile` and shared-scope grants to resolve + readable source scopes before returning snippets; sections with source refs outside + that effective visibility are suppressed. - The detailed contract is defined in `system_knowledge_pages_v1.md`. 
Admin reviewable memory summary readback: @@ -1346,6 +1353,9 @@ Notes: - `relation_context` is omitted unless `search.graph_context.enabled` is true. - When present, relation context is evidence-bound and bounded by `search.graph_context.max_facts_per_item` and `search.graph_context.max_evidence_notes_per_fact`. +- Relation context must include only graph facts backed by active, unexpired, + readable evidence notes at read time. Deleted, expired, or unreadable evidence + note ids must be omitted. - `relation_context.temporal_status` is derived from the graph fact validity window at the search read timestamp. Historical facts may be returned when they are evidence-linked to a selected note; they must be labeled `historical` instead of being presented as current. @@ -1825,6 +1835,76 @@ Notes: - `ingestion_profile.id` is required when profile override is provided, and when `version` is omitted, latest version for that id is used. - If `ingestion_profile` is omitted, the tenant/project default profile is used. +POST /v2/docs + +Headers: +- X-ELF-Tenant-Id, X-ELF-Project-Id, X-ELF-Agent-Id + +Behavior: +- Stores a Source Library document, persists normalized source capture metadata, + writes doc chunks, and enqueues doc-index `UPSERT` jobs for derived Qdrant points. +- The request may include write-policy redactions or exclusions; excluded spans are + retained as policy metadata but are not captured source spans. +- This endpoint must not create Memory Ledger notes, graph facts, knowledge pages, + search traces, or recall hits. + +GET /v2/docs/{doc_id} + +Headers: +- X-ELF-Tenant-Id, X-ELF-Project-Id, X-ELF-Agent-Id +- X-ELF-Read-Profile + +Behavior: +- Returns active Source Library document metadata only when the caller's read profile + and shared grants can read the document scope. +- Deleted documents are not returned through this current readback path. 
+ +DELETE /v2/docs/{doc_id} + +Headers: +- X-ELF-Tenant-Id, X-ELF-Project-Id, X-ELF-Agent-Id + +Response: +{ + "doc_id": "uuid", + "op": "ADD|UPDATE|NONE|DELETE|REJECTED", + "chunk_delete_count": 0 +} + +Behavior: +- Marks the Source Library document `deleted` when the caller owns the document and + the document scope is writable. +- Enqueues a doc-index `DELETE` job for every persisted document chunk so the worker + removes derived Qdrant doc-vector points. +- Repeating delete on an already deleted document returns `op = NONE`. +- Delete does not mutate Memory Ledger notes, graph facts, knowledge pages, recall + traces, benchmark artifacts, or retained audit rows. Those derived/readback + surfaces must independently suppress deleted document spans during current recall. + +POST /v2/docs/search/l0 + +Headers: +- X-ELF-Tenant-Id, X-ELF-Project-Id, X-ELF-Agent-Id +- X-ELF-Read-Profile + +Behavior: +- Runs chunk-level Source Library search over active docs by default, with service + read-profile and shared-grant checks after candidate retrieval. +- Deleted docs may be inspected only through explicit non-current audit or debugging + paths; normal Source Library search and derived Knowledge Workspace search must not + surface deleted source spans as current context. + +POST /v2/docs/excerpts + +Headers: +- X-ELF-Tenant-Id, X-ELF-Project-Id, X-ELF-Agent-Id +- X-ELF-Read-Profile + +Behavior: +- Hydrates bounded excerpts only from active, readable Source Library documents and + verifies the requested chunk, quote, or position selector against current source + content. + GET /v2/admin/events/ingestion-profiles Headers: @@ -1999,7 +2079,8 @@ Notes: - Shared scopes still apply grant checks; unreadable shared facts are not returned. - `limit` defaults to 50 and must be in the range 1..200. - `truncated = true` means additional facts matched but were clipped by `limit`. -- `evidence_note_ids` is ordered by evidence creation time and capped to 16 IDs per fact. 
+- `evidence_note_ids` is ordered by evidence creation time, capped to 16 IDs per + fact, and includes only active, unexpired, readable evidence notes. - `explain` defaults to false; when true, response includes `explain.schema = "elf.graph_query/v1"`. GET /v2/core-blocks @@ -2464,6 +2545,7 @@ Original query: - elf_searches_notes -> POST /v2/searches/{search_id}/notes - elf_docs_put -> POST /v2/docs - elf_docs_get -> GET /v2/docs/{doc_id} + - elf_docs_delete -> DELETE /v2/docs/{doc_id} - elf_docs_search_l0 -> POST /v2/docs/search/l0 - elf_docs_excerpts_get -> POST /v2/docs/excerpts - elf_notes_list -> GET /v2/notes diff --git a/docs/spec/system_graph_memory_postgres_v1.md b/docs/spec/system_graph_memory_postgres_v1.md index c68fa57..c1c8e78 100644 --- a/docs/spec/system_graph_memory_postgres_v1.md +++ b/docs/spec/system_graph_memory_postgres_v1.md @@ -6,15 +6,22 @@ resource: docs/spec/system_graph_memory_postgres_v1.md status: active authority: normative owner: spec -last_verified: 2026-06-18 +last_verified: 2026-06-23 tags: - docs - spec source_refs: [] -code_refs: [] -related: [] +code_refs: + - packages/elf-service/src/graph_query.rs + - packages/elf-service/src/graph_report.rs + - packages/elf-service/src/search.rs +related: + - docs/runbook/privacy_delete_export.md drift_watch: - docs/spec/system_graph_memory_postgres_v1.md + - packages/elf-service/src/graph_query.rs + - packages/elf-service/src/graph_report.rs + - packages/elf-service/src/search.rs --- # Graph Memory Postgres v1.0 Specification @@ -216,6 +223,12 @@ Supersession rule (write-time): - `current` when `valid_from <= read_at AND (valid_to IS NULL OR valid_to > read_at)`. - `historical` when `valid_to <= read_at`. - `future` when `valid_from > read_at`. +- Graph query, graph report, and search relation context readbacks must return a + fact only when at least one linked evidence note is active, unexpired, and readable + under the caller's scope context at read time. 
+- Evidence note id lists in graph readbacks must include only active, unexpired, + readable evidence notes. Deleted, expired, or unreadable evidence notes may remain + as stored audit rows but must not be serialized as current graph evidence. - Search relation context may include historical facts when they are evidence-linked to a returned note, but it must label them as historical instead of silently treating them as current. - Graph report APIs expose `elf.graph_report/v1` topic maps from the same Postgres graph-lite tables. Report facts must retain `valid_from`, `valid_to`, diff --git a/docs/spec/system_knowledge_pages_v1.md b/docs/spec/system_knowledge_pages_v1.md index a128cbf..93749b5 100644 --- a/docs/spec/system_knowledge_pages_v1.md +++ b/docs/spec/system_knowledge_pages_v1.md @@ -6,7 +6,7 @@ resource: docs/spec/system_knowledge_pages_v1.md status: active authority: normative owner: spec -last_verified: 2026-06-22 +last_verified: 2026-06-23 tags: - docs - spec @@ -18,7 +18,8 @@ code_refs: - packages/elf-storage/src/knowledge.rs - sql/tables/035_knowledge_pages.sql - sql/tables/037_knowledge_page_source_refs.sql -related: [] +related: + - docs/runbook/privacy_delete_export.md drift_watch: - docs/spec/system_knowledge_pages_v1.md - apps/elf-api/src/routes.rs @@ -106,13 +107,52 @@ Rebuild input sources may include: - active Source Library `doc_documents` - active Source Library `doc_chunks` as cited source spans -- active or historical `memory_notes` -- durable `add_event` audit rows from `memory_ingest_decisions` -- `graph_facts` plus `graph_fact_evidence` +- active, unexpired, readable `memory_notes` +- durable `add_event` audit rows from `memory_ingest_decisions` that remain linked + to active, unexpired, readable notes and policy decisions of `remember` or `update` +- `graph_facts` plus active, unexpired, readable `graph_fact_evidence` notes - applied `consolidation_proposals` Unreviewed consolidation proposals must not be used as source input for 
persisted pages. +## Source Visibility And Delete Suppression + +Knowledge pages are stored derived artifacts, so an already persisted page can outlive +one of its source rows. Readback must not treat that stale derived text as current +source authority. + +Knowledge page rebuild, lint, changed-source rebuild, and search must resolve stored +normalized source refs through current authoritative source rows. Search must resolve +the caller's `read_profile` to allowed source scopes and enforce shared-scope grants +before deciding whether a stored section is recallable. A source ref is current and +recallable only when: + +- Source Library document and chunk refs point to active rows readable by the caller. +- Memory note refs point to active, unexpired rows readable by the caller. +- Event refs point to `remember` or `update` ingest decisions still linked to active, + unexpired, readable notes. +- Relation refs point to graph facts with at least one active, unexpired, readable + evidence note. +- Proposal refs point to applied consolidation proposals and their nested proposal + inputs do not carry deleted, private, excluded, redacted, or otherwise + non-recallable source spans. +- Source snapshots with Source Library spans contain only `captured` spans for the + cited derived content. + +Knowledge page search must suppress a section snippet when any normalized source ref +for that section is deleted, expired, unreadable under the caller's read profile, +unreadable under the shared-grant owner/scope model, ignored, rejected, unapplied, or +contains a non-captured private/excluded span. Lint and changed-source rebuild may +still report stale or missing refs as operator diagnostics, but ordinary page search +must not surface the stale derived text as current recall. + +Proposal-backed source refs are review artifacts. 
Ordinary page search and export +must not return raw proposal `source_refs`, nested `source_snapshot`, `lineage`, +`diff`, flags, markers, target refs, or proposed payload bodies. Search/export +readback may expose bounded proposal metadata such as proposal id, run id, kind, +review state, confidence, payload hash, update timestamp, and a count of omitted +nested source refs. + `knowledge_pages.source_coverage` must include: - `schema = "elf.knowledge_page.source_coverage/v1"` @@ -250,7 +290,8 @@ Page search results must include: - result type discriminator `knowledge_page_section` - page id, page kind, page key, title, status, section id, section key, heading, role - bounded section snippet -- section citations and normalized source backlinks +- section citations and normalized source backlinks, with proposal-backed citations + reduced to bounded metadata - page source coverage metadata - rebuild metadata, including previous-version diff metadata when present - lint summary and trust state that distinguishes clean, warning, error, and low diff --git a/packages/elf-service/src/access.rs b/packages/elf-service/src/access.rs index 9de9906..f34c3a9 100644 --- a/packages/elf-service/src/access.rs +++ b/packages/elf-service/src/access.rs @@ -47,6 +47,19 @@ pub(crate) fn note_read_allowed( }) } +pub(crate) fn shared_scope_key_strings( + shared_grants: &HashSet, +) -> Vec { + let mut keys = shared_grants + .iter() + .map(|item| format!("{}:{}", item.scope, item.space_owner_agent_id)) + .collect::>(); + + keys.sort(); + + keys +} + pub(crate) async fn load_shared_read_grants<'e, E>( executor: E, tenant_id: &str, diff --git a/packages/elf-service/src/docs.rs b/packages/elf-service/src/docs.rs index c79c945..2c4c74f 100644 --- a/packages/elf-service/src/docs.rs +++ b/packages/elf-service/src/docs.rs @@ -20,7 +20,7 @@ use tokenizers::Tokenizer; use uuid::Uuid; use crate::{ - ElfService, Error, Result, + ElfService, Error, NoteOp, Result, access::{self, ORG_PROJECT_ID, 
SharedSpaceGrantKey}, search, }; @@ -243,6 +243,30 @@ pub struct DocsGetResponse { pub updated_at: OffsetDateTime, } +/// Request payload for Source Library document deletion. +#[derive(Clone, Debug, Deserialize)] +pub struct DocsDeleteRequest { + /// Tenant that owns the document. + pub tenant_id: String, + /// Project that owns the document. + pub project_id: String, + /// Agent requesting the deletion. + pub agent_id: String, + /// Identifier of the document to delete. + pub doc_id: Uuid, +} + +/// Response payload for Source Library document deletion. +#[derive(Clone, Debug, Serialize)] +pub struct DocsDeleteResponse { + /// Identifier of the affected document. + pub doc_id: Uuid, + /// Operation that was applied. + pub op: NoteOp, + /// Number of persisted chunks queued for derived-index deletion. + pub chunk_delete_count: u32, +} + /// Request payload for L0 document retrieval. #[derive(Clone, Debug, Deserialize)] pub struct DocsSearchL0Request { @@ -872,6 +896,104 @@ LIMIT 1", }) } + /// Soft-deletes one Source Library document and enqueues doc-vector deletion. 
+ pub async fn docs_delete(&self, req: DocsDeleteRequest) -> Result { + let now = OffsetDateTime::now_utc(); + let embed_version = crate::embedding_version(&self.cfg); + let tenant_id = req.tenant_id.trim(); + let project_id = req.project_id.trim(); + let agent_id = req.agent_id.trim(); + + if tenant_id.is_empty() || project_id.is_empty() || agent_id.is_empty() { + return Err(Error::InvalidRequest { + message: "tenant_id, project_id, and agent_id are required.".to_string(), + }); + } + + let mut tx = self.db.pool.begin().await?; + let row: DocDocument = sqlx::query_as::<_, DocDocument>( + "\ +SELECT + doc_id, + tenant_id, + project_id, + agent_id, + scope, + doc_type, + status, + title, + COALESCE(source_ref, '{}'::jsonb) AS source_ref, + content, + content_bytes, + content_hash, + created_at, + updated_at +FROM doc_documents +WHERE doc_id = $1 + AND tenant_id = $2 + AND ( + project_id = $3 + OR (project_id = $4 AND scope = 'org_shared') + ) +FOR UPDATE", + ) + .bind(req.doc_id) + .bind(tenant_id) + .bind(project_id) + .bind(ORG_PROJECT_ID) + .fetch_optional(&mut *tx) + .await? 
+ .ok_or_else(|| Error::NotFound { message: "Doc not found.".to_string() })?; + + if row.agent_id != agent_id { + return Err(Error::NotFound { message: "Doc not found.".to_string() }); + } + + let scope_allowed = self.cfg.scopes.allowed.iter().any(|scope| scope == &row.scope); + let write_allowed = match row.scope.as_str() { + "agent_private" => self.cfg.scopes.write_allowed.agent_private, + "project_shared" => self.cfg.scopes.write_allowed.project_shared, + "org_shared" => self.cfg.scopes.write_allowed.org_shared, + _ => false, + }; + + if !scope_allowed || !write_allowed { + return Err(Error::ScopeDenied { message: "Scope is not allowed.".to_string() }); + } + if row.status == "deleted" { + tx.commit().await?; + + return Ok(DocsDeleteResponse { + doc_id: row.doc_id, + op: NoteOp::None, + chunk_delete_count: 0, + }); + } + + let chunks = docs::list_doc_chunks(&mut *tx, row.doc_id).await?; + + docs::mark_doc_deleted(&mut *tx, tenant_id, row.doc_id, now).await?; + + for chunk in &chunks { + doc_outbox::enqueue_doc_outbox( + &mut *tx, + row.doc_id, + chunk.chunk_id, + "DELETE", + embed_version.as_str(), + ) + .await?; + } + + tx.commit().await?; + + Ok(DocsDeleteResponse { + doc_id: row.doc_id, + op: NoteOp::Delete, + chunk_delete_count: chunks.len() as u32, + }) + } + /// Runs L0 document retrieval with access filtering and optional explain output. 
pub async fn docs_search_l0(&self, req: DocsSearchL0Request) -> Result { let trace_id = Uuid::new_v4(); diff --git a/packages/elf-service/src/graph_query.rs b/packages/elf-service/src/graph_query.rs index 75e37d7..9207fca 100644 --- a/packages/elf-service/src/graph_query.rs +++ b/packages/elf-service/src/graph_query.rs @@ -37,10 +37,22 @@ SELECT COALESCE( (SELECT ARRAY_AGG(e.note_id ORDER BY e.created_at ASC, e.note_id ASC) FROM ( - SELECT note_id, created_at - FROM graph_fact_evidence - WHERE fact_id = gf.fact_id - ORDER BY created_at ASC, note_id ASC + SELECT evidence.note_id, evidence.created_at + FROM graph_fact_evidence evidence + JOIN memory_notes note ON note.note_id = evidence.note_id + WHERE evidence.fact_id = gf.fact_id + AND note.tenant_id = gf.tenant_id + AND note.project_id = gf.project_id + AND note.status = 'active' + AND (note.expires_at IS NULL OR note.expires_at > now()) + AND note.scope = ANY($4::text[]) + AND ( + (note.scope = 'agent_private' AND note.agent_id = $6) + OR (note.scope <> 'agent_private' AND ( + note.agent_id = $6 OR (note.scope || ':' || note.agent_id) = ANY($7::text[]) + )) + ) + ORDER BY evidence.created_at ASC, evidence.note_id ASC LIMIT $9 ) e), '{}'::uuid[] @@ -63,6 +75,23 @@ WHERE gf.tenant_id = $1 gf.agent_id = $6 OR (gf.scope || ':' || gf.agent_id) = ANY($7::text[]) )) ) + AND EXISTS ( + SELECT 1 + FROM graph_fact_evidence evidence + JOIN memory_notes note ON note.note_id = evidence.note_id + WHERE evidence.fact_id = gf.fact_id + AND note.tenant_id = gf.tenant_id + AND note.project_id = gf.project_id + AND note.status = 'active' + AND (note.expires_at IS NULL OR note.expires_at > now()) + AND note.scope = ANY($4::text[]) + AND ( + (note.scope = 'agent_private' AND note.agent_id = $6) + OR (note.scope <> 'agent_private' AND ( + note.agent_id = $6 OR (note.scope || ':' || note.agent_id) = ANY($7::text[]) + )) + ) + ) ORDER BY gf.valid_from DESC, gf.fact_id ASC LIMIT $8"; @@ -347,40 +376,7 @@ impl ElfService { }, ) .await?; - 
let facts: Vec = rows - .into_iter() - .map(|row| { - let object = if let Some(entity_id) = row.object_entity_id { - GraphQueryObject { - entity: Some(GraphQueryObjectEntity { - entity_id, - canonical: row.object_canonical.unwrap_or_else(|| "".to_string()), - kind: row.object_kind, - }), - value: None, - } - } else { - GraphQueryObject { entity: None, value: row.object_value } - }; - - GraphQueryFact { - fact_id: row.fact_id, - scope: row.scope, - actor: row.actor, - predicate: row.predicate, - predicate_id: row.predicate_id, - valid_from: row.valid_from, - valid_to: row.valid_to, - temporal_status: crate::graph::relation_temporal_status( - row.valid_from, - row.valid_to, - read_at, - ), - object, - evidence_note_ids: row.evidence_note_ids, - } - }) - .collect(); + let facts = graph_query_facts_from_rows(rows, read_at); let queried_rows = facts.len(); let (facts, truncated) = truncate_graph_query_facts(facts, prepared.limit); let explain = if prepared.explain { @@ -493,6 +489,46 @@ pub(crate) fn build_graph_query_explain( } } +fn graph_query_facts_from_rows( + rows: Vec, + read_at: OffsetDateTime, +) -> Vec { + rows.into_iter() + .filter(|row| !row.evidence_note_ids.is_empty()) + .map(|row| { + let object = if let Some(entity_id) = row.object_entity_id { + GraphQueryObject { + entity: Some(GraphQueryObjectEntity { + entity_id, + canonical: row.object_canonical.unwrap_or_else(|| "".to_string()), + kind: row.object_kind, + }), + value: None, + } + } else { + GraphQueryObject { entity: None, value: row.object_value } + }; + + GraphQueryFact { + fact_id: row.fact_id, + scope: row.scope, + actor: row.actor, + predicate: row.predicate, + predicate_id: row.predicate_id, + valid_from: row.valid_from, + valid_to: row.valid_to, + temporal_status: crate::graph::relation_temporal_status( + row.valid_from, + row.valid_to, + read_at, + ), + object, + evidence_note_ids: row.evidence_note_ids, + } + }) + .collect() +} + fn validate_graph_query_request(req: GraphQueryRequest) -> 
Result { let tenant_id = normalize_required_field(req.tenant_id.as_str(), "tenant_id")?; let project_id = normalize_required_field(req.project_id.as_str(), "project_id")?; @@ -826,4 +862,45 @@ mod tests { assert_eq!(resolved, vec!["project_shared".to_string()]); assert_eq!(deduped.len(), 1); } + + #[test] + fn graph_query_rows_without_readable_evidence_are_suppressed() { + let read_at = OffsetDateTime::from_unix_timestamp(30).expect("valid timestamp"); + let rows = vec![ + super::GraphQueryFactRow { + fact_id: Uuid::from_u128(1), + scope: "agent_private".to_string(), + actor: "agent".to_string(), + predicate: "works at".to_string(), + predicate_id: None, + object_entity_id: None, + object_canonical: None, + object_kind: None, + object_value: Some("Deleted Source Inc.".to_string()), + valid_from: OffsetDateTime::from_unix_timestamp(10).expect("valid timestamp"), + valid_to: None, + evidence_note_ids: vec![], + }, + super::GraphQueryFactRow { + fact_id: Uuid::from_u128(2), + scope: "agent_private".to_string(), + actor: "agent".to_string(), + predicate: "works at".to_string(), + predicate_id: None, + object_entity_id: None, + object_canonical: None, + object_kind: None, + object_value: Some("Active Source Inc.".to_string()), + valid_from: OffsetDateTime::from_unix_timestamp(20).expect("valid timestamp"), + valid_to: None, + evidence_note_ids: vec![Uuid::from_u128(200)], + }, + ]; + let facts = super::graph_query_facts_from_rows(rows, read_at); + + assert_eq!(facts.len(), 1); + assert_eq!(facts[0].fact_id, Uuid::from_u128(2)); + assert_eq!(facts[0].object.value.as_deref(), Some("Active Source Inc.")); + assert_eq!(facts[0].evidence_note_ids, vec![Uuid::from_u128(200)]); + } } diff --git a/packages/elf-service/src/graph_report.rs b/packages/elf-service/src/graph_report.rs index 7d18dc4..97cc52c 100644 --- a/packages/elf-service/src/graph_report.rs +++ b/packages/elf-service/src/graph_report.rs @@ -42,10 +42,22 @@ SELECT COALESCE( (SELECT ARRAY_AGG(e.note_id ORDER BY 
e.created_at ASC, e.note_id ASC) FROM ( - SELECT note_id, created_at - FROM graph_fact_evidence - WHERE fact_id = gf.fact_id - ORDER BY created_at ASC, note_id ASC + SELECT evidence.note_id, evidence.created_at + FROM graph_fact_evidence evidence + JOIN memory_notes note ON note.note_id = evidence.note_id + WHERE evidence.fact_id = gf.fact_id + AND note.tenant_id = gf.tenant_id + AND note.project_id = gf.project_id + AND note.status = 'active' + AND (note.expires_at IS NULL OR note.expires_at > now()) + AND note.scope = ANY($4::text[]) + AND ( + (note.scope = 'agent_private' AND note.agent_id = $6) + OR (note.scope <> 'agent_private' AND ( + note.agent_id = $6 OR (note.scope || ':' || note.agent_id) = ANY($7::text[]) + )) + ) + ORDER BY evidence.created_at ASC, evidence.note_id ASC LIMIT $9 ) e), '{}'::uuid[] @@ -80,6 +92,23 @@ WHERE gf.tenant_id = $1 gf.agent_id = $6 OR (gf.scope || ':' || gf.agent_id) = ANY($7::text[]) )) ) + AND EXISTS ( + SELECT 1 + FROM graph_fact_evidence evidence + JOIN memory_notes note ON note.note_id = evidence.note_id + WHERE evidence.fact_id = gf.fact_id + AND note.tenant_id = gf.tenant_id + AND note.project_id = gf.project_id + AND note.status = 'active' + AND (note.expires_at IS NULL OR note.expires_at > now()) + AND note.scope = ANY($4::text[]) + AND ( + (note.scope = 'agent_private' AND note.agent_id = $6) + OR (note.scope <> 'agent_private' AND ( + note.agent_id = $6 OR (note.scope || ':' || note.agent_id) = ANY($7::text[]) + )) + ) + ) ORDER BY gf.valid_from DESC, gf.fact_id ASC LIMIT $8"; @@ -528,6 +557,8 @@ fn build_report_facts( rows: Vec, as_of: OffsetDateTime, ) -> Vec { + let rows: Vec = + rows.into_iter().filter(|row| !row.evidence_note_ids.is_empty()).collect(); let current_single_counts = current_single_predicate_counts(&rows, as_of); rows.into_iter() @@ -909,6 +940,26 @@ mod tests { assert!(facts[2].status_markers.iter().any(|marker| marker == "inferred")); } + #[test] + fn 
graph_report_suppresses_facts_without_readable_evidence() { + let mut deleted_source = + row(1, "Deleted Source Inc.", 10, None, "active", "single", vec![]); + + deleted_source.evidence_note_ids = vec![]; + + let facts = graph_report::build_report_facts( + vec![ + deleted_source, + row(2, "Active Source Inc.", 20, None, "active", "single", vec![]), + ], + ts(25), + ); + + assert_eq!(facts.len(), 1); + assert_eq!(facts[0].fact_id, Uuid::from_u128(2)); + assert_eq!(facts[0].object.value.as_deref(), Some("Active Source Inc.")); + } + #[test] fn graph_topic_map_preserves_fact_edges_and_source_markers() { let subject = super::ResolvedGraphReportSubject { diff --git a/packages/elf-service/src/knowledge.rs b/packages/elf-service/src/knowledge.rs index aafc7f7..75e3533 100644 --- a/packages/elf-service/src/knowledge.rs +++ b/packages/elf-service/src/knowledge.rs @@ -1,18 +1,19 @@ //! Deterministic derived knowledge page rebuild and readback service APIs. -use std::collections::{BTreeMap, BTreeSet, HashMap}; +use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; use serde::{Deserialize, Serialize}; -use serde_json::{self, Map, Value}; +use serde_json::{self, Map, Number, Value}; use sqlx::{Postgres, Transaction}; use time::OffsetDateTime; use uuid::Uuid; use crate::{ - ElfService, Error, Result, + ElfService, Error, Result, access, consolidation::{ ConsolidationProposalInput, ConsolidationRunCreateRequest, ConsolidationRunCreateResponse, }, + search, }; use elf_domain::{ consolidation::{ @@ -32,7 +33,7 @@ use elf_storage::{ self, KnowledgeDocChunkSource, KnowledgeDocSource, KnowledgeEventSource, KnowledgeNoteSource, KnowledgePageLintFindingInsert, KnowledgePageSearchRow, KnowledgePageSectionInsert, KnowledgePageSourceRefInsert, KnowledgePageUpsert, - KnowledgeProposalSource, KnowledgeRelationSource, + KnowledgeProposalSource, KnowledgeRelationSource, KnowledgeRelationSourcesFetch, }, models::{ KnowledgePage, KnowledgePageLintFinding, KnowledgePageSection, 
KnowledgePageSourceRef, @@ -138,6 +139,10 @@ pub struct KnowledgePageSearchRequest { pub tenant_id: String, /// Project that owns the pages. pub project_id: String, + /// Agent requesting the page search. + pub agent_id: String, + /// Read profile controlling source visibility. + pub read_profile: String, /// English-only query for page title, key, heading, or section content. pub query: String, /// Optional page-kind filter. @@ -770,7 +775,7 @@ impl SourceIds { { return Err(Error::InvalidRequest { message: - "all requested knowledge page sources must exist, document sources must be active, and proposals must be applied" + "all requested knowledge page sources must exist, source rows must be active and readable, and proposals must be applied" .to_string(), }); } @@ -970,12 +975,25 @@ impl ElfService { ) -> Result { validate_non_empty("tenant_id", req.tenant_id.as_str())?; validate_non_empty("project_id", req.project_id.as_str())?; + validate_non_empty("agent_id", req.agent_id.as_str())?; + validate_non_empty("read_profile", req.read_profile.as_str())?; validate_non_empty("query", req.query.as_str())?; if !english_gate::is_english_natural_language(req.query.as_str()) { return Err(Error::NonEnglishInput { field: "$.query".to_string() }); } + let allowed_scopes = + search::resolve_read_profile_scopes(&self.cfg, req.read_profile.as_str())?; + let org_shared_allowed = allowed_scopes.iter().any(|scope| scope == "org_shared"); + let shared_grants = access::load_shared_read_grants_with_org_shared( + &self.db.pool, + req.tenant_id.as_str(), + req.project_id.as_str(), + req.agent_id.as_str(), + org_shared_allowed, + ) + .await?; let query = req.query.trim().to_ascii_lowercase(); let query_pattern = format!("%{query}%"); let page_kind = req.page_kind.map(KnowledgePageKind::as_str); @@ -991,13 +1009,24 @@ impl ElfService { let page_ids = sorted_unique(&rows.iter().map(|row| row.page_id).collect::>()); let source_refs = 
knowledge::list_knowledge_page_source_refs_for_pages(&self.db.pool, &page_ids).await?; + let current_source_keys = self + .resolve_current_recallable_source_keys( + req.tenant_id.as_str(), + req.project_id.as_str(), + req.agent_id.as_str(), + &allowed_scopes, + &shared_grants, + &source_refs, + ) + .await?; let source_refs_by_section = source_refs_by_section(&source_refs); let items = rows .into_iter() - .map(|row| { + .filter_map(|row| { let refs = cloned_source_refs(source_refs_by_section.get(&row.section_id)); - knowledge_page_search_item(row, refs, req.query.as_str()) + recallable_source_refs(refs.as_slice(), &current_source_keys) + .then(|| knowledge_page_search_item(row, refs, req.query.as_str())) }) .collect(); @@ -1079,8 +1108,25 @@ impl ElfService { req: &KnowledgePageRebuildRequest, ids: &SourceIds, ) -> Result> { + let allowed_scopes = self.cfg.scopes.allowed.as_slice(); + let org_shared_allowed = allowed_scopes.iter().any(|scope| scope == "org_shared"); + let shared_grants = access::load_shared_read_grants_with_org_shared( + &self.db.pool, + req.tenant_id.as_str(), + req.project_id.as_str(), + req.agent_id.as_str(), + org_shared_allowed, + ) + .await?; let (docs, doc_chunks, notes, events, relations, proposals) = self - .resolve_existing_source_rows(req.tenant_id.as_str(), req.project_id.as_str(), ids) + .resolve_existing_source_rows( + req.tenant_id.as_str(), + req.project_id.as_str(), + Some(req.agent_id.as_str()), + allowed_scopes, + &shared_grants, + ids, + ) .await?; ids.require_counts( @@ -1099,6 +1145,9 @@ impl ElfService { &self, tenant_id: &str, project_id: &str, + agent_id: Option<&str>, + allowed_scopes: &[String], + shared_grants: &HashSet, ids: &SourceIds, ) -> Result<( Vec, @@ -1112,35 +1161,99 @@ impl ElfService { &self.db.pool, tenant_id, project_id, + agent_id, + allowed_scopes, &ids.doc_ids, ) .await?; + let docs = docs + .into_iter() + .filter(|source| { + source_row_read_allowed( + source.agent_id.as_str(), + source.scope.as_str(), + 
agent_id, + allowed_scopes, + shared_grants, + ) + }) + .collect(); let doc_chunks = knowledge::fetch_knowledge_doc_chunk_sources( &self.db.pool, tenant_id, project_id, + agent_id, + allowed_scopes, &ids.doc_chunk_ids, ) .await?; + let doc_chunks = doc_chunks + .into_iter() + .filter(|source| { + source_row_read_allowed( + source.agent_id.as_str(), + source.scope.as_str(), + agent_id, + allowed_scopes, + shared_grants, + ) + }) + .collect(); let notes = knowledge::fetch_knowledge_note_sources( &self.db.pool, tenant_id, project_id, + agent_id, + allowed_scopes, &ids.note_ids, ) .await?; + let notes = notes + .into_iter() + .filter(|source| { + source_row_read_allowed( + source.agent_id.as_str(), + source.scope.as_str(), + agent_id, + allowed_scopes, + shared_grants, + ) + }) + .collect(); let events = knowledge::fetch_knowledge_event_sources( &self.db.pool, tenant_id, project_id, + agent_id, + allowed_scopes, &ids.event_ids, ) .await?; + let events = events + .into_iter() + .filter(|source| { + source_row_read_allowed( + source.agent_id.as_str(), + source.scope.as_str(), + agent_id, + allowed_scopes, + shared_grants, + ) + }) + .collect(); + let shared_scope_keys = access::shared_scope_key_strings(shared_grants); + let private_allowed = allowed_scopes.iter().any(|scope| scope == "agent_private"); let relations = knowledge::fetch_knowledge_relation_sources( &self.db.pool, - tenant_id, - project_id, - &ids.relation_ids, + KnowledgeRelationSourcesFetch { + tenant_id, + project_id, + agent_id, + allowed_scopes, + shared_scope_keys: shared_scope_keys.as_slice(), + private_allowed, + fact_ids: &ids.relation_ids, + }, ) .await?; let proposals = knowledge::fetch_knowledge_proposal_sources( @@ -1188,13 +1301,47 @@ impl ElfService { Error::InvalidRequest { message: "stored knowledge page kind is invalid".to_string() } })?; let (docs, doc_chunks, notes, events, relations, proposals) = self - .resolve_existing_source_rows(page.tenant_id.as_str(), page.project_id.as_str(), ids) 
+ .resolve_existing_source_rows( + page.tenant_id.as_str(), + page.project_id.as_str(), + None, + self.cfg.scopes.allowed.as_slice(), + &HashSet::new(), + ids, + ) .await?; let mut sources = source_snapshots(docs, doc_chunks, notes, events, relations, proposals); Ok(sources.drain(..).map(|source| (source_key(&source), source)).collect()) } + async fn resolve_current_recallable_source_keys( + &self, + tenant_id: &str, + project_id: &str, + agent_id: &str, + allowed_scopes: &[String], + shared_grants: &HashSet, + source_refs: &[KnowledgePageSourceRef], + ) -> Result> { + let ids = SourceIds::from_source_refs(source_refs)?; + let (docs, doc_chunks, notes, events, relations, proposals) = self + .resolve_existing_source_rows( + tenant_id, + project_id, + Some(agent_id), + allowed_scopes, + shared_grants, + &ids, + ) + .await?; + + Ok(source_snapshots(docs, doc_chunks, notes, events, relations, proposals) + .into_iter() + .map(|source| source_key(&source)) + .collect()) + } + async fn watch_rebuild_page( &self, agent_id: &str, @@ -2122,6 +2269,93 @@ fn source_refs_by_section( by_section } +fn recallable_source_refs( + source_refs: &[KnowledgePageSourceRef], + current_source_keys: &BTreeSet, +) -> bool { + !source_refs.is_empty() + && source_refs.iter().all(|source_ref| { + current_source_keys + .contains(&current_key(source_ref.source_kind.as_str(), source_ref.source_id)) + && recallable_source_ref(source_ref) + }) +} + +fn source_row_read_allowed( + owner_agent_id: &str, + scope: &str, + requester_agent_id: Option<&str>, + allowed_scopes: &[String], + shared_grants: &HashSet, +) -> bool { + if !allowed_scopes.iter().any(|allowed_scope| allowed_scope == scope) { + return false; + } + + let Some(requester_agent_id) = requester_agent_id else { + return true; + }; + + if scope == "agent_private" { + return owner_agent_id == requester_agent_id; + } + if !matches!(scope, "project_shared" | "org_shared") { + return false; + } + if owner_agent_id == requester_agent_id { + return 
true; + } + + shared_grants.contains(&access::SharedSpaceGrantKey { + scope: scope.to_string(), + space_owner_agent_id: owner_agent_id.to_string(), + }) +} + +fn recallable_source_ref(source_ref: &KnowledgePageSourceRef) -> bool { + let Some(status) = source_ref.source_status.as_deref().map(str::trim) else { + return false; + }; + + if !matches!(status, "active" | "remember" | "update" | "current" | "historical" | "applied") { + return false; + } + + !has_non_recallable_span(&source_ref.source_snapshot) +} + +fn has_non_recallable_span(source_snapshot: &Value) -> bool { + match source_snapshot { + Value::Object(object) => + policy_spans_are_non_recallable(object.get("policy_spans")) + || object.get("source_span").is_some_and(span_is_non_recallable) + || source_spans_are_non_recallable(object.get("source_spans")) + || object.values().any(has_non_recallable_span), + Value::Array(items) => items.iter().any(has_non_recallable_span), + _ => false, + } +} + +fn policy_spans_are_non_recallable(policy_spans: Option<&Value>) -> bool { + match policy_spans { + Some(Value::Array(spans)) => !spans.is_empty(), + Some(Value::Null) | None => false, + Some(_) => true, + } +} + +fn source_spans_are_non_recallable(source_spans: Option<&Value>) -> bool { + match source_spans { + Some(Value::Array(spans)) => spans.iter().any(span_is_non_recallable), + Some(Value::Null) | None => false, + Some(_) => true, + } +} + +fn span_is_non_recallable(span: &Value) -> bool { + !matches!(span.get("status").and_then(Value::as_str), Some("captured")) +} + fn cloned_source_refs( source_refs: Option<&Vec>, ) -> Vec { @@ -2194,10 +2428,10 @@ fn knowledge_page_search_item( heading: row.heading, role: row.role, snippet: snippet_for_query(row.content.as_str(), query, SEARCH_SNIPPET_CHARS), - citations: row.citations, + citations: sanitize_search_citations(row.citations), citation_count, source_ref_count, - source_refs: source_refs.into_iter().map(KnowledgePageSourceRefResponse::from).collect(), + 
source_refs: source_refs.into_iter().map(search_source_ref_response).collect(), source_coverage: row.source_coverage, rebuild_metadata: row.rebuild_metadata, previous_version_diff, @@ -2212,6 +2446,45 @@ fn knowledge_page_search_item( } } +fn search_source_ref_response( + source_ref: KnowledgePageSourceRef, +) -> KnowledgePageSourceRefResponse { + let mut response = KnowledgePageSourceRefResponse::from(source_ref); + + if response.source_kind == KnowledgeSourceKind::Proposal.as_str() { + response.source_snapshot = sanitize_proposal_snapshot(&response.source_snapshot); + } + + response +} + +fn sanitize_search_citations(citations: Value) -> Value { + let Value::Array(citations) = citations else { + return citations; + }; + + Value::Array(citations.into_iter().map(sanitize_search_citation).collect()) +} + +fn sanitize_search_citation(mut citation: Value) -> Value { + let is_proposal = citation + .get("source_kind") + .and_then(Value::as_str) + .is_some_and(|kind| kind == KnowledgeSourceKind::Proposal.as_str()); + + if !is_proposal { + return citation; + } + + if let Some(object) = citation.as_object_mut() + && let Some(source_snapshot) = object.get_mut("source_snapshot") + { + *source_snapshot = sanitize_proposal_snapshot(source_snapshot); + } + + citation +} + fn search_trust_state( lint: &KnowledgePageLintSummary, coverage_complete: bool, @@ -2706,10 +2979,8 @@ fn proposal_source_snapshot(row: KnowledgeProposalSource) -> SourceSnapshot { "proposed_payload": row.proposed_payload.clone(), "review_state": row.review_state.clone(), })); - let summary = - row.diff.get("summary").and_then(Value::as_str).unwrap_or("Applied consolidation proposal"); - let line = format!("Applied proposal {}: {summary}", row.proposal_kind); - let snapshot = serde_json::json!({ + let line = format!("Applied proposal {}", row.proposal_kind); + let snapshot = sanitize_proposal_snapshot(&serde_json::json!({ "kind": "proposal", "proposal_id": row.proposal_id, "run_id": row.run_id, @@ -2728,7 
+2999,7 @@ fn proposal_source_snapshot(row: KnowledgeProposalSource) -> SourceSnapshot { "target_ref": row.target_ref.clone(), "proposed_payload_hash": content_hash, "updated_at": row.updated_at, - }); + })); SourceSnapshot { kind: KnowledgeSourceKind::Proposal, @@ -2742,6 +3013,61 @@ fn proposal_source_snapshot(row: KnowledgeProposalSource) -> SourceSnapshot { } } +fn sanitize_proposal_snapshot(source_snapshot: &Value) -> Value { + let Some(object) = source_snapshot.as_object() else { + return serde_json::json!({ + "kind": "proposal", + "sanitized": true, + "source_visibility": "proposal_metadata_only", + }); + }; + let nested_source_count = + object.get("source_refs").and_then(Value::as_array).map(Vec::len).unwrap_or_default(); + let mut sanitized = Map::new(); + + for key in [ + "kind", + "proposal_id", + "run_id", + "agent_id", + "proposal_kind", + "apply_intent", + "review_state", + "confidence", + "proposed_payload_hash", + "updated_at", + ] { + if let Some(value) = object.get(key) { + sanitized.insert(key.to_string(), value.clone()); + } + } + + sanitized.insert("sanitized".to_string(), Value::Bool(true)); + sanitized.insert( + "source_visibility".to_string(), + Value::String("proposal_metadata_only".to_string()), + ); + sanitized.insert( + "omitted_fields".to_string(), + serde_json::json!([ + "source_refs", + "source_snapshot", + "lineage", + "diff", + "unsupported_claim_flags", + "contradiction_markers", + "staleness_markers", + "target_ref" + ]), + ); + sanitized.insert( + "nested_source_ref_count".to_string(), + Value::Number(Number::from(nested_source_count)), + ); + + Value::Object(sanitized) +} + fn source_citation_value(source: &SourceSnapshot) -> Value { serde_json::json!({ "source_kind": source.kind.as_str(), @@ -3386,11 +3712,20 @@ async fn insert_lint_finding( #[cfg(test)] mod tests { - use crate::knowledge::{ - self, DraftSection, KnowledgeDeltaMemoryCandidate, KnowledgePage, KnowledgePageKind, - KnowledgePageResponse, KnowledgePageSearchRow, 
KnowledgePageSection, - KnowledgePageSectionResponse, KnowledgePageSourceRef, KnowledgePageSourceRefResponse, - KnowledgePageSummary, KnowledgeSourceKind, LintDraft, OffsetDateTime, SourceSnapshot, Uuid, + use std::{ + collections::{BTreeSet, HashSet}, + slice, + }; + + use crate::{ + access::SharedSpaceGrantKey, + knowledge::{ + self, DraftSection, KnowledgeDeltaMemoryCandidate, KnowledgePage, KnowledgePageKind, + KnowledgePageResponse, KnowledgePageSearchRow, KnowledgePageSection, + KnowledgePageSectionResponse, KnowledgePageSourceRef, KnowledgePageSourceRefResponse, + KnowledgePageSummary, KnowledgeSourceKind, LintDraft, OffsetDateTime, SourceSnapshot, + Uuid, + }, }; use elf_domain::consolidation::ConsolidationApplyIntent; @@ -3757,6 +4092,241 @@ mod tests { assert!(item.snippet.contains("source notes")); } + #[test] + fn search_source_refs_suppress_deleted_and_unreviewed_sources() { + let section_id = Uuid::from_u128(70); + let mut active = test_source_ref(section_id); + let mut deleted = test_source_ref(section_id); + let mut ignored = test_source_ref(section_id); + let current_keys = current_source_keys_for(&[&active, &deleted, &ignored]); + + deleted.source_status = Some("deleted".to_string()); + ignored.source_status = Some("ignore".to_string()); + + assert!(knowledge::recallable_source_refs(slice::from_ref(&active), ¤t_keys)); + assert!(!knowledge::recallable_source_refs(&[deleted], ¤t_keys)); + assert!(!knowledge::recallable_source_refs(&[ignored], ¤t_keys)); + + active.source_status = None; + + assert!(!knowledge::recallable_source_refs(&[active], ¤t_keys)); + } + + #[test] + fn search_source_refs_suppress_non_captured_spans() { + let section_id = Uuid::from_u128(71); + let mut excluded = test_source_ref(section_id); + let mut source_ref_span = test_source_ref(section_id); + let mut policy_span = test_source_ref(section_id); + let mut malformed_span = test_source_ref(section_id); + let current_keys = + current_source_keys_for(&[&excluded, 
&source_ref_span, &policy_span, &malformed_span]); + + excluded.source_snapshot = serde_json::json!({ + "source_span": { + "schema": "doc_source_span/v1", + "status": "excluded", + "reason_code": "WRITE_POLICY_EXCLUSION" + } + }); + source_ref_span.source_snapshot = serde_json::json!({ + "source_ref": { + "source_spans": [ + { + "schema": "doc_source_span/v1", + "status": "redacted", + "reason_code": "WRITE_POLICY_REDACTION" + } + ] + } + }); + policy_span.source_snapshot = serde_json::json!({ + "source_ref": { + "policy_spans": [ + { + "schema": "doc_source_span/v1", + "status": "excluded", + "reason_code": "WRITE_POLICY_EXCLUSION" + } + ] + } + }); + malformed_span.source_snapshot = serde_json::json!({ + "source_span": { + "schema": "doc_source_span/v1", + "reason_code": "WRITE_POLICY_REDACTION" + } + }); + + assert!(!knowledge::recallable_source_refs(&[excluded], ¤t_keys)); + assert!(!knowledge::recallable_source_refs(&[source_ref_span], ¤t_keys)); + assert!(!knowledge::recallable_source_refs(&[policy_span], ¤t_keys)); + assert!(!knowledge::recallable_source_refs(&[malformed_span], ¤t_keys)); + } + + #[test] + fn search_source_refs_suppress_nested_proposal_non_captured_spans() { + let section_id = Uuid::from_u128(73); + let mut proposal = test_source_ref_for(section_id, Uuid::from_u128(74), "proposal-hash"); + + proposal.source_kind = KnowledgeSourceKind::Proposal.as_str().to_string(); + proposal.source_status = Some("applied".to_string()); + proposal.source_snapshot = serde_json::json!({ + "kind": "proposal", + "proposal_id": proposal.source_id, + "source_refs": [ + { + "kind": "doc_chunk", + "source_ref": { + "policy_spans": [ + { + "schema": "doc_source_span/v1", + "status": "excluded", + "reason_code": "WRITE_POLICY_EXCLUSION" + } + ] + } + } + ], + "source_snapshot": { + "sources": [ + { + "source_snapshot": { + "source_span": { + "schema": "doc_source_span/v1", + "status": "redacted", + "reason_code": "WRITE_POLICY_REDACTION" + } + } + } + ] + }, + "diff": 
{ + "after": { + "source_ref": { + "source_spans": [ + { + "schema": "doc_source_span/v1", + "status": "excluded", + "reason_code": "WRITE_POLICY_EXCLUSION" + } + ] + } + } + } + }); + + let current_keys = current_source_keys_for(&[&proposal]); + + assert!(!knowledge::recallable_source_refs(&[proposal], ¤t_keys)); + } + + #[test] + fn search_item_sanitizes_proposal_citations_and_source_refs() { + let section_id = Uuid::from_u128(75); + let mut source_ref = test_source_ref_for(section_id, Uuid::from_u128(76), "proposal-hash"); + + source_ref.source_kind = KnowledgeSourceKind::Proposal.as_str().to_string(); + source_ref.source_status = Some("applied".to_string()); + source_ref.source_snapshot = serde_json::json!({ + "kind": "proposal", + "proposal_id": source_ref.source_id, + "proposal_kind": "create_derived_note", + "source_refs": [{ "kind": "doc", "source_id": Uuid::from_u128(77) }], + "source_snapshot": { "sources": [{ "source_snapshot": { "text": "private raw source" } }] }, + "lineage": { "parents": ["private"] }, + "diff": { "summary": "private raw diff" }, + "unsupported_claim_flags": [{ "quote": "private raw flag" }], + "target_ref": { "text": "private raw target" } + }); + + let row = KnowledgePageSearchRow { + page_id: Uuid::from_u128(78), + page_kind: "project".to_string(), + page_key: "elf".to_string(), + title: "ELF Knowledge".to_string(), + status: "active".to_string(), + source_coverage: serde_json::json!({ + "source_count": 1, + "cited_source_count": 1, + "coverage_complete": true + }), + rebuild_metadata: serde_json::json!({ "deterministic": true }), + page_updated_at: OffsetDateTime::UNIX_EPOCH, + rebuilt_at: OffsetDateTime::UNIX_EPOCH, + section_id, + section_key: "reviewed-proposals".to_string(), + heading: "Reviewed Proposals".to_string(), + role: "proposals".to_string(), + content: "Applied proposal create_derived_note".to_string(), + ordinal: 0, + citations: serde_json::json!([{ + "source_kind": "proposal", + "source_id": source_ref.source_id, 
+ "source_snapshot": source_ref.source_snapshot.clone() + }]), + unsupported_reason: None, + lint_error_count: 0, + lint_warning_count: 0, + lint_info_count: 0, + section_source_ref_count: 1, + }; + let item = knowledge::knowledge_page_search_item(row, vec![source_ref], "proposal"); + let citation_snapshot = &item.citations[0]["source_snapshot"]; + let source_ref_snapshot = &item.source_refs[0].source_snapshot; + + assert_eq!(citation_snapshot["sanitized"], true); + assert_eq!(source_ref_snapshot["sanitized"], true); + assert!(citation_snapshot.get("source_refs").is_none()); + assert!(citation_snapshot.get("source_snapshot").is_none()); + assert!(citation_snapshot.get("diff").is_none()); + assert!(source_ref_snapshot.get("source_refs").is_none()); + assert!(source_ref_snapshot.get("source_snapshot").is_none()); + assert!(source_ref_snapshot.get("diff").is_none()); + } + + #[test] + fn search_source_refs_suppress_missing_current_sources() { + let section_id = Uuid::from_u128(72); + let source_ref = test_source_ref(section_id); + + assert!(!knowledge::recallable_source_refs(&[source_ref], &BTreeSet::new())); + } + + #[test] + fn source_row_read_allowed_requires_shared_grant_for_other_agent_sources() { + let allowed_scopes = vec!["agent_private".to_string(), "project_shared".to_string()]; + let shared_grants = HashSet::new(); + + assert!(knowledge::source_row_read_allowed( + "owner-agent", + "project_shared", + Some("owner-agent"), + &allowed_scopes, + &shared_grants + )); + assert!(!knowledge::source_row_read_allowed( + "owner-agent", + "project_shared", + Some("reader-agent"), + &allowed_scopes, + &shared_grants + )); + + let shared_grants = HashSet::from([SharedSpaceGrantKey { + scope: "project_shared".to_string(), + space_owner_agent_id: "owner-agent".to_string(), + }]); + + assert!(knowledge::source_row_read_allowed( + "owner-agent", + "project_shared", + Some("reader-agent"), + &allowed_scopes, + &shared_grants + )); + } + fn test_page() -> KnowledgePage { 
KnowledgePage { page_id: Uuid::from_u128(1), @@ -3832,6 +4402,15 @@ mod tests { } } + fn current_source_keys_for(source_refs: &[&KnowledgePageSourceRef]) -> BTreeSet { + source_refs + .iter() + .map(|source_ref| { + knowledge::current_key(source_ref.source_kind.as_str(), source_ref.source_id) + }) + .collect() + } + fn test_page_response(section_id: Uuid, source_id: Uuid) -> KnowledgePageResponse { let page = test_page(); let section = test_section( diff --git a/packages/elf-service/src/lib.rs b/packages/elf-service/src/lib.rs index 7a8190d..056200c 100644 --- a/packages/elf-service/src/lib.rs +++ b/packages/elf-service/src/lib.rs @@ -60,9 +60,9 @@ pub use self::{ }, delete::{DeleteRequest, DeleteResponse}, docs::{ - DocType, DocsExcerptResponse, DocsExcerptsGetRequest, DocsGetRequest, DocsGetResponse, - DocsPutRequest, DocsPutResponse, DocsSearchL0Request, DocsSearchL0Response, - TextPositionSelector, TextQuoteSelector, + DocType, DocsDeleteRequest, DocsDeleteResponse, DocsExcerptResponse, + DocsExcerptsGetRequest, DocsGetRequest, DocsGetResponse, DocsPutRequest, DocsPutResponse, + DocsSearchL0Request, DocsSearchL0Response, TextPositionSelector, TextQuoteSelector, }, dreaming_review_queue::{ DreamingReviewQueueAudit, DreamingReviewQueueItem, DreamingReviewQueueItemPolicy, diff --git a/packages/elf-service/src/recall_debug.rs b/packages/elf-service/src/recall_debug.rs index 149c346..e3ff635 100644 --- a/packages/elf-service/src/recall_debug.rs +++ b/packages/elf-service/src/recall_debug.rs @@ -603,6 +603,8 @@ WHERE trace_id = $1 .knowledge_pages_search(KnowledgePageSearchRequest { tenant_id: req.tenant_id.clone(), project_id: req.project_id.clone(), + agent_id: req.agent_id.clone(), + read_profile: req.read_profile.clone(), query: query.to_string(), page_kind: None, limit: Some(limit), @@ -845,11 +847,18 @@ FROM memory_notes org_shared_allowed, ) .await?; + let now = OffsetDateTime::now_utc(); Ok(rows .into_iter() .filter(|note| { - note_debug_read_allowed(note, 
req.agent_id.trim(), &allowed_scopes, &shared_grants) + note_debug_read_allowed( + note, + req.agent_id.trim(), + &allowed_scopes, + &shared_grants, + now, + ) }) .map(note_debug_source_pair) .collect()) @@ -872,7 +881,11 @@ fn note_debug_read_allowed( requester_agent_id: &str, allowed_scopes: &[String], shared_grants: &HashSet, + now: OffsetDateTime, ) -> bool { + if note.status != "active" || note.expires_at.is_some_and(|expires_at| expires_at <= now) { + return false; + } if !allowed_scopes.iter().any(|scope| scope == ¬e.scope) { return false; } @@ -1813,31 +1826,67 @@ mod tests { } #[test] - fn debug_note_readability_preserves_stale_owner_context_only() { + fn debug_note_readability_requires_current_note_and_scope_access() { let allowed_scopes = vec!["agent_private".to_string(), "project_shared".to_string()]; let shared_grants = HashSet::new(); - let mut note = note_for_debug_visibility("owner-agent", "agent_private", "deprecated"); + let now = OffsetDateTime::now_utc(); + let mut note = note_for_debug_visibility("owner-agent", "agent_private", "active"); assert!(recall_debug::note_debug_read_allowed( ¬e, "owner-agent", &allowed_scopes, - &shared_grants + &shared_grants, + now )); assert!(!recall_debug::note_debug_read_allowed( ¬e, "other-agent", &allowed_scopes, - &shared_grants + &shared_grants, + now + )); + + note.status = "deleted".to_string(); + + assert!(!recall_debug::note_debug_read_allowed( + ¬e, + "owner-agent", + &allowed_scopes, + &shared_grants, + now + )); + + note.status = "deprecated".to_string(); + + assert!(!recall_debug::note_debug_read_allowed( + ¬e, + "owner-agent", + &allowed_scopes, + &shared_grants, + now + )); + + note.status = "active".to_string(); + note.expires_at = Some(now); + + assert!(!recall_debug::note_debug_read_allowed( + ¬e, + "owner-agent", + &allowed_scopes, + &shared_grants, + now )); + note.expires_at = None; note.scope = "project_shared".to_string(); assert!(!recall_debug::note_debug_read_allowed( ¬e, "other-agent", 
&allowed_scopes, - &shared_grants + &shared_grants, + now )); let shared_grants = HashSet::from([SharedSpaceGrantKey { @@ -1849,7 +1898,8 @@ mod tests { ¬e, "other-agent", &allowed_scopes, - &shared_grants + &shared_grants, + now )); } diff --git a/packages/elf-service/src/search.rs b/packages/elf-service/src/search.rs index efbbccb..8101de2 100644 --- a/packages/elf-service/src/search.rs +++ b/packages/elf-service/src/search.rs @@ -73,6 +73,8 @@ WITH selected_facts AS ( gf.valid_to, (gf.valid_from <= $4 AND (gf.valid_to IS NULL OR gf.valid_to > $4)) AS is_current FROM unnest($7::uuid[]) AS snc(selected_note_id) + JOIN memory_notes selected_note + ON selected_note.note_id = snc.selected_note_id JOIN graph_fact_evidence gfe ON gfe.note_id = snc.selected_note_id JOIN graph_facts gf @@ -87,9 +89,32 @@ WITH selected_facts AS ( AND object_entity.project_id = $2 WHERE gf.tenant_id = $1 AND gf.project_id = $2 + AND selected_note.tenant_id = $1 + AND selected_note.project_id = $2 + AND selected_note.status = 'active' + AND ( + selected_note.expires_at IS NULL + OR selected_note.expires_at > $4 + ) + AND ( + ($5 AND selected_note.scope = 'agent_private' AND selected_note.agent_id = $3) + OR ( + selected_note.scope = ANY($6::text[]) + AND ( + selected_note.agent_id = $3 + OR concat(selected_note.scope, ':', selected_note.agent_id) = ANY($10::text[]) + ) + ) + ) AND ( ($5 AND gf.scope = 'agent_private' AND gf.agent_id = $3) - OR gf.scope = ANY($6::text[]) + OR ( + gf.scope = ANY($6::text[]) + AND ( + gf.agent_id = $3 + OR concat(gf.scope, ':', gf.agent_id) = ANY($10::text[]) + ) + ) ) AND gf.valid_from <= $4 ORDER BY @@ -164,6 +189,25 @@ evidence_ranked AS ( FROM bounded_facts bf JOIN graph_fact_evidence e ON e.fact_id = bf.fact_id + JOIN memory_notes evidence_note + ON evidence_note.note_id = e.note_id + AND evidence_note.tenant_id = $1 + AND evidence_note.project_id = $2 + AND evidence_note.status = 'active' + AND ( + evidence_note.expires_at IS NULL + OR 
evidence_note.expires_at > $4 + ) + AND ( + ($5 AND evidence_note.scope = 'agent_private' AND evidence_note.agent_id = $3) + OR ( + evidence_note.scope = ANY($6::text[]) + AND ( + evidence_note.agent_id = $3 + OR concat(evidence_note.scope, ':', evidence_note.agent_id) = ANY($10::text[]) + ) + ) + ) ), fact_contexts AS ( SELECT @@ -4675,6 +4719,16 @@ WHERE note_id = ANY($1::uuid[]) let private_allowed = allowed_scopes.iter().any(|scope| scope == "agent_private"); let non_private_scopes: Vec = allowed_scopes.iter().filter(|scope| *scope != "agent_private").cloned().collect(); + let org_shared_allowed = allowed_scopes.iter().any(|scope| scope == "org_shared"); + let shared_grants = access::load_shared_read_grants_with_org_shared( + &self.db.pool, + tenant_id, + project_id, + agent_id, + org_shared_allowed, + ) + .await?; + let shared_scope_keys = access::shared_scope_key_strings(&shared_grants); let (max_evidence_notes_per_fact, max_facts_per_item) = self.relation_context_bounds(); let rows = self .fetch_relation_context_rows( @@ -4683,6 +4737,7 @@ WHERE note_id = ANY($1::uuid[]) project_id, agent_id, &non_private_scopes, + shared_scope_keys.as_slice(), private_allowed, now, max_evidence_notes_per_fact, @@ -4711,6 +4766,7 @@ WHERE note_id = ANY($1::uuid[]) project_id: &str, agent_id: &str, non_private_scopes: &[String], + shared_scope_keys: &[String], private_allowed: bool, now: OffsetDateTime, max_evidence_notes_per_fact: i32, @@ -4726,6 +4782,7 @@ WHERE note_id = ANY($1::uuid[]) .bind(note_ids) .bind(max_evidence_notes_per_fact) .bind(max_facts_per_item) + .bind(shared_scope_keys) .fetch_all(&self.db.pool) .await?) 
} @@ -4737,6 +4794,10 @@ WHERE note_id = ANY($1::uuid[]) HashMap::new(); for row in rows { + if row.evidence_note_ids.is_empty() { + continue; + } + let object = if row.object_entity_id.is_some() { SearchExplainRelationContextObject { entity: Some(SearchExplainRelationEntityRef { @@ -6597,6 +6658,43 @@ mod tests { assert!(audit.get("token_id").is_none()); } + #[test] + fn relation_context_rows_without_evidence_are_suppressed() { + let now = OffsetDateTime::from_unix_timestamp(100).expect("valid timestamp"); + let note_id = Uuid::from_u128(1); + let contexts = crate::ElfService::group_relation_context_rows(vec![ + search::SearchRelationContextRow { + note_id, + fact_id: Uuid::from_u128(2), + scope: "project_shared".to_string(), + subject_canonical: Some("Alice".to_string()), + subject_kind: Some("person".to_string()), + predicate: "prefers".to_string(), + object_entity_id: None, + object_canonical: None, + object_kind: None, + object_value: Some("source-bound recall".to_string()), + valid_from: now, + valid_to: None, + is_current: true, + evidence_note_ids: Vec::new(), + }, + ]); + + assert!(!contexts.contains_key(¬e_id)); + } + + #[test] + fn relation_context_sql_enforces_shared_grant_keys() { + assert!( + search::RELATION_CONTEXT_SQL + .contains("concat(gf.scope, ':', gf.agent_id) = ANY($10::text[])") + ); + assert!(search::RELATION_CONTEXT_SQL.contains( + "concat(evidence_note.scope, ':', evidence_note.agent_id) = ANY($10::text[])" + )); + } + fn test_chunk_candidate(note_id: Uuid, retrieval_rank: u32) -> ChunkCandidate { ChunkCandidate { chunk_id: Uuid::new_v4(), diff --git a/packages/elf-service/tests/acceptance/docs_extension_v1.rs b/packages/elf-service/tests/acceptance/docs_extension_v1.rs index e575c3f..31646cf 100644 --- a/packages/elf-service/tests/acceptance/docs_extension_v1.rs +++ b/packages/elf-service/tests/acceptance/docs_extension_v1.rs @@ -20,10 +20,10 @@ use uuid::Uuid; use crate::acceptance::{self, SpyExtractor, StubEmbedding, StubRerank, 
chunking::ChunkingConfig}; use elf_config::EmbeddingProviderConfig; use elf_service::{ - AddNoteInput, AddNoteRequest, BoxFuture, DocsExcerptsGetRequest, DocsGetRequest, - DocsPutRequest, DocsPutResponse, DocsSearchL0Request, ElfService, EmbeddingProvider, Error, - PayloadLevel, Providers, Result, SearchRequest, TextQuoteSelector, - docs::DocRetrievalTrajectory, + AddNoteInput, AddNoteRequest, BoxFuture, DocsDeleteRequest, DocsExcerptsGetRequest, + DocsGetRequest, DocsPutRequest, DocsPutResponse, DocsSearchL0Request, ElfService, + EmbeddingProvider, Error, NoteOp, PayloadLevel, Providers, Result, SearchRequest, + TextQuoteSelector, docs::DocRetrievalTrajectory, }; use elf_storage::{db::Db, qdrant::QdrantStore}; use elf_testkit::TestDatabase; @@ -271,6 +271,107 @@ async fn docs_put_get_excerpts_and_search_l0_work_end_to_end() { test_db.cleanup().await.expect("Failed to cleanup test database."); } +#[tokio::test] +#[ignore = "Requires external Postgres and Qdrant. Set ELF_PG_DSN and ELF_QDRANT_URL (or ELF_QDRANT_GRPC_URL) to run."] +async fn docs_delete_marks_doc_deleted_and_removes_doc_vectors() { + let Some(ctx) = setup_docs_context().await else { return }; + let DocsContext { test_db, service } = ctx; + let put = put_test_doc(&service).await; + let (handle, shutdown) = spawn_doc_worker(&service).await; + + assert!( + wait_for_doc_outbox_done(&service.db.pool, put.doc_id, std::time::Duration::from_secs(15)) + .await, + "Expected doc UPSERT outbox to reach DONE." + ); + assert!( + fetch_first_doc_chunk_point(&service, put.doc_id).await.is_some(), + "Expected indexed doc chunk before delete." 
+ ); + + let deleted = service + .docs_delete(DocsDeleteRequest { + tenant_id: "t".to_string(), + project_id: "p".to_string(), + agent_id: "owner".to_string(), + doc_id: put.doc_id, + }) + .await + .expect("Failed to delete Source Library doc."); + + assert_eq!(deleted.doc_id, put.doc_id); + assert_eq!(deleted.op, NoteOp::Delete); + assert!(deleted.chunk_delete_count > 0); + assert!( + wait_for_doc_outbox_done(&service.db.pool, put.doc_id, std::time::Duration::from_secs(15)) + .await, + "Expected doc DELETE outbox to reach DONE." + ); + + let get_after_delete = service + .docs_get(DocsGetRequest { + tenant_id: "t".to_string(), + project_id: "p".to_string(), + agent_id: "owner".to_string(), + read_profile: "private_plus_project".to_string(), + doc_id: put.doc_id, + }) + .await; + let search_after_delete = service + .docs_search_l0(DocsSearchL0Request { + tenant_id: "t".to_string(), + project_id: "p".to_string(), + caller_agent_id: "reader".to_string(), + scope: None, + status: None, + doc_type: None, + sparse_mode: None, + domain: None, + repo: None, + agent_id: None, + thread_id: None, + updated_after: None, + updated_before: None, + ts_gte: None, + ts_lte: None, + read_profile: "private_plus_project".to_string(), + query: "peregrine".to_string(), + top_k: Some(5), + candidate_k: Some(20), + explain: None, + }) + .await + .expect("Failed to search docs after delete."); + let second_delete = service + .docs_delete(DocsDeleteRequest { + tenant_id: "t".to_string(), + project_id: "p".to_string(), + agent_id: "owner".to_string(), + doc_id: put.doc_id, + }) + .await + .expect("Second Source Library delete should be idempotent."); + + assert!(matches!(get_after_delete, Err(Error::NotFound { .. }))); + assert!(search_after_delete.items.iter().all(|item| item.doc_id != put.doc_id)); + assert!( + fetch_first_doc_chunk_point(&service, put.doc_id).await.is_none(), + "Deleted Source Library doc chunk must be removed from Qdrant docs index." 
+ ); + assert_eq!(second_delete.op, NoteOp::None); + assert_eq!(second_delete.chunk_delete_count, 0); + + let _ = shutdown.send(()); + + handle.abort(); + + let _ = handle.await; + + drop(service); + + test_db.cleanup().await.expect("Failed to cleanup test database."); +} + #[tokio::test] #[ignore = "Requires external Postgres and Qdrant. Set ELF_PG_DSN and ELF_QDRANT_URL (or ELF_QDRANT_GRPC_URL) to run."] async fn docs_put_source_library_records_do_not_create_memory_notes() { diff --git a/packages/elf-service/tests/acceptance/graph_ingestion.rs b/packages/elf-service/tests/acceptance/graph_ingestion.rs index 511c219..700b8c5 100644 --- a/packages/elf-service/tests/acceptance/graph_ingestion.rs +++ b/packages/elf-service/tests/acceptance/graph_ingestion.rs @@ -12,9 +12,9 @@ use crate::acceptance::{self, SpyExtractor, StubEmbedding, StubRerank}; use elf_config::EmbeddingProviderConfig; use elf_domain::memory_policy::MemoryPolicyDecision; use elf_service::{ - AddEventRequest, AddNoteInput, AddNoteRequest, BoxFuture, ElfService, EmbeddingProvider, - EventMessage, GraphQueryEntityRef, GraphQueryPredicateRef, GraphQueryRequest, NoteOp, - Providers, RelationTemporalStatus, Result, StructuredFields, + AddEventRequest, AddNoteInput, AddNoteRequest, BoxFuture, DeleteRequest, ElfService, + EmbeddingProvider, EventMessage, GraphQueryEntityRef, GraphQueryPredicateRef, + GraphQueryRequest, NoteOp, Providers, RelationTemporalStatus, Result, StructuredFields, }; const TEST_TENANT: &str = "t"; @@ -723,6 +723,100 @@ async fn add_note_persists_graph_relations() { test_db.cleanup().await.expect("Failed to cleanup test database."); } +#[tokio::test] +#[ignore = "Requires external Postgres and Qdrant. 
Set ELF_PG_DSN and ELF_QDRANT_URL to run."] +async fn graph_query_suppresses_deleted_evidence_notes() { + let Some(test_db) = acceptance::test_db().await else { + eprintln!("Skipping graph_query_suppresses_deleted_evidence_notes; set ELF_PG_DSN to run."); + + return; + }; + let Some(qdrant_url) = acceptance::test_qdrant_url() else { + eprintln!( + "Skipping graph_query_suppresses_deleted_evidence_notes; set ELF_QDRANT_URL to run.", + ); + + return; + }; + let providers = Providers::new( + Arc::new(StubEmbedding { vector_dim: 4_096 }), + Arc::new(StubRerank), + Arc::new(SpyExtractor { + calls: Arc::new(AtomicUsize::new(0)), + payload: serde_json::json!({ "notes": [] }), + }), + ); + let collection = test_db.collection_name("elf_acceptance"); + let docs_collection = test_db.collection_name("elf_acceptance_docs"); + let cfg = acceptance::test_config( + test_db.dsn().to_string(), + qdrant_url, + 4_096, + collection, + docs_collection, + ); + let service = + acceptance::build_service(cfg, providers).await.expect("Failed to build service."); + + acceptance::reset_db(&service.db.pool).await.expect("Failed to reset test database."); + + let note_id = + add_fact_note(&service, "mentorship", "Alice mentors Bob.", "mentors", "Bob").await; + let before_delete = service + .graph_query(GraphQueryRequest { + tenant_id: TEST_TENANT.to_string(), + project_id: TEST_PROJECT.to_string(), + agent_id: "a".to_string(), + read_profile: "private_only".to_string(), + subject: GraphQueryEntityRef::Surface { surface: "Alice".to_string() }, + predicate: Some(GraphQueryPredicateRef::Surface { surface: "mentors".to_string() }), + scopes: Some(vec![TEST_SCOPE.to_string()]), + as_of: None, + limit: Some(10), + explain: Some(true), + }) + .await + .expect("graph query before delete should succeed"); + + assert_eq!(before_delete.facts.len(), 1); + assert_eq!(before_delete.facts[0].evidence_note_ids, vec![note_id]); + + let delete = service + .delete(DeleteRequest { + tenant_id: 
TEST_TENANT.to_string(), + project_id: TEST_PROJECT.to_string(), + agent_id: "a".to_string(), + note_id, + }) + .await + .expect("note delete should succeed"); + + assert_eq!(delete.op, NoteOp::Delete); + + let after_delete = service + .graph_query(GraphQueryRequest { + tenant_id: TEST_TENANT.to_string(), + project_id: TEST_PROJECT.to_string(), + agent_id: "a".to_string(), + read_profile: "private_only".to_string(), + subject: GraphQueryEntityRef::Surface { surface: "Alice".to_string() }, + predicate: Some(GraphQueryPredicateRef::Surface { surface: "mentors".to_string() }), + scopes: Some(vec![TEST_SCOPE.to_string()]), + as_of: None, + limit: Some(10), + explain: Some(true), + }) + .await + .expect("graph query after delete should succeed"); + + assert!( + after_delete.facts.is_empty(), + "graph facts without active readable evidence notes must be suppressed" + ); + + test_db.cleanup().await.expect("Failed to cleanup test database."); +} + #[tokio::test] #[ignore = "Requires external Postgres and Qdrant. Set ELF_PG_DSN and ELF_QDRANT_URL to run."] async fn add_event_persists_graph_relations() { diff --git a/packages/elf-service/tests/acceptance/knowledge_pages.rs b/packages/elf-service/tests/acceptance/knowledge_pages.rs index 2aa85c8..81dd0c5 100644 --- a/packages/elf-service/tests/acceptance/knowledge_pages.rs +++ b/packages/elf-service/tests/acceptance/knowledge_pages.rs @@ -7,7 +7,8 @@ use crate::acceptance::{self, SpyExtractor, StubEmbedding, StubRerank}; use elf_domain::knowledge::KnowledgePageKind; use elf_service::{ AddNoteInput, AddNoteRequest, ElfService, KnowledgePageLintRequest, - KnowledgePageRebuildRequest, KnowledgePageRebuildResponse, Providers, + KnowledgePageRebuildRequest, KnowledgePageRebuildResponse, KnowledgePageSearchRequest, + Providers, }; use elf_testkit::TestDatabase; @@ -518,3 +519,112 @@ WHERE note_id = $3", && finding.source_id == Some(source_ids.note_id) })); } + +#[tokio::test] +#[ignore = "Requires external Postgres and Qdrant. 
Set ELF_PG_DSN and ELF_QDRANT_URL to run this test."] +async fn knowledge_page_search_suppresses_deleted_source_library_spans() { + let Some(fixture) = + setup_service("knowledge_page_search_suppresses_deleted_source_library_spans").await + else { + return; + }; + let service = &fixture.service; + let source_ids = insert_rebuild_sources(service).await; + let page = service + .knowledge_page_rebuild(knowledge_foundation_request(source_ids)) + .await + .expect("knowledge page should rebuild"); + let before_delete = service + .knowledge_pages_search(KnowledgePageSearchRequest { + tenant_id: TENANT_ID.to_string(), + project_id: PROJECT_ID.to_string(), + agent_id: AGENT_ID.to_string(), + read_profile: "private_plus_project".to_string(), + query: "Source Library spans".to_string(), + page_kind: Some(KnowledgePageKind::Project), + limit: Some(10), + }) + .await + .expect("knowledge page search should run"); + + assert!( + before_delete.items.iter().any(|item| item.page_id == page.page.page.page_id + && item.source_refs.iter().any(|source_ref| { + source_ref.source_kind == "doc" || source_ref.source_kind == "doc_chunk" + })), + "expected search to return the Source Library-backed page section before delete" + ); + + let private_only = service + .knowledge_pages_search(KnowledgePageSearchRequest { + tenant_id: TENANT_ID.to_string(), + project_id: PROJECT_ID.to_string(), + agent_id: AGENT_ID.to_string(), + read_profile: "private_only".to_string(), + query: "Source Library spans".to_string(), + page_kind: Some(KnowledgePageKind::Project), + limit: Some(10), + }) + .await + .expect("knowledge page search should run"); + + assert!( + private_only.items.iter().all(|item| { + !item.source_refs.iter().any(|source_ref| { + source_ref.source_kind == "doc" || source_ref.source_kind == "doc_chunk" + }) + }), + "private_only search must not recall project-shared Source Library snippets" + ); + + let ungranted_shared_reader = service + .knowledge_pages_search(KnowledgePageSearchRequest 
{ + tenant_id: TENANT_ID.to_string(), + project_id: PROJECT_ID.to_string(), + agent_id: "agent_without_source_grant".to_string(), + read_profile: "private_plus_project".to_string(), + query: "Source Library spans".to_string(), + page_kind: Some(KnowledgePageKind::Project), + limit: Some(10), + }) + .await + .expect("knowledge page search should run"); + + assert!( + ungranted_shared_reader.items.iter().all(|item| { + !item.source_refs.iter().any(|source_ref| { + source_ref.source_kind == "doc" || source_ref.source_kind == "doc_chunk" + }) + }), + "project-shared Source Library snippets require an owner or active shared grant" + ); + + sqlx::query("UPDATE doc_documents SET status = 'deleted', updated_at = $1 WHERE doc_id = $2") + .bind(OffsetDateTime::now_utc()) + .bind(source_ids.doc_id) + .execute(&service.db.pool) + .await + .expect("source document should be marked deleted"); + + let after_delete = service + .knowledge_pages_search(KnowledgePageSearchRequest { + tenant_id: TENANT_ID.to_string(), + project_id: PROJECT_ID.to_string(), + agent_id: AGENT_ID.to_string(), + read_profile: "private_plus_project".to_string(), + query: "Source Library spans".to_string(), + page_kind: Some(KnowledgePageKind::Project), + limit: Some(10), + }) + .await + .expect("knowledge page search should run"); + + assert!( + after_delete.items.iter().all(|item| { + !item.source_refs.iter().any(|source_ref| { + source_ref.source_kind == "doc" || source_ref.source_kind == "doc_chunk" + }) + }), + "deleted Source Library docs and chunks must not be recalled through derived page search" + ); +} diff --git a/packages/elf-storage/src/knowledge.rs b/packages/elf-storage/src/knowledge.rs index 94902ab..685f3d9 100644 --- a/packages/elf-storage/src/knowledge.rs +++ b/packages/elf-storage/src/knowledge.rs @@ -120,6 +120,24 @@ pub struct KnowledgePageLintFindingInsert<'a> { pub now: OffsetDateTime, } +/// Parameters for fetching graph relation sources for knowledge pages. 
+pub struct KnowledgeRelationSourcesFetch<'a> { + /// Tenant that owns the relation sources. + pub tenant_id: &'a str, + /// Project that owns the relation sources. + pub project_id: &'a str, + /// Agent requesting source readback, when visibility should be caller-scoped. + pub agent_id: Option<&'a str>, + /// Scopes allowed by the caller read profile. + pub allowed_scopes: &'a [String], + /// Shared owner/scope grant keys readable by the caller. + pub shared_scope_keys: &'a [String], + /// Whether private scope is readable by the caller. + pub private_allowed: bool, + /// Graph fact identifiers to fetch. + pub fact_ids: &'a [Uuid], +} + /// Authoritative note source row used by the knowledge page rebuilder. #[derive(Debug, FromRow)] pub struct KnowledgeNoteSource { @@ -1031,6 +1049,8 @@ pub async fn fetch_knowledge_note_sources<'e, E>( executor: E, tenant_id: &str, project_id: &str, + agent_id: Option<&str>, + allowed_scopes: &[String], note_ids: &[Uuid], ) -> Result> where @@ -1060,11 +1080,17 @@ SELECT FROM memory_notes WHERE tenant_id = $1 AND project_id = $2 - AND note_id = ANY($3::uuid[]) + AND ($3::text IS NULL OR scope <> 'agent_private' OR agent_id = $3) + AND scope = ANY($4::text[]) + AND note_id = ANY($5::uuid[]) + AND status = 'active' + AND (expires_at IS NULL OR expires_at > now()) ORDER BY updated_at ASC, note_id ASC", ) .bind(tenant_id) .bind(project_id) + .bind(agent_id) + .bind(allowed_scopes) .bind(note_ids) .fetch_all(executor) .await?; @@ -1077,6 +1103,8 @@ pub async fn fetch_knowledge_event_sources<'e, E>( executor: E, tenant_id: &str, project_id: &str, + agent_id: Option<&str>, + allowed_scopes: &[String], decision_ids: &[Uuid], ) -> Result> where @@ -1089,27 +1117,39 @@ where let rows = sqlx::query_as::<_, KnowledgeEventSource>( "\ SELECT - decision_id, - agent_id, - scope, - pipeline, - note_type, - note_key, - note_id, - policy_decision, - note_op, - reason_code, - details, - ts + memory_ingest_decisions.decision_id, + 
memory_ingest_decisions.agent_id, + memory_ingest_decisions.scope, + memory_ingest_decisions.pipeline, + memory_ingest_decisions.note_type, + memory_ingest_decisions.note_key, + memory_ingest_decisions.note_id, + memory_ingest_decisions.policy_decision, + memory_ingest_decisions.note_op, + memory_ingest_decisions.reason_code, + memory_ingest_decisions.details, + memory_ingest_decisions.ts FROM memory_ingest_decisions -WHERE tenant_id = $1 - AND project_id = $2 - AND decision_id = ANY($3::uuid[]) - AND pipeline = 'add_event' -ORDER BY ts ASC, decision_id ASC", +JOIN memory_notes note ON note.note_id = memory_ingest_decisions.note_id +WHERE memory_ingest_decisions.tenant_id = $1 + AND memory_ingest_decisions.project_id = $2 + AND ($3::text IS NULL OR memory_ingest_decisions.scope <> 'agent_private' OR memory_ingest_decisions.agent_id = $3) + AND memory_ingest_decisions.scope = ANY($4::text[]) + AND memory_ingest_decisions.decision_id = ANY($5::uuid[]) + AND memory_ingest_decisions.pipeline = 'add_event' + AND memory_ingest_decisions.policy_decision IN ('remember', 'update') + AND note.tenant_id = memory_ingest_decisions.tenant_id + AND note.project_id = memory_ingest_decisions.project_id + AND note.status = 'active' + AND (note.expires_at IS NULL OR note.expires_at > now()) + AND ($3::text IS NULL OR note.scope <> 'agent_private' OR note.agent_id = $3) + AND note.scope = ANY($4::text[]) +ORDER BY memory_ingest_decisions.ts ASC, memory_ingest_decisions.decision_id ASC", ) .bind(tenant_id) .bind(project_id) + .bind(agent_id) + .bind(allowed_scopes) .bind(decision_ids) .fetch_all(executor) .await?; @@ -1120,14 +1160,12 @@ ORDER BY ts ASC, decision_id ASC", /// Fetches relation sources by graph fact identifier for a knowledge page rebuild. 
pub async fn fetch_knowledge_relation_sources<'e, E>( executor: E, - tenant_id: &str, - project_id: &str, - fact_ids: &[Uuid], + params: KnowledgeRelationSourcesFetch<'_>, ) -> Result> where E: PgExecutor<'e>, { - if fact_ids.is_empty() { + if params.fact_ids.is_empty() { return Ok(Vec::new()); } @@ -1154,7 +1192,25 @@ SELECT 'updated_at', note.updated_at ) ORDER BY evidence.created_at ASC, evidence.note_id ASC - ) FILTER (WHERE evidence.note_id IS NOT NULL), + ) FILTER ( + WHERE evidence.note_id IS NOT NULL + AND note.tenant_id = gf.tenant_id + AND note.project_id = gf.project_id + AND note.status = 'active' + AND (note.expires_at IS NULL OR note.expires_at > now()) + AND note.scope = ANY($4::text[]) + AND ( + $3::text IS NULL + OR ($6 AND note.scope = 'agent_private' AND note.agent_id = $3) + OR ( + note.scope <> 'agent_private' + AND ( + note.agent_id = $3 + OR concat(note.scope, ':', note.agent_id) = ANY($5::text[]) + ) + ) + ) + ), '[]'::jsonb ) AS evidence_notes FROM graph_facts gf @@ -1164,7 +1220,42 @@ LEFT JOIN graph_fact_evidence evidence ON evidence.fact_id = gf.fact_id LEFT JOIN memory_notes note ON note.note_id = evidence.note_id WHERE gf.tenant_id = $1 AND gf.project_id = $2 - AND gf.fact_id = ANY($3::uuid[]) + AND gf.scope = ANY($4::text[]) + AND ( + $3::text IS NULL + OR ($6 AND gf.scope = 'agent_private' AND gf.agent_id = $3) + OR ( + gf.scope <> 'agent_private' + AND ( + gf.agent_id = $3 + OR concat(gf.scope, ':', gf.agent_id) = ANY($5::text[]) + ) + ) + ) + AND gf.fact_id = ANY($7::uuid[]) + AND EXISTS ( + SELECT 1 + FROM graph_fact_evidence readable_evidence + JOIN memory_notes readable_note + ON readable_note.note_id = readable_evidence.note_id + WHERE readable_evidence.fact_id = gf.fact_id + AND readable_note.tenant_id = gf.tenant_id + AND readable_note.project_id = gf.project_id + AND readable_note.status = 'active' + AND (readable_note.expires_at IS NULL OR readable_note.expires_at > now()) + AND readable_note.scope = ANY($4::text[]) + AND ( 
+ $3::text IS NULL + OR ($6 AND readable_note.scope = 'agent_private' AND readable_note.agent_id = $3) + OR ( + readable_note.scope <> 'agent_private' + AND ( + readable_note.agent_id = $3 + OR concat(readable_note.scope, ':', readable_note.agent_id) = ANY($5::text[]) + ) + ) + ) + ) GROUP BY gf.fact_id, gf.agent_id, @@ -1180,9 +1271,13 @@ GROUP BY gf.updated_at ORDER BY gf.updated_at ASC, gf.fact_id ASC", ) - .bind(tenant_id) - .bind(project_id) - .bind(fact_ids) + .bind(params.tenant_id) + .bind(params.project_id) + .bind(params.agent_id) + .bind(params.allowed_scopes) + .bind(params.shared_scope_keys) + .bind(params.private_allowed) + .bind(params.fact_ids) .fetch_all(executor) .await?; @@ -1244,6 +1339,8 @@ pub async fn fetch_knowledge_doc_sources<'e, E>( executor: E, tenant_id: &str, project_id: &str, + agent_id: Option<&str>, + allowed_scopes: &[String], doc_ids: &[Uuid], ) -> Result> where @@ -1271,12 +1368,16 @@ SELECT FROM doc_documents WHERE tenant_id = $1 AND project_id = $2 - AND doc_id = ANY($3::uuid[]) + AND ($3::text IS NULL OR scope <> 'agent_private' OR agent_id = $3) + AND scope = ANY($4::text[]) + AND doc_id = ANY($5::uuid[]) AND status = 'active' ORDER BY updated_at ASC, doc_id ASC", ) .bind(tenant_id) .bind(project_id) + .bind(agent_id) + .bind(allowed_scopes) .bind(doc_ids) .fetch_all(executor) .await?; @@ -1289,6 +1390,8 @@ pub async fn fetch_knowledge_doc_chunk_sources<'e, E>( executor: E, tenant_id: &str, project_id: &str, + agent_id: Option<&str>, + allowed_scopes: &[String], chunk_ids: &[Uuid], ) -> Result> where @@ -1321,12 +1424,16 @@ FROM doc_chunks c JOIN doc_documents d ON d.doc_id = c.doc_id WHERE d.tenant_id = $1 AND d.project_id = $2 - AND c.chunk_id = ANY($3::uuid[]) + AND ($3::text IS NULL OR d.scope <> 'agent_private' OR d.agent_id = $3) + AND d.scope = ANY($4::text[]) + AND c.chunk_id = ANY($5::uuid[]) AND d.status = 'active' ORDER BY d.updated_at ASC, c.chunk_index ASC, c.chunk_id ASC", ) .bind(tenant_id) .bind(project_id) + 
.bind(agent_id) + .bind(allowed_scopes) .bind(chunk_ids) .fetch_all(executor) .await?;