hack-ink · yvette-carlisle · Jun 22, 2026 · Jun 22, 2026 · Jun 22, 2026
diff --git a/apps/elf-mcp/src/server.rs b/apps/elf-mcp/src/server.rs
@@ -1143,7 +1143,44 @@ fn docs_put_schema() -> Arc<JsonObject> {
 				"repo": { "type": "string" },
 				"commit_sha": { "type": "string" },
 				"pr_number": { "type": "integer" },
-				"issue_number": { "type": "integer" }
+				"issue_number": { "type": "integer" },
+				"source_kind": {
+					"type": "string",
+					"enum": ["article", "social_thread", "pdf", "text_export", "repo_file", "chat_excerpt", "web_page"]
+				},
+				"canonical_uri": { "type": "string" },
+				"captured_at": { "type": "string", "format": "date-time" },
+				"source_created_at": { "type": "string", "format": "date-time" },
+				"trust_label": {
+					"type": "string",
+					"enum": ["trusted", "user_captured", "public_web", "third_party", "unverified"]
+				},
+				"author": { "type": "string" },
+				"handle": { "type": "string" },
+				"source_content_hash": { "type": "string" },
+				"excerpt_locator": {
+					"type": "object",
+					"additionalProperties": true,
+					"properties": {
+						"quote": {
+							"type": "object",
+							"required": ["exact"],
+							"properties": {
+								"exact": { "type": "string" },
+								"prefix": { "type": "string" },
+								"suffix": { "type": "string" }
+							}
+						},
+						"position": {
+							"type": "object",
+							"required": ["start", "end"],
+							"properties": {
+								"start": { "type": "integer" },
+								"end": { "type": "integer" }
+							}
+						}
+					}
+				}
 			},
 			"allOf": [
 				{
@@ -2287,6 +2324,11 @@ mod tests {
 		}
 
 		let write_policy = properties.get("write_policy").and_then(serde_json::Value::as_object);
+		let source_ref_properties = properties
+			.get("source_ref")
+			.and_then(|value| value.get("properties"))
+			.and_then(serde_json::Value::as_object)
+			.expect("docs_put source_ref schema is missing properties.");
 
 		assert!(
 			write_policy.is_some_and(|field| {
@@ -2297,6 +2339,15 @@ mod tests {
 			}),
 			"Missing write_policy object/null type in docs_put schema."
 		);
+
+		for field in
+			["source_kind", "canonical_uri", "captured_at", "trust_label", "excerpt_locator"]
+		{
+			assert!(
+				source_ref_properties.contains_key(field),
+				"Missing source_ref field: {field}."
+			);
+		}
 	}
 
 	#[test]

diff --git a/docs/spec/system_doc_source_ref_v1.md b/docs/spec/system_doc_source_ref_v1.md
@@ -6,12 +6,15 @@ resource: docs/spec/system_doc_source_ref_v1.md
 status: active
 authority: normative
 owner: spec
-last_verified: 2026-06-20
+last_verified: 2026-06-22
 tags:
   - docs
   - spec
 source_refs: []
-code_refs: []
+code_refs:
+  - apps/elf-mcp/src/server.rs
+  - packages/elf-service/src/docs.rs
+  - packages/elf-storage/src/docs.rs
 related: []
 drift_watch:
   - docs/spec/system_doc_source_ref_v1.md
@@ -189,6 +192,71 @@ Boundary:
   create or mutate durable Memory Notes unless the caller separately invokes an
   explicit memory-write or reviewed promotion path.
 
+Normalized capture output:
+
+- `docs_put` MUST return `source_capture.schema = "doc_source_capture/v1"`.
+- `source_capture.source_record_id` MUST equal the stored `doc_documents.doc_id`.
+- `source_capture.origin` MUST be the canonical source origin used for operator
+  inspection and deduplication. Source Library `canonical_uri` takes precedence
+  over legacy URL, URI, thread, search, or repo-derived origins.
+- `source_capture.captured_at` MUST be the Source Library `captured_at` value
+  when present. If the Source Library profile is not active, the service MAY use
+  the service capture timestamp.
+- `source_capture.content_hash` MUST be the BLAKE3 hex hash of the persisted
+  document content after write-policy transforms.
+- `source_capture.visibility_scope` MUST be the document scope.
+- `source_capture.title` SHOULD be copied from the request title when present.
+- `source_capture.source_type` MUST be `source_kind` when present, otherwise the
+  normalized `doc_type`.
+- `source_capture.source_spans` MUST list stable span references for persisted
+  chunks.
+- `source_capture.policy_spans` MUST list excluded or redacted spans when
+  write-policy hooks remove or transform source content.
+
+Stable source records and spans:
+
+- `doc_documents.doc_id` is the Source Library source record id for captured
+  docs. It MUST be deterministic for the same tenant, effective project, agent,
+  scope, doc type, source identity, and persisted content hash.
+- Persisted chunk ids MUST be deterministic for the same source record id and
+  chunk index.
+- Captured source span ids MUST be deterministic for the same persisted content
+  hash, byte offsets, and span status.
+- Captured span offsets are byte offsets into the persisted document content.
+- Policy span offsets are byte offsets into the original request content before
+  write-policy transforms.
+
+`doc_source_span/v1` fields:
+
+- `schema` (string): exact value `doc_source_span/v1`.
+- `span_id` (string UUID): stable span identifier.
+- `chunk_id` (string UUID, optional): present for persisted captured chunks.
+- `status` (string): `captured`, `excluded`, or `redacted`.
+- `reason_code` (string, optional): MUST be set when `status` is not `captured`.
+- `start_offset` and `end_offset` (integers): byte offsets, with
+  `start_offset <= end_offset`.
+- `content_hash` (string): BLAKE3 hex hash for the content the offsets address.
+- `chunk_hash` (string, optional): BLAKE3 hex hash for captured chunk text.
+
+Typed policy span reasons:
+
+- Excluded spans MUST use `reason_code = "WRITE_POLICY_EXCLUSION"`.
+- Redacted spans MUST use `reason_code = "WRITE_POLICY_REDACTION"`.
+- Unsupported or policy-removed content MUST be represented through a typed span
+  reason or a typed validation error. It MUST NOT disappear silently from Source
+  Library audit surfaces.
+
+Persisted normalized `source_ref`:
+
+- The stored `doc_documents.source_ref` MUST retain the caller-provided
+  `doc_source_ref/v1` fields and add normalized capture fields:
+  `source_record_id`, `origin`, `captured_at`, `content_hash`,
+  `visibility_scope`, `source_type`, and `source_spans`.
+- When policy spans exist, stored `doc_documents.source_ref` MUST include
+  `policy_spans`.
+- Normalized capture fields are evidence metadata only. They MUST NOT promote a
+  source record into approved Memory Authority.
+
 ==================================================
 6) Examples
 ==================================================

diff --git a/docs/spec/system_source_ref_doc_pointer_v1.md b/docs/spec/system_source_ref_doc_pointer_v1.md
@@ -6,12 +6,13 @@ resource: docs/spec/system_source_ref_doc_pointer_v1.md
 status: active
 authority: normative
 owner: spec
-last_verified: 2026-06-20
+last_verified: 2026-06-22
 tags:
   - docs
   - spec
 source_refs: []
-code_refs: []
+code_refs:
+  - packages/elf-service/src/docs.rs
 related: []
 drift_watch:
   - docs/spec/system_source_ref_doc_pointer_v1.md
@@ -86,10 +87,16 @@ All keys and string values SHOULD be ASCII-safe and stable over time.
 `ref` MAY include:
 
 - `chunk_id` (string): UUID of a specific chunk. Use when the pointer came from `docs_search_l0`.
+- `source_record_id` (string): stable Source Library source record id. For Doc
+  Extension v1 this MUST match `doc_id`.
+- `source_span_id` (string): stable Source Library span id for the returned
+  chunk or selector span.
 
 Notes:
 - `doc_id` is the canonical lookup key for hydration.
 - `chunk_id` is an optional anchor that can help choose a small search neighborhood.
+- `source_record_id` and `source_span_id` are audit identifiers. They MUST NOT
+  be treated as a memory-promotion signal.
 
 ### 3.3 `state` (optional but recommended)
 
@@ -128,11 +135,16 @@ Rules:
 
 Optional fields:
 - `level` (string): `"L0"`, `"L1"` or `"L2"` as a suggested excerpt size tier for hydration. If omitted, agents should choose based on context budget.
+- `span_id` (string): stable Source Library span id for the selector span.
 
 `docs_search_l0` returns a `locator.position` selector for the hit chunk. Agents
 may pass this selector, the returned `ref.chunk_id`, or their own quote selector
 to `docs_excerpts_get` for verified hydration.
 
+`docs_search_l0` MUST return `ref.source_span_id` equal to `locator.span_id` for
+the selected chunk span. `docs_excerpts_get` MUST return `locator.span_id` for
+the matched quote, position, or chunk selector span.
+
 ### 3.5 `hashes` (optional)
 
 `hashes` MAY include:
@@ -208,7 +220,9 @@ The agent SHOULD:
   "resolver": "elf_doc_ext/v1",
   "ref": {
     "doc_id": "6b5b2f08-9a89-4c6c-9b6b-9c0c2f0b1f2d",
-    "chunk_id": "b2e8a8d2-4c10-4a1b-98f8-7a8702fd0cc1"
+    "chunk_id": "b2e8a8d2-4c10-4a1b-98f8-7a8702fd0cc1",
+    "source_record_id": "6b5b2f08-9a89-4c6c-9b6b-9c0c2f0b1f2d",
+    "source_span_id": "3190ca88-6f24-5d55-bf8f-9cecfba95b72"
   },
   "state": {
     "content_hash": "baf7cfd2d5b71f5b0f5d5a08a3c38d7b43cf7a2e5a4f75d5c1b4a9072f6dd3b8",
@@ -219,6 +233,7 @@ The agent SHOULD:
     "chunk_hash": "bd85b0e07464bde3a7f3a2b2f3c2d5d4c1c9f0d0c1a2b3c4d5e6f7a8b9c0d1e2"
   },
   "locator": {
+    "span_id": "3190ca88-6f24-5d55-bf8f-9cecfba95b72",
     "position": {
       "start": 128,
       "end": 384