techiejd · techiejd · Jun 11, 2026 · May 31, 2026 · Jun 1, 2026 · Jun 1, 2026
@@ -832,7 +832,7 @@ curl -X POST http://localhost:3000/api/vector-retry-failed-batch \
 
 ### Local API
 
-The plugin provides a `getVectorizedPayload(payload)` function which returns a `vectorizedPayload` object exposing `search`, `queueEmbed`, `bulkEmbed`, and `retryFailedBatch` methods.
+The plugin provides a `getVectorizedPayload(payload)` function which returns a `vectorizedPayload` object exposing `search`, `findByIds`, `queueEmbed`, `bulkEmbed`, and `retryFailedBatch` methods.
 
 #### Getting the Vectorized Payload Object
 
@@ -883,6 +883,33 @@ const results = await vectorizedPayload.search({
 })
 ```
 
+#### `vectorizedPayload.findByIds(params)`
+
+Fetch stored embedding records by primary key. The `id` of each record is whatever [`search()`](#vectorizedpayloadsearchparams) returns as `result.id`, so a search result round-trips directly. Pass `populateEmbedding: true` to also get the raw embedding vector back (the normal search/query API never returns it) — the building block for "more like this" flows. It defaults to `false`, so by default you get the record's text and metadata without the heavy vector.
+
+**Params:** `{ knowledgePool: string; ids: string[]; populateEmbedding?: boolean }` (`populateEmbedding` defaults to `false`).
+
+**Returns:** `Promise<Record<string, EmbeddingRecord | undefined>>` — an object keyed by the ids you passed in. Each requested id is present as a key; a found record is the value, and an unknown or malformed id maps to `undefined`. `EmbeddingRecord` is the search result shape without `score` and with an optional `embedding?: number[]`, present only when `populateEmbedding: true`.
+
+**Example:**
+
+```typescript
+const id = '<an id from a previous search result>'
+const records = await vectorizedPayload.findByIds({
+  knowledgePool: 'mainKnowledgePool',
+  ids: [id],
+  populateEmbedding: true,
+})
+
+// Unknown or malformed ids map to `undefined`, so narrow before using the record.
+const record = records[id]
+if (record?.embedding) {
+  // record.embedding is the raw number[] vector — feed it back into search for "more like this"
+  console.log(record.embedding.length, record.chunkText)
+}
+```
+
+Because the result is keyed by id, a search result round-trips directly (`records[searchHit.id]`) and there's no positional alignment to worry about — look records up by id rather than relying on key order. Unknown or malformed ids map to `undefined` (never throw), and an empty `ids` array returns `{}` without touching the backend.
+
 #### `vectorizedPayload.queueEmbed(params)`
 
 Manually queue a vectorization job for a document.

@@ -110,6 +110,7 @@ import type {
   KnowledgePoolDynamicConfig,
   StoreChunkData,
   VectorSearchResult,
+  EmbeddingRecord,
 } from 'payloadcms-vectorize'
 
 export type DbAdapter = {
@@ -150,6 +151,13 @@ export type DbAdapter = {
     limit?: number,
     where?: Where,
   ) => Promise<Array<VectorSearchResult>>
+
+  findByIds: (
+    payload: BasePayload,
+    poolName: KnowledgePoolName,
+    ids: string[],
+    populateEmbedding?: boolean,
+  ) => Promise<Record<string, EmbeddingRecord | undefined>>
 }
 ```
 
@@ -162,6 +170,7 @@ export type DbAdapter = {
 | `deleteChunks` | After a source document is deleted. | Remove every chunk where `sourceCollection === ... && docId === ...`. Must be safe to call when no chunks exist (no-op, no throw). |
 | `hasEmbeddingVersion` | During bulk-embed planning, per candidate document. | Return `true` iff at least one chunk exists with the matching `(sourceCollection, docId, embeddingVersion)` triple. Must filter on **all three** — older `0.7.0` adapters that ignored `embeddingVersion` caused stale embeddings on model bumps. |
 | `search` | Per `/vector-search` request and per `getVectorizedPayload().search()` call. | Translate `where` (Payload-style) into your store's filter language, perform a vector search using `queryEmbedding`, and return up to `limit` results sorted by descending relevance. |
+| `findByIds` | Per `getVectorizedPayload().findByIds()` call. | Fetch stored embedding records by primary key. **Return an object keyed by the ids you were given:** every requested id must be present as a key, with a found record as the value and `undefined` for any id that didn't resolve. The raw `embedding` vector is **only included when `populateEmbedding` is `true`** (default `false`) — omit it otherwise so callers that only need text/metadata don't pay for it. Where possible, skip reading the vector at the source (pg: don't select the column; MongoDB: `{ projection: { embedding: 0 } }`); CF's `getByIds` always returns values, so omit them post-fetch. Look up by the same `id` your `search` returns as `result.id`. Unknown **and** malformed ids must map to `undefined` — never throw for a bad id. Validate the id shape against your key type before querying so a malformed id can't error the whole batch (MongoDB drops non-24-hex ids; pg drops ids that don't match the PK column type — numeric for integer PKs, uuid-shaped for `uuid` PKs — before the `IN` query; CF's ids are arbitrary strings, so an unknown one is simply absent from `getByIds`). Empty `ids` returns `{}` without a backend call. |
 
 ### Error contract
 
@@ -286,6 +295,14 @@ export const createYourDbVectorIntegration = (
       //       Return Array<VectorSearchResult> sorted by descending score.
       return []
     },
+
+    findByIds: async (payload, poolName, ids, populateEmbedding = false) => {
+      // TODO: look up the stored records for `ids` by primary key. Attach the raw
+      //       `embedding` vector only when `populateEmbedding` is true (default false);
+      //       otherwise skip reading it.
+      //       The result needs one key per requested id: the record for a hit,
+      //       `undefined` for an unknown or malformed id (never throw for a bad id).
+      // Until that lookup exists, report every requested id as a miss.
+      const misses: Array<[string, undefined]> = []
+      for (const id of ids) {
+        misses.push([id, undefined])
+      }
+      return Object.fromEntries(misses)
+    },
   }
 
   return { adapter }
@@ -361,6 +378,26 @@ export interface VectorSearchResult {
   /** Any extensionFields persisted via storeChunk must round-trip here. */
   [key: string]: any
 }
+
+/**
+ * A stored embedding record as returned by `findByIds`: the `VectorSearchResult`
+ * fields minus `score`, plus the optional raw `embedding` vector.
+ */
+export interface EmbeddingRecord {
+  /** Embedding record ID — the same value your adapter returns as VectorSearchResult.id. */
+  id: string
+  /** Source collection slug (echoed from StoreChunkData). */
+  sourceCollection: string
+  /** Source document ID (echoed from StoreChunkData). */
+  docId: string
+  /** Chunk index within the source document. */
+  chunkIndex: number
+  /** The original chunk text. */
+  chunkText: string
+  /** Embedding model/version string. */
+  embeddingVersion: string
+  /** The raw embedding vector — never returned by `search`, and only present
+   *  when `findByIds` is called with `populateEmbedding: true`. */
+  embedding?: number[]
+  /** Any extensionFields persisted via storeChunk round-trip here. */
+  [key: string]: any
+}
 ```
 
 | Field | Required | Notes |
@@ -371,6 +408,8 @@ export interface VectorSearchResult {
 | `chunkText`, `embeddingVersion` | yes | Same. |
 | `extensionFields.*` | optional | Whatever the user passed in `extensionFields` must be queryable via `where`. |
 
+> `EmbeddingRecord` (returned by `findByIds`) is `VectorSearchResult` without `score` and with an optional raw `embedding?: number[]` — present only when `findByIds` is called with `populateEmbedding: true`.
+
 ## Testing your adapter
 
 The dev harness in [`dev/`](../dev) runs the integration suite against any adapter you wire up. To test a new adapter:

@@ -61,6 +61,13 @@ function createMockCloudflareBinding() {
       }
     }),
 
+    // Mock lookup: returns a fresh { id, values, metadata } object for every id that
+    // is present in `storage`; ids that are not stored are simply left out.
+    getByIds: vi.fn(async (ids: string[]) => {
+      const hits: Array<{ id: string; values: number[]; metadata: any }> = []
+      for (const id of ids) {
+        const stored = storage.get(id)
+        if (stored === undefined) continue
+        hits.push({ id: stored.id, values: stored.values, metadata: stored.metadata })
+      }
+      return hits
+    }),
+
     list: vi.fn(async (options: any) => {
       const vectors = Array.from(storage.values()).map((item) => ({
         id: item.id,
@@ -431,4 +438,104 @@ describe('createCloudflareVectorizeIntegration', () => {
       })
     })
   })
+
+  describe('findByIds', () => {
+    // Id these tests expect for chunk 0 of posts/doc-1 stored in the `default` pool.
+    const DOC_1_CHUNK_ID = 'default:posts:doc-1:0'
+
+    // Builds an adapter and a mock payload around a fresh mock binding for one test.
+    const createHarness = () => {
+      const mockBinding = createMockCloudflareBinding()
+      const { adapter } = createCloudflareVectorizeIntegration({
+        config: { default: { dims: DIMS } },
+        binding: mockBinding as any,
+      })
+      const mockPayload = createMockPayload(mockBinding)
+      return { adapter, mockPayload }
+    }
+
+    // Stores chunk 0 of posts/doc-1 (embedding version v1) in the `default` pool.
+    const storeDoc1Chunk = (
+      { adapter, mockPayload }: ReturnType<typeof createHarness>,
+      chunkText: string,
+      embedding: number[],
+      extensionFields: Record<string, any>,
+    ) =>
+      adapter.storeChunk(mockPayload, 'default', {
+        sourceCollection: 'posts',
+        docId: 'doc-1',
+        chunkIndex: 0,
+        chunkText,
+        embeddingVersion: 'v1',
+        embedding,
+        extensionFields,
+      })
+
+    test('returns full EmbeddingRecord including embedding values when populateEmbedding is true', async () => {
+      const harness = createHarness()
+      const embedding = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]
+      await storeDoc1Chunk(harness, 'find me', embedding, { category: 'science' })
+
+      const records = await harness.adapter.findByIds(
+        harness.mockPayload,
+        'default',
+        [DOC_1_CHUNK_ID],
+        true,
+      )
+
+      expect(Object.keys(records)).toEqual([DOC_1_CHUNK_ID])
+      const record = records[DOC_1_CHUNK_ID]!
+      expect(record.id).toBe(DOC_1_CHUNK_ID)
+      expect(record.embedding).toEqual(embedding)
+      expect(record.sourceCollection).toBe('posts')
+      expect(record.docId).toBe('doc-1')
+      expect(record.chunkText).toBe('find me')
+      expect(record.embeddingVersion).toBe('v1')
+      expect((record as any).category).toBe('science')
+    })
+
+    test('omits embedding values by default', async () => {
+      const harness = createHarness()
+      await storeDoc1Chunk(harness, 'find me', [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8], {
+        category: 'science',
+      })
+
+      // No fourth argument: populateEmbedding falls back to its default.
+      const records = await harness.adapter.findByIds(harness.mockPayload, 'default', [
+        DOC_1_CHUNK_ID,
+      ])
+
+      expect(Object.keys(records)).toEqual([DOC_1_CHUNK_ID])
+      const record = records[DOC_1_CHUNK_ID]!
+      expect(record.id).toBe(DOC_1_CHUNK_ID)
+      expect(record.embedding).toBeUndefined()
+      expect(record.chunkText).toBe('find me')
+      expect((record as any).category).toBe('science')
+    })
+
+    test('maps misses to undefined', async () => {
+      const harness = createHarness()
+      await storeDoc1Chunk(harness, 'x', [0, 0, 0, 0, 0, 0, 0, 0], {})
+
+      const missingId = 'default:posts:nope:0'
+      const records = await harness.adapter.findByIds(harness.mockPayload, 'default', [
+        DOC_1_CHUNK_ID,
+        missingId,
+      ])
+
+      // Compare sorted key lists so the assertion does not depend on key order.
+      expect(Object.keys(records).sort()).toEqual([DOC_1_CHUNK_ID, missingId].sort())
+      expect(records[DOC_1_CHUNK_ID]!.id).toBe(DOC_1_CHUNK_ID)
+      expect(records[missingId]).toBeUndefined()
+    })
+
+    test('empty ids returns {}', async () => {
+      const { adapter, mockPayload } = createHarness()
+      const records = await adapter.findByIds(mockPayload, 'default', [])
+      expect(records).toEqual({})
+    })
+  })
 })
@@ -0,0 +1,48 @@
+import { BasePayload } from 'payload'
+import { KnowledgePoolName, EmbeddingRecord } from 'payloadcms-vectorize'
+import { getVectorizeBinding } from './types.js'
+
+/** Metadata keys that are surfaced as first-class `EmbeddingRecord` fields rather than extension fields. */
+const RESERVED_METADATA = ['sourceCollection', 'docId', 'chunkIndex', 'chunkText', 'embeddingVersion']
+
+/** Record keys set by this adapter itself; stored metadata must never shadow them. */
+const ADAPTER_OWNED_KEYS = ['id', 'embedding']
+
+/**
+ * Fetch stored embedding records from the Vectorize binding by vector id.
+ *
+ * @param payload - Payload instance; passed to `getVectorizeBinding` and used to log failures.
+ * @param _poolName - Knowledge pool name (not read by this function).
+ * @param ids - Vector ids to look up.
+ * @param populateEmbedding - When `true`, each found record carries the raw vector as
+ *   `embedding`; when `false` (the default) the key is left off.
+ * @returns An object with one own key per requested id: the record for a hit,
+ *   `undefined` for an id the binding did not return. Empty `ids` yields `{}`
+ *   without resolving the binding.
+ * @throws Error prefixed with `[@payloadcms-vectorize/cf] findByIds failed:` when the
+ *   binding lookup or record mapping throws.
+ */
+export default async (
+  payload: BasePayload,
+  _poolName: KnowledgePoolName,
+  ids: string[],
+  populateEmbedding = false,
+): Promise<Record<string, EmbeddingRecord | undefined>> => {
+  // Collect into a Map and materialise it with Object.fromEntries at the end. Plain
+  // `obj[id] = ...` assignment goes through the `__proto__` setter, so an id of
+  // "__proto__" would either be dropped (miss) or replace the result's prototype (hit).
+  const recordsById = new Map<string, EmbeddingRecord | undefined>()
+  for (const id of ids) recordsById.set(id, undefined)
+  if (ids.length === 0) return Object.fromEntries(recordsById)
+
+  const binding = getVectorizeBinding(payload)
+
+  try {
+    const vectors = await binding.getByIds(ids)
+    if (!vectors) return Object.fromEntries(recordsById)
+
+    for (const vector of vectors) {
+      const metadata = (vector.metadata || {}) as Record<string, unknown>
+      // Everything that is neither a reserved field nor an adapter-owned key is an
+      // extension field. Excluding `id` / `embedding` keeps stored metadata from
+      // overwriting the record id or surfacing an `embedding` key when
+      // `populateEmbedding` is false.
+      const extensionFields = Object.fromEntries(
+        Object.entries(metadata).filter(
+          ([key]) => !RESERVED_METADATA.includes(key) && !ADAPTER_OWNED_KEYS.includes(key),
+        ),
+      )
+      recordsById.set(vector.id, {
+        id: vector.id,
+        sourceCollection: String(metadata.sourceCollection ?? ''),
+        docId: String(metadata.docId ?? ''),
+        chunkIndex:
+          typeof metadata.chunkIndex === 'number'
+            ? metadata.chunkIndex
+            : parseInt(String(metadata.chunkIndex ?? '0'), 10),
+        chunkText: String(metadata.chunkText ?? ''),
+        embeddingVersion: String(metadata.embeddingVersion ?? ''),
+        // Copy the values into a plain array only when the caller asked for the vector.
+        ...(populateEmbedding ? { embedding: Array.from(vector.values ?? []) } : {}),
+        ...extensionFields,
+      })
+    }
+    return Object.fromEntries(recordsById)
+  } catch (e) {
+    // Log with the adapter prefix, then rethrow so callers still see the failure.
+    const errorMessage = e instanceof Error ? e.message : String(e)
+    payload.logger.error(`[@payloadcms-vectorize/cf] findByIds failed: ${errorMessage}`)
+    throw new Error(`[@payloadcms-vectorize/cf] findByIds failed: ${errorMessage}`)
+  }
+}
@@ -5,6 +5,7 @@ import type { CloudflareVectorizeBinding, KnowledgePoolsConfig, VectorizeBinding
 import cfMappingsCollection, { CF_MAPPINGS_SLUG } from './collections/cfMappings.js'
 import embed from './embed.js'
 import search from './search.js'
+import findByIds from './findByIds.js'
 
 /**
  * Configuration for Cloudflare Vectorize integration
@@ -113,6 +114,8 @@ export const createCloudflareVectorizeIntegration = (
       }
     },
 
+    findByIds,
+
     hasEmbeddingVersion: async (payload, poolName, sourceCollection, docId, embeddingVersion) => {
       const result = await payload.find({
         collection: CF_MAPPINGS_SLUG as CollectionSlug,