From 146549d10fc7169e83e5e8cc96ce5027702725bf Mon Sep 17 00:00:00 2001
From: techiejd <62455039+techiejd@users.noreply.github.com>
Date: Sun, 31 May 2026 23:44:33 +0700
Subject: [PATCH 01/17] build(cf): add @cloudflare/workers-types devDependency
---
adapters/cf/package.json | 1 +
pnpm-lock.yaml | 40 +++++++++++++++++++++++++---------------
2 files changed, 26 insertions(+), 15 deletions(-)
diff --git a/adapters/cf/package.json b/adapters/cf/package.json
index 60f0f1b..67c9c57 100644
--- a/adapters/cf/package.json
+++ b/adapters/cf/package.json
@@ -30,6 +30,7 @@
"payloadcms-vectorize": ">=1.0.0"
},
"devDependencies": {
+ "@cloudflare/workers-types": "^4.20240000.0",
"payloadcms-vectorize": "workspace:*"
},
"engines": {
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index c9e19e1..28b5ddf 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -19,10 +19,10 @@ importers:
version: 3.3.3
'@payloadcms/db-postgres':
specifier: 3.69.0
- version: 3.69.0(@libsql/client@0.14.0)(@opentelemetry/api@1.9.0)(payload@3.69.0(graphql@16.12.0)(typescript@5.7.3))
+ version: 3.69.0(@cloudflare/workers-types@4.20260531.1)(@libsql/client@0.14.0)(@opentelemetry/api@1.9.0)(payload@3.69.0(graphql@16.12.0)(typescript@5.7.3))
'@payloadcms/db-sqlite':
specifier: 3.69.0
- version: 3.69.0(@opentelemetry/api@1.9.0)(@types/pg@8.16.0)(payload@3.69.0(graphql@16.12.0)(typescript@5.7.3))(pg@8.16.3)
+ version: 3.69.0(@cloudflare/workers-types@4.20260531.1)(@opentelemetry/api@1.9.0)(@types/pg@8.16.0)(payload@3.69.0(graphql@16.12.0)(typescript@5.7.3))(pg@8.16.3)
'@payloadcms/eslint-config':
specifier: 3.9.0
version: 3.9.0(@typescript-eslint/eslint-plugin@8.51.0(@typescript-eslint/parser@8.51.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.7.3))(eslint@9.39.2(jiti@2.6.1))(typescript@5.7.3))(jest@30.2.0(@types/node@22.19.3)(babel-plugin-macros@3.1.0)(esbuild-register@3.6.0(esbuild@0.25.12)))(jiti@2.6.1)
@@ -153,6 +153,9 @@ importers:
specifier: '>=3.0.0 <4.0.0'
version: 3.69.0(graphql@16.12.0)(typescript@5.7.3)
devDependencies:
+ '@cloudflare/workers-types':
+ specifier: ^4.20240000.0
+ version: 4.20260531.1
payloadcms-vectorize:
specifier: workspace:*
version: link:../..
@@ -180,7 +183,7 @@ importers:
dependencies:
'@payloadcms/db-postgres':
specifier: '>=3.0.0 <4.0.0'
- version: 3.69.0(@libsql/client@0.14.0)(@opentelemetry/api@1.9.0)(payload@3.69.0(graphql@16.12.0)(typescript@5.7.3))
+ version: 3.69.0(@cloudflare/workers-types@4.20260531.1)(@libsql/client@0.14.0)(@opentelemetry/api@1.9.0)(payload@3.69.0(graphql@16.12.0)(typescript@5.7.3))
payload:
specifier: '>=3.0.0 <4.0.0'
version: 3.69.0(graphql@16.12.0)(typescript@5.7.3)
@@ -457,6 +460,9 @@ packages:
'@changesets/write@0.4.0':
resolution: {integrity: sha512-CdTLvIOPiCNuH71pyDu3rA+Q0n65cmAbXnwWH84rKGiFumFzkmHNT8KHTMEchcxN+Kl8I54xGUhJ7l3E7X396Q==}
+ '@cloudflare/workers-types@4.20260531.1':
+ resolution: {integrity: sha512-7DybhbX12n+mVgJEDvm9W/jjqpaUIczg+RWj1Hua9nGEG+pNJnT+yZj1JKENrbdyuGWx3OFEgUCNFcGJN86Dvg==}
+
'@date-fns/tz@1.2.0':
resolution: {integrity: sha512-LBrd7MiJZ9McsOgxqWX7AaxrDjcFVjWH/tIKJd7pnR7McaslGYOP1QmmiBXdJH/H/yLCT+rcQ7FaPBUxRGUtrg==}
@@ -7225,6 +7231,8 @@ snapshots:
human-id: 4.1.3
prettier: 2.8.8
+ '@cloudflare/workers-types@4.20260531.1': {}
+
'@date-fns/tz@1.2.0': {}
'@dnd-kit/accessibility@3.1.1(react@19.1.0)':
@@ -8649,13 +8657,13 @@ snapshots:
- socks
- supports-color
- '@payloadcms/db-postgres@3.69.0(@libsql/client@0.14.0)(@opentelemetry/api@1.9.0)(payload@3.69.0(graphql@16.12.0)(typescript@5.7.3))':
+ '@payloadcms/db-postgres@3.69.0(@cloudflare/workers-types@4.20260531.1)(@libsql/client@0.14.0)(@opentelemetry/api@1.9.0)(payload@3.69.0(graphql@16.12.0)(typescript@5.7.3))':
dependencies:
- '@payloadcms/drizzle': 3.69.0(@libsql/client@0.14.0)(@opentelemetry/api@1.9.0)(@types/pg@8.10.2)(payload@3.69.0(graphql@16.12.0)(typescript@5.7.3))(pg@8.16.3)
+ '@payloadcms/drizzle': 3.69.0(@cloudflare/workers-types@4.20260531.1)(@libsql/client@0.14.0)(@opentelemetry/api@1.9.0)(@types/pg@8.10.2)(payload@3.69.0(graphql@16.12.0)(typescript@5.7.3))(pg@8.16.3)
'@types/pg': 8.10.2
console-table-printer: 2.12.1
drizzle-kit: 0.31.7
- drizzle-orm: 0.44.7(@libsql/client@0.14.0)(@opentelemetry/api@1.9.0)(@types/pg@8.10.2)(pg@8.16.3)
+ drizzle-orm: 0.44.7(@cloudflare/workers-types@4.20260531.1)(@libsql/client@0.14.0)(@opentelemetry/api@1.9.0)(@types/pg@8.10.2)(pg@8.16.3)
payload: 3.69.0(graphql@16.12.0)(typescript@5.7.3)
pg: 8.16.3
prompts: 2.4.2
@@ -8692,13 +8700,13 @@ snapshots:
- sqlite3
- supports-color
- '@payloadcms/db-sqlite@3.69.0(@opentelemetry/api@1.9.0)(@types/pg@8.16.0)(payload@3.69.0(graphql@16.12.0)(typescript@5.7.3))(pg@8.16.3)':
+ '@payloadcms/db-sqlite@3.69.0(@cloudflare/workers-types@4.20260531.1)(@opentelemetry/api@1.9.0)(@types/pg@8.16.0)(payload@3.69.0(graphql@16.12.0)(typescript@5.7.3))(pg@8.16.3)':
dependencies:
'@libsql/client': 0.14.0
- '@payloadcms/drizzle': 3.69.0(@libsql/client@0.14.0)(@opentelemetry/api@1.9.0)(@types/pg@8.16.0)(payload@3.69.0(graphql@16.12.0)(typescript@5.7.3))(pg@8.16.3)
+ '@payloadcms/drizzle': 3.69.0(@cloudflare/workers-types@4.20260531.1)(@libsql/client@0.14.0)(@opentelemetry/api@1.9.0)(@types/pg@8.16.0)(payload@3.69.0(graphql@16.12.0)(typescript@5.7.3))(pg@8.16.3)
console-table-printer: 2.12.1
drizzle-kit: 0.31.7
- drizzle-orm: 0.44.7(@libsql/client@0.14.0)(@opentelemetry/api@1.9.0)(@types/pg@8.16.0)(pg@8.16.3)
+ drizzle-orm: 0.44.7(@cloudflare/workers-types@4.20260531.1)(@libsql/client@0.14.0)(@opentelemetry/api@1.9.0)(@types/pg@8.16.0)(pg@8.16.3)
payload: 3.69.0(graphql@16.12.0)(typescript@5.7.3)
prompts: 2.4.2
to-snake-case: 1.0.0
@@ -8736,11 +8744,11 @@ snapshots:
- supports-color
- utf-8-validate
- '@payloadcms/drizzle@3.69.0(@libsql/client@0.14.0)(@opentelemetry/api@1.9.0)(@types/pg@8.10.2)(payload@3.69.0(graphql@16.12.0)(typescript@5.7.3))(pg@8.16.3)':
+ '@payloadcms/drizzle@3.69.0(@cloudflare/workers-types@4.20260531.1)(@libsql/client@0.14.0)(@opentelemetry/api@1.9.0)(@types/pg@8.10.2)(payload@3.69.0(graphql@16.12.0)(typescript@5.7.3))(pg@8.16.3)':
dependencies:
console-table-printer: 2.12.1
dequal: 2.0.3
- drizzle-orm: 0.44.7(@libsql/client@0.14.0)(@opentelemetry/api@1.9.0)(@types/pg@8.10.2)(pg@8.16.3)
+ drizzle-orm: 0.44.7(@cloudflare/workers-types@4.20260531.1)(@libsql/client@0.14.0)(@opentelemetry/api@1.9.0)(@types/pg@8.10.2)(pg@8.16.3)
payload: 3.69.0(graphql@16.12.0)(typescript@5.7.3)
prompts: 2.4.2
to-snake-case: 1.0.0
@@ -8776,11 +8784,11 @@ snapshots:
- sql.js
- sqlite3
- '@payloadcms/drizzle@3.69.0(@libsql/client@0.14.0)(@opentelemetry/api@1.9.0)(@types/pg@8.16.0)(payload@3.69.0(graphql@16.12.0)(typescript@5.7.3))(pg@8.16.3)':
+ '@payloadcms/drizzle@3.69.0(@cloudflare/workers-types@4.20260531.1)(@libsql/client@0.14.0)(@opentelemetry/api@1.9.0)(@types/pg@8.16.0)(payload@3.69.0(graphql@16.12.0)(typescript@5.7.3))(pg@8.16.3)':
dependencies:
console-table-printer: 2.12.1
dequal: 2.0.3
- drizzle-orm: 0.44.7(@libsql/client@0.14.0)(@opentelemetry/api@1.9.0)(@types/pg@8.16.0)(pg@8.16.3)
+ drizzle-orm: 0.44.7(@cloudflare/workers-types@4.20260531.1)(@libsql/client@0.14.0)(@opentelemetry/api@1.9.0)(@types/pg@8.16.0)(pg@8.16.3)
payload: 3.69.0(graphql@16.12.0)(typescript@5.7.3)
prompts: 2.4.2
to-snake-case: 1.0.0
@@ -10671,15 +10679,17 @@ snapshots:
transitivePeerDependencies:
- supports-color
- drizzle-orm@0.44.7(@libsql/client@0.14.0)(@opentelemetry/api@1.9.0)(@types/pg@8.10.2)(pg@8.16.3):
+ drizzle-orm@0.44.7(@cloudflare/workers-types@4.20260531.1)(@libsql/client@0.14.0)(@opentelemetry/api@1.9.0)(@types/pg@8.10.2)(pg@8.16.3):
optionalDependencies:
+ '@cloudflare/workers-types': 4.20260531.1
'@libsql/client': 0.14.0
'@opentelemetry/api': 1.9.0
'@types/pg': 8.10.2
pg: 8.16.3
- drizzle-orm@0.44.7(@libsql/client@0.14.0)(@opentelemetry/api@1.9.0)(@types/pg@8.16.0)(pg@8.16.3):
+ drizzle-orm@0.44.7(@cloudflare/workers-types@4.20260531.1)(@libsql/client@0.14.0)(@opentelemetry/api@1.9.0)(@types/pg@8.16.0)(pg@8.16.3):
optionalDependencies:
+ '@cloudflare/workers-types': 4.20260531.1
'@libsql/client': 0.14.0
'@opentelemetry/api': 1.9.0
'@types/pg': 8.16.0
From 0af470cf45219416ad9c417117a44f8bb858ef39 Mon Sep 17 00:00:00 2001
From: techiejd <62455039+techiejd@users.noreply.github.com>
Date: Mon, 1 Jun 2026 08:59:09 +0700
Subject: [PATCH 02/17] refactor(cf): adopt @cloudflare/workers-types Vectorize
binding
---
adapters/cf/src/types.ts | 54 ++++------------------------------------
1 file changed, 5 insertions(+), 49 deletions(-)
diff --git a/adapters/cf/src/types.ts b/adapters/cf/src/types.ts
index 92786d4..e10e441 100644
--- a/adapters/cf/src/types.ts
+++ b/adapters/cf/src/types.ts
@@ -1,65 +1,21 @@
+/// <reference types="@cloudflare/workers-types" />
import type { BasePayload } from 'payload'
import { getVectorizedPayload } from 'payloadcms-vectorize'
-/**
- * Retrieve the Cloudflare Vectorize binding from a Payload instance.
- * Throws if the binding is not found.
- */
-export function getVectorizeBinding(payload: BasePayload): CloudflareVectorizeBinding {
+export function getVectorizeBinding(payload: BasePayload): Vectorize {
const binding = getVectorizedPayload(payload)?.getDbAdapterCustom()
- ?._vectorizeBinding as CloudflareVectorizeBinding | undefined
+ ?._vectorizeBinding as Vectorize | undefined
if (!binding) {
throw new Error('[@payloadcms-vectorize/cf] Cloudflare Vectorize binding not found')
}
return binding
}
-/**
- * Configuration for a knowledge pool in Cloudflare Vectorize
- */
export interface CloudflareVectorizePoolConfig {
- /** Vector dimensions for this pool (must match embedding model output) */
dims: number
}
-/**
- * All knowledge pools configuration for Cloudflare Vectorize
- */
export type KnowledgePoolsConfig = Record
-/** A single vector match returned by a Vectorize query */
-export interface VectorizeMatch {
- id: string
- score?: number
- metadata?: Record
-}
-
-/** Result of a Vectorize query */
-export interface VectorizeQueryResult {
- matches: VectorizeMatch[]
- count: number
-}
-
-/** Vector to upsert into Vectorize */
-export interface VectorizeVector {
- id: string
- values: number[]
- metadata?: Record
-}
-
-/**
- * Cloudflare Vectorize binding interface.
- * Mirrors the subset of the Vectorize API we use.
- * For the full type, install `@cloudflare/workers-types`.
- */
-export interface CloudflareVectorizeBinding {
- query(vector: number[], options?: {
- topK?: number
- returnMetadata?: boolean | 'indexed' | 'all'
- filter?: Record
- /** Vectorize metadata filtering */
- where?: Record
- }): Promise
- upsert(vectors: VectorizeVector[]): Promise
- deleteByIds(ids: string[]): Promise
-}
+/** @deprecated Use the official `Vectorize` type from `@cloudflare/workers-types`. */
+export type CloudflareVectorizeBinding = Vectorize
From 3319f85687e580f7da5ded51e554ed5be1130004 Mon Sep 17 00:00:00 2001
From: techiejd <62455039+techiejd@users.noreply.github.com>
Date: Mon, 1 Jun 2026 09:08:40 +0700
Subject: [PATCH 03/17] feat: add EmbeddingRecord type and
findByIds/findEmbeddingsByIds signatures
---
src/index.ts | 1 +
src/types.ts | 20 ++++++++++++++++++++
2 files changed, 21 insertions(+)
diff --git a/src/index.ts b/src/index.ts
index bf3ac4e..1fba1ed 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -76,6 +76,7 @@ export type {
// For adapters
VectorSearchResult,
+ EmbeddingRecord,
} from './types.js'
export { getVectorizedPayload } from './types.js'
diff --git a/src/types.ts b/src/types.ts
index e54611d..0701068 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -57,6 +57,10 @@ export type VectorizedPayload = {
_isBulkEmbedEnabled: (knowledgePool: KnowledgePoolName) => boolean
getDbAdapterCustom: () => Record | undefined
search: (params: VectorSearchQuery) => Promise>
+  findEmbeddingsByIds: (params: { // look up stored embeddings (vector included) by id
+    knowledgePool: KnowledgePoolName
+    ids: string[]
+  }) => Promise<Array<EmbeddingRecord>>
queueEmbed: (
params:
| {
@@ -322,6 +326,17 @@ export interface VectorSearchResult {
[key: string]: any // Extension fields and other dynamic fields
}
+export interface EmbeddingRecord { // a stored embedding row, including its vector
+ id: string
+ sourceCollection: string
+ docId: string
+ chunkIndex: number
+ chunkText: string
+ embeddingVersion: string
+ embedding: number[] // the embedding vector itself
+ [key: string]: any // any further fields carried on the row (e.g. a pool's extension fields)
+}
+
export interface VectorSearchQuery {
/** The knowledge pool to search in */
knowledgePool: KnowledgePoolName
@@ -430,4 +445,9 @@ export type DbAdapter = {
limit?: number,
where?: Where,
) => Promise>
+  findByIds: ( // read primitive: fetch the stored rows for `ids` from one knowledge pool
+    payload: BasePayload,
+    poolName: KnowledgePoolName,
+    ids: string[],
+  ) => Promise<Array<EmbeddingRecord>>
}
From 4357d1d83ab3cd22f19f815cf74b55ae6f62475d Mon Sep 17 00:00:00 2001
From: techiejd <62455039+techiejd@users.noreply.github.com>
Date: Mon, 1 Jun 2026 10:45:44 +0700
Subject: [PATCH 04/17] feat: wire findEmbeddingsByIds public method with
mock-adapter coverage
---
dev/helpers/mockAdapter.ts | 35 +++++++++++++++++-
dev/specs/vectorizedPayload.spec.ts | 57 +++++++++++++++++++++++++++++
src/index.ts | 4 ++
3 files changed, 95 insertions(+), 1 deletion(-)
diff --git a/dev/helpers/mockAdapter.ts b/dev/helpers/mockAdapter.ts
index dacad6f..932a1bd 100644
--- a/dev/helpers/mockAdapter.ts
+++ b/dev/helpers/mockAdapter.ts
@@ -1,4 +1,4 @@
-import type { DbAdapter, KnowledgePoolName, KnowledgePoolDynamicConfig, StoreChunkData, VectorSearchResult } from 'payloadcms-vectorize'
+import type { DbAdapter, EmbeddingRecord, KnowledgePoolName, KnowledgePoolDynamicConfig, StoreChunkData, VectorSearchResult } from 'payloadcms-vectorize'
import { createEmbeddingsCollection } from 'payloadcms-vectorize'
import type { CollectionSlug, Payload, BasePayload, Where, Config } from 'payload'
@@ -195,6 +195,39 @@ export const createMockAdapter = (options: MockAdapterOptions = {}): DbAdapter =
.slice(0, limit)
.map(({ _score, ...rest }) => rest)
},
+
+    findByIds: async ( // mock read primitive: in-memory vector + fields of the matching Payload doc
+      payload: BasePayload,
+      poolName: KnowledgePoolName,
+      ids: string[],
+    ): Promise<EmbeddingRecord[]> => {
+      const records: EmbeddingRecord[] = []
+      for (const id of ids) {
+        const stored = storage.get(`${poolName}:${id}`) // storage keys are `${poolName}:${id}`
+        if (!stored) continue // no vector stored for this id: treat as a miss
+        try {
+          const doc = await payload.findByID({
+            collection: poolName as CollectionSlug,
+            id: stored.id,
+          })
+          if (!doc) continue
+          const { // strip bookkeeping fields; id and embedding come from `stored` below
+            id: _id,
+            createdAt: _createdAt,
+            updatedAt: _updatedAt,
+            embedding: _embedding,
+            ...docFields
+          } = doc as any
+          records.push({
+            id: stored.id,
+            embedding: stored.embedding,
+            ...docFields,
+          } as EmbeddingRecord)
+        } catch (_e) { // deliberate: a failed document lookup counts as a miss, not an error
+        }
+      }
+      return records
+    },
}
}
diff --git a/dev/specs/vectorizedPayload.spec.ts b/dev/specs/vectorizedPayload.spec.ts
index 65c40ba..7895bd5 100644
--- a/dev/specs/vectorizedPayload.spec.ts
+++ b/dev/specs/vectorizedPayload.spec.ts
@@ -200,6 +200,63 @@ describe('VectorizedPayload', () => {
})
})
+ describe('findEmbeddingsByIds method', () => {
+ let embeddingId: string // id of one embeddings row for the post seeded in beforeAll
+
+ beforeAll(async () => {
+ const post = await payload.create({
+ collection: 'posts',
+ data: { title: 'FindByIds seed', content: markdownContent as unknown as any },
+ })
+ await waitForVectorizationJobs(payload) // presumably blocks until the embed job for the post has run — TODO confirm
+ const rows = await payload.find({
+ collection: 'default' as any,
+ where: { docId: { equals: String(post.id) } },
+ limit: 1,
+ })
+ embeddingId = String(rows.docs[0].id)
+ })
+
+ test('payload has findEmbeddingsByIds method', () => {
+ const vectorizedPayload = getVectorizedPayload(payload)
+ expect(typeof vectorizedPayload!.findEmbeddingsByIds).toBe('function')
+ })
+
+ test('returns the full EmbeddingRecord including the embedding vector', async () => {
+ const vectorizedPayload = getVectorizedPayload(payload)!
+ const records = await vectorizedPayload.findEmbeddingsByIds({
+ knowledgePool: 'default',
+ ids: [embeddingId],
+ })
+ expect(records).toHaveLength(1)
+ const [record] = records
+ expect(record.id).toBe(embeddingId)
+ expect(Array.isArray(record.embedding)).toBe(true)
+ expect(record.embedding.length).toBe(DIMS)
+ expect(typeof record.sourceCollection).toBe('string')
+ expect(typeof record.chunkText).toBe('string')
+ })
+
+ test('drops unknown ids (result length < ids length)', async () => {
+ const vectorizedPayload = getVectorizedPayload(payload)!
+ const records = await vectorizedPayload.findEmbeddingsByIds({
+ knowledgePool: 'default',
+ ids: [embeddingId, 'definitely-not-an-id-999999'], // second id is expected to match nothing
+ })
+ expect(records).toHaveLength(1)
+ expect(records[0].id).toBe(embeddingId)
+ })
+
+ test('empty ids returns []', async () => {
+ const vectorizedPayload = getVectorizedPayload(payload)!
+ const records = await vectorizedPayload.findEmbeddingsByIds({
+ knowledgePool: 'default',
+ ids: [],
+ })
+ expect(records).toEqual([])
+ })
+ })
+
describe('queueEmbed method', () => {
test('payload has queueEmbed method', () => {
const vectorizedPayload = getVectorizedPayload(payload)
diff --git a/src/index.ts b/src/index.ts
index 1fba1ed..6e77714 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -357,6 +357,10 @@ export default (pluginOptions: PayloadcmsVectorizeConfig) =>
params.limit,
params.where,
),
+ findEmbeddingsByIds: (params: { knowledgePool: KnowledgePoolName; ids: string[] }) => {
+ if (params.ids.length === 0) return Promise.resolve([]) // nothing to look up: skip the adapter call
+ return pluginOptions.dbAdapter.findByIds(payload, params.knowledgePool, params.ids) // delegate to the configured adapter
+ },
queueEmbed: async (
params:
| {
From a5e95e53b1109fa066e7c7cc0d81425978b157be Mon Sep 17 00:00:00 2001
From: techiejd <62455039+techiejd@users.noreply.github.com>
Date: Mon, 1 Jun 2026 11:34:27 +0700
Subject: [PATCH 05/17] feat(pg): implement findByIds read primitive
---
adapters/pg/dev/specs/findByIds.spec.ts | 105 +++++++++++++++++++++++
adapters/pg/src/findByIds.ts | 106 ++++++++++++++++++++++++
adapters/pg/src/index.ts | 2 +
3 files changed, 213 insertions(+)
create mode 100644 adapters/pg/dev/specs/findByIds.spec.ts
create mode 100644 adapters/pg/src/findByIds.ts
diff --git a/adapters/pg/dev/specs/findByIds.spec.ts b/adapters/pg/dev/specs/findByIds.spec.ts
new file mode 100644
index 0000000..c1c2323
--- /dev/null
+++ b/adapters/pg/dev/specs/findByIds.spec.ts
@@ -0,0 +1,105 @@
+import type { Payload } from 'payload'
+import { afterAll, beforeAll, describe, expect, test } from 'vitest'
+import { postgresAdapter } from '@payloadcms/db-postgres'
+import { buildDummyConfig, integration, plugin, DIMS } from './constants.js'
+import { createTestDb, destroyPayload, waitForVectorizationJobs } from './utils.js'
+import { getPayload } from 'payload'
+import { chunkText } from '@shared-test/helpers/chunkers'
+import { makeDummyEmbedDocs, makeDummyEmbedQuery, testEmbeddingVersion } from '@shared-test/helpers/embed'
+
+describe('pg findByIds', () => {
+ let payload: Payload
+ const dbName = 'pg_find_by_ids_test'
+ let embeddingId: string // id of the embeddings row created for the seeded post
+
+ beforeAll(async () => {
+ await createTestDb({ dbName })
+ const config = await buildDummyConfig({
+ jobs: { tasks: [], autoRun: [{ cron: '*/5 * * * * *', limit: 10 }] },
+ collections: [
+ { slug: 'posts', fields: [
+ { name: 'title', type: 'text' },
+ { name: 'category', type: 'text' },
+ ] },
+ ],
+ db: postgresAdapter({
+ extensions: ['vector'],
+ afterSchemaInit: [integration.afterSchemaInitHook],
+ pool: { connectionString: `postgresql://postgres:password@localhost:5433/${dbName}` },
+ }),
+ plugins: [
+ plugin({
+ knowledgePools: {
+ default: {
+ collections: {
+ posts: {
+ toKnowledgePool: async (doc) => {
+ const chunks: Array<{ chunk: string; category?: string }> = []
+ if (doc.title) {
+ for (const chunk of chunkText(doc.title)) {
+ chunks.push({ chunk, category: doc.category || 'general' })
+ }
+ }
+ return chunks
+ },
+ },
+ },
+ extensionFields: [{ name: 'category', type: 'text' }], // extension field asserted on below
+ embeddingConfig: {
+ version: testEmbeddingVersion,
+ queryFn: makeDummyEmbedQuery(DIMS),
+ realTimeIngestionFn: makeDummyEmbedDocs(DIMS),
+ },
+ },
+ },
+ }),
+ ],
+ })
+ payload = await getPayload({ config, key: `pg-find-by-ids-${Date.now()}`, cron: true })
+
+ const post = await payload.create({
+ collection: 'posts',
+ data: { title: 'Find me by id', category: 'science' },
+ })
+ await waitForVectorizationJobs(payload) // presumably blocks until the embed job for the post has run — TODO confirm
+ const rows = await payload.find({
+ collection: 'default' as any,
+ where: { docId: { equals: String(post.id) } },
+ limit: 1,
+ })
+ embeddingId = String(rows.docs[0].id)
+ })
+
+ afterAll(async () => {
+ await destroyPayload(payload)
+ })
+
+ test('returns full EmbeddingRecord including numeric embedding array', async () => {
+ const records = await integration.adapter.findByIds(payload, 'default', [embeddingId])
+ expect(records).toHaveLength(1)
+ const [r] = records
+ expect(r.id).toBe(embeddingId)
+ expect(Array.isArray(r.embedding)).toBe(true)
+ expect(r.embedding.length).toBe(DIMS)
+ expect(r.embedding.every((n) => typeof n === 'number')).toBe(true)
+ expect(r.sourceCollection).toBe('posts')
+ expect(typeof r.chunkText).toBe('string')
+ expect(r.embeddingVersion).toBe(testEmbeddingVersion)
+ })
+
+ test('includes extension fields when the pool defines them', async () => {
+ const [r] = await integration.adapter.findByIds(payload, 'default', [embeddingId])
+ expect((r as any).category).toBe('science')
+ })
+
+ test('drops misses', async () => {
+ const records = await integration.adapter.findByIds(payload, 'default', [embeddingId, '999999']) // '999999' is numeric but expected not to exist
+ expect(records).toHaveLength(1)
+ expect(records[0].id).toBe(embeddingId)
+ })
+
+ test('empty ids returns []', async () => {
+ const records = await integration.adapter.findByIds(payload, 'default', [])
+ expect(records).toEqual([])
+ })
+})
diff --git a/adapters/pg/src/findByIds.ts b/adapters/pg/src/findByIds.ts
new file mode 100644
index 0000000..76a9e9d
--- /dev/null
+++ b/adapters/pg/src/findByIds.ts
@@ -0,0 +1,140 @@
+import { inArray } from '@payloadcms/db-postgres/drizzle'
+import { BasePayload, SanitizedCollectionConfig } from 'payload'
+import { KnowledgePoolName, EmbeddingRecord } from 'payloadcms-vectorize'
+import toSnakeCase from 'to-snake-case'
+import { getEmbeddingsTable } from './drizzle.js'
+
+/**
+ * Fetch stored embedding rows from a knowledge pool by primary key.
+ *
+ * Ids that are not plain decimal integers, or that exceed the safe-integer
+ * range, are dropped before querying: they cannot be represented exactly as a
+ * number, so sending them could fail or match the wrong row. Ids with no
+ * matching row are simply absent from the result, and the query has no
+ * ORDER BY, so callers must not rely on result order or length.
+ *
+ * @param payload - Payload instance; its `db` must expose a drizzle client.
+ * @param poolName - Knowledge pool whose embeddings table is read.
+ * @param ids - Embedding row ids as strings.
+ * @returns One record per row found, including the embedding vector.
+ * @throws Error if no drizzle client is available, the pool's collection is
+ *   not registered on `payload`, or its embeddings table is not registered.
+ */
+export default async (
+  payload: BasePayload,
+  poolName: KnowledgePoolName,
+  ids: string[],
+): Promise<Array<EmbeddingRecord>> => {
+  if (ids.length === 0) return []
+
+  const isPostgres = payload.db?.pool?.query || payload.db?.drizzle
+  if (!isPostgres) {
+    throw new Error('[@payloadcms-vectorize/pg] Only works with Postgres')
+  }
+  const drizzle = payload.db?.drizzle
+  if (!drizzle) {
+    throw new Error('[@payloadcms-vectorize/pg] Drizzle instance not found in adapter')
+  }
+
+  const collectionConfig = payload.collections[poolName]?.config
+  if (!collectionConfig) {
+    throw new Error(`[@payloadcms-vectorize/pg] Collection ${poolName} not found`)
+  }
+
+  const table = getEmbeddingsTable(poolName)
+  if (!table) {
+    throw new Error(
+      `[@payloadcms-vectorize/pg] Embeddings table for knowledge pool "${poolName}" not registered.`,
+    )
+  }
+
+  // Keep only ids that survive the string -> number conversion exactly.
+  const numericIds = ids
+    .filter((id) => /^\d+$/.test(id))
+    .map(Number)
+    .filter((id) => Number.isSafeInteger(id))
+  if (numericIds.length === 0) return []
+
+  // Always select id + embedding, then every named collection field that has a
+  // column on the table under either its own name or its snake_case form.
+  const selectObj: Record<string, any> = {
+    id: table.id,
+    embedding: table.embedding,
+  }
+  for (const field of collectionConfig.fields ?? []) {
+    if (typeof field === 'object' && 'name' in field) {
+      const name = field.name as string
+      if (name in table) {
+        selectObj[name] = table[name]
+      } else if (toSnakeCase(name) in table) {
+        selectObj[name] = table[toSnakeCase(name)]
+      }
+    }
+  }
+
+  const rows = await drizzle.select(selectObj).from(table).where(inArray(table.id, numericIds))
+  return mapRowsToRecords(rows, collectionConfig)
+}
+
+/**
+ * Convert raw rows into `EmbeddingRecord`s: stringify `id`/`docId`, coerce
+ * `chunkIndex` and (where parseable) every `number`-typed collection field to
+ * a number, and parse the embedding column into a numeric array.
+ */
+function mapRowsToRecords(
+  rows: Record<string, any>[],
+  collectionConfig: SanitizedCollectionConfig,
+): Array<EmbeddingRecord> {
+  const numberFields = new Set<string>()
+  for (const field of collectionConfig.fields ?? []) {
+    if (typeof field === 'object' && 'name' in field && field.type === 'number') {
+      numberFields.add(field.name)
+    }
+  }
+
+  return rows.map((row) => {
+    // Accept both camelCase and snake_case keys for these two columns.
+    const rawDocId = row.docId ?? row.doc_id
+    const rawChunkIndex = row.chunkIndex ?? row.chunk_index
+
+    const record = {
+      ...row,
+      id: String(row.id),
+      docId: String(rawDocId),
+      chunkIndex:
+        typeof rawChunkIndex === 'number' ? rawChunkIndex : parseInt(String(rawChunkIndex), 10),
+      embedding: parseEmbedding(row.embedding),
+    } as EmbeddingRecord
+
+    for (const fieldName of numberFields) {
+      const value = record[fieldName]
+      if (value != null && typeof value !== 'number') {
+        const parsed = parseFloat(String(value))
+        // Leave the original value in place when it is not numeric.
+        if (!Number.isNaN(parsed)) {
+          record[fieldName] = parsed
+        }
+      }
+    }
+
+    return record
+  })
+}
+
+/**
+ * Normalise an embedding column value to `number[]`. Arrays pass through;
+ * a string such as `[0.1,0.2]` is split on commas after removing the
+ * surrounding brackets; anything else yields an empty array.
+ */
+function parseEmbedding(value: unknown): number[] {
+  if (Array.isArray(value)) return value as number[]
+  if (typeof value === 'string') {
+    return value
+      .replace(/^\[/, '')
+      .replace(/\]$/, '')
+      .split(',')
+      .filter((s) => s.length > 0)
+      .map((s) => Number(s))
+  }
+  return []
+}
diff --git a/adapters/pg/src/index.ts b/adapters/pg/src/index.ts
index ac28c21..22c25c0 100644
--- a/adapters/pg/src/index.ts
+++ b/adapters/pg/src/index.ts
@@ -9,6 +9,7 @@ import { fileURLToPath } from 'url'
import { dirname, resolve } from 'path'
import embed from './embed.js'
import search from './search.js'
+import findByIds from './findByIds.js'
export type { KnowledgePoolsConfig as KnowledgePoolConfig }
@@ -93,6 +94,7 @@ export const createPostgresVectorIntegration = (
}
},
search,
+ findByIds,
storeChunk: async (payload, poolName, data) => {
const embeddingArray = Array.isArray(data.embedding) ? data.embedding : Array.from(data.embedding)
From fd2d9d8a03bfdc4d993dccb8261bab05223be070 Mon Sep 17 00:00:00 2001
From: techiejd <62455039+techiejd@users.noreply.github.com>
Date: Mon, 1 Jun 2026 12:44:35 +0700
Subject: [PATCH 06/17] feat(mongodb): implement findByIds read primitive
---
adapters/mongodb/dev/specs/findByIds.spec.ts | 89 ++++++++++++++++++++
adapters/mongodb/src/findByIds.ts | 59 +++++++++++++
adapters/mongodb/src/index.ts | 3 +
3 files changed, 151 insertions(+)
create mode 100644 adapters/mongodb/dev/specs/findByIds.spec.ts
create mode 100644 adapters/mongodb/src/findByIds.ts
diff --git a/adapters/mongodb/dev/specs/findByIds.spec.ts b/adapters/mongodb/dev/specs/findByIds.spec.ts
new file mode 100644
index 0000000..87d04ac
--- /dev/null
+++ b/adapters/mongodb/dev/specs/findByIds.spec.ts
@@ -0,0 +1,89 @@
+import { afterAll, beforeAll, describe, expect, test } from 'vitest'
+import { MongoClient } from 'mongodb'
+import type { BasePayload } from 'payload'
+import type { DbAdapter } from 'payloadcms-vectorize'
+import { DIMS, MONGO_URI } from './constants.js'
+import { buildMongoTestPayload, teardownDbs } from './utils.js'
+import { testEmbeddingVersion, makeDummyEmbedDocs, makeDummyEmbedQuery } from '@shared-test/helpers/embed'
+
+const DB = `mongo_find_by_ids_${Date.now()}`
+
+describe('mongodb findByIds', () => {
+ let payload: BasePayload
+ let adapter: DbAdapter
+ let embeddingId: string // string form of the stored document's _id, read in beforeAll
+
+ beforeAll(async () => {
+ const built = await buildMongoTestPayload({
+ uri: MONGO_URI,
+ dbName: DB,
+ pools: { default: { dimensions: DIMS, filterableFields: ['category'] } },
+ knowledgePools: {
+ default: {
+ collections: {},
+ extensionFields: [{ name: 'category', type: 'text' }],
+ embeddingConfig: {
+ version: testEmbeddingVersion,
+ queryFn: makeDummyEmbedQuery(DIMS),
+ realTimeIngestionFn: makeDummyEmbedDocs(DIMS),
+ },
+ },
+ },
+ })
+ payload = built.payload
+ adapter = built.adapter
+
+ await adapter.storeChunk(payload, 'default', { // seed one chunk directly through the adapter
+ sourceCollection: 'posts',
+ docId: 'doc-1',
+ chunkIndex: 0,
+ chunkText: 'find me',
+ embeddingVersion: testEmbeddingVersion,
+ embedding: Array(DIMS).fill(0.25),
+ extensionFields: { category: 'science' },
+ })
+
+ const c = new MongoClient(MONGO_URI) // separate client: read the _id straight from Mongo, bypassing the adapter
+ await c.connect()
+ const doc = await c.db(`${DB}_vectors`).collection('vectorize_default').findOne({ docId: 'doc-1' })
+ embeddingId = String(doc!._id)
+ await c.close()
+ })
+
+ afterAll(async () => {
+ await teardownDbs(payload, MONGO_URI, DB)
+ })
+
+ test('returns full EmbeddingRecord including numeric embedding array', async () => {
+ const records = await adapter.findByIds(payload, 'default', [embeddingId])
+ expect(records).toHaveLength(1)
+ const [r] = records
+ expect(r.id).toBe(embeddingId)
+ expect(Array.isArray(r.embedding)).toBe(true)
+ expect(r.embedding.length).toBe(DIMS)
+ expect(r.embedding.every((n) => typeof n === 'number')).toBe(true)
+ expect(r.sourceCollection).toBe('posts')
+ expect(r.chunkText).toBe('find me')
+ expect(r.embeddingVersion).toBe(testEmbeddingVersion)
+ })
+
+ test('includes extension fields', async () => {
+ const [r] = await adapter.findByIds(payload, 'default', [embeddingId])
+ expect((r as any).category).toBe('science')
+ })
+
+ test('drops misses and invalid ids without throwing', async () => {
+ const records = await adapter.findByIds(payload, 'default', [
+ embeddingId,
+ '000000000000000000000000', // 24 hex chars, but expected to match no document
+ 'not-an-object-id', // not 24 hex chars
+ ])
+ expect(records).toHaveLength(1)
+ expect(records[0].id).toBe(embeddingId)
+ })
+
+ test('empty ids returns []', async () => {
+ const records = await adapter.findByIds(payload, 'default', [])
+ expect(records).toEqual([])
+ })
+})
diff --git a/adapters/mongodb/src/findByIds.ts b/adapters/mongodb/src/findByIds.ts
new file mode 100644
index 0000000..3704714
--- /dev/null
+++ b/adapters/mongodb/src/findByIds.ts
@@ -0,0 +1,81 @@
+import type { BasePayload } from 'payload'
+import type { EmbeddingRecord } from 'payloadcms-vectorize'
+import { ObjectId } from 'mongodb'
+import { getMongoClient } from './client.js'
+import { RESERVED_FIELDS, type ResolvedPoolConfig } from './types.js'
+
+/** Connection details and resolved per-pool settings used by `findByIdsImpl`. */
+export interface MongoFindByIdsCtx {
+  uri: string
+  dbName: string
+  pools: Record<string, ResolvedPoolConfig>
+}
+
+// 24 hex characters: the only id strings that are turned into an ObjectId.
+const HEX24 = /^[a-f\d]{24}$/i
+// Keys that are mapped explicitly (or dropped) rather than copied through.
+const RESERVED_AND_META = new Set<string>([...RESERVED_FIELDS, '_id', 'createdAt', 'updatedAt'])
+
+/**
+ * Fetch stored embedding documents from a pool's collection by `_id`.
+ *
+ * Ids that are not 24-character hex strings are dropped before querying, and
+ * ids with no matching document are absent from the result, so the result may
+ * be shorter than `ids`. No sort is applied to the query.
+ *
+ * @param ctx - Mongo URI, database name and pool configuration.
+ * @param _payload - Unused; kept for signature parity with the adapter API.
+ * @param poolName - Key into `ctx.pools`.
+ * @param ids - Embedding document ids as strings.
+ * @returns One record per document found, including the embedding vector.
+ * @throws Error if `poolName` is not a key of `ctx.pools`.
+ */
+export async function findByIdsImpl(
+  ctx: MongoFindByIdsCtx,
+  _payload: BasePayload,
+  poolName: string,
+  ids: string[],
+): Promise<EmbeddingRecord[]> {
+  if (ids.length === 0) return []
+
+  const cfg = ctx.pools[poolName]
+  if (!cfg) {
+    throw new Error(
+      `[@payloadcms-vectorize/mongodb] Unknown pool "${poolName}". Configured pools: ${Object.keys(ctx.pools).join(', ')}`,
+    )
+  }
+
+  const objectIds = ids.filter((id) => HEX24.test(id)).map((id) => new ObjectId(id))
+  if (objectIds.length === 0) return []
+
+  const client = await getMongoClient(ctx.uri)
+  const docs = await client
+    .db(ctx.dbName)
+    .collection(cfg.collectionName)
+    .find({ _id: { $in: objectIds } })
+    .toArray()
+
+  return docs.map((doc) => mapDocToRecord(doc as Record<string, unknown>))
+}
+
+/**
+ * Shape a raw document as an `EmbeddingRecord`. Keys outside
+ * `RESERVED_AND_META` are copied through as extension fields; they are spread
+ * first so they can never overwrite the canonical fields set below.
+ */
+function mapDocToRecord(doc: Record<string, unknown>): EmbeddingRecord {
+  const extensionFields = Object.fromEntries(
+    Object.entries(doc).filter(([k]) => !RESERVED_AND_META.has(k)),
+  )
+  return {
+    ...extensionFields,
+    id: String(doc._id),
+    sourceCollection: String(doc.sourceCollection ?? ''),
+    docId: String(doc.docId ?? ''),
+    chunkIndex:
+      typeof doc.chunkIndex === 'number' ? doc.chunkIndex : Number(doc.chunkIndex ?? 0),
+    chunkText: String(doc.chunkText ?? ''),
+    embeddingVersion: String(doc.embeddingVersion ?? ''),
+    embedding: Array.isArray(doc.embedding) ? (doc.embedding as number[]) : [],
+  }
+}
diff --git a/adapters/mongodb/src/index.ts b/adapters/mongodb/src/index.ts
index 0827547..6681836 100644
--- a/adapters/mongodb/src/index.ts
+++ b/adapters/mongodb/src/index.ts
@@ -2,6 +2,7 @@ import type { DbAdapter } from 'payloadcms-vectorize'
import { getMongoClient } from './client.js'
import { storeChunkImpl } from './embed.js'
import { searchImpl } from './search.js'
+import { findByIdsImpl } from './findByIds.js'
import {
resolvePoolConfig,
type MongoVectorIntegrationConfig,
@@ -81,6 +82,8 @@ export const createMongoVectorIntegration = (
search: (payload, queryEmbedding, poolName, limit, where) =>
searchImpl(ctx, payload, queryEmbedding, poolName, limit, where),
+
+ findByIds: (payload, poolName, ids) => findByIdsImpl(ctx, payload, poolName, ids),
}
return { adapter }
From 1e53fee515e5892ffac737c41bc4aaeb83c8bad1 Mon Sep 17 00:00:00 2001
From: techiejd <62455039+techiejd@users.noreply.github.com>
Date: Mon, 1 Jun 2026 13:29:52 +0700
Subject: [PATCH 07/17] feat(cf): implement findByIds via Vectorize getByIds
---
adapters/cf/dev/specs/adapter.spec.ts | 76 +++++++++++++++++++++++++++
adapters/cf/src/findByIds.ts | 44 ++++++++++++++++
adapters/cf/src/index.ts | 3 ++
3 files changed, 123 insertions(+)
create mode 100644 adapters/cf/src/findByIds.ts
diff --git a/adapters/cf/dev/specs/adapter.spec.ts b/adapters/cf/dev/specs/adapter.spec.ts
index b3ac360..0bdf5cf 100644
--- a/adapters/cf/dev/specs/adapter.spec.ts
+++ b/adapters/cf/dev/specs/adapter.spec.ts
@@ -61,6 +61,13 @@ function createMockCloudflareBinding() {
}
}),
+ getByIds: vi.fn(async (ids: string[]) => {
+ return ids
+ .map((id) => storage.get(id))
+ .filter((v): v is { id: string; values: number[]; metadata: any } => v !== undefined)
+ .map((v) => ({ id: v.id, values: v.values, metadata: v.metadata }))
+ }),
+
list: vi.fn(async (options: any) => {
const vectors = Array.from(storage.values()).map((item) => ({
id: item.id,
@@ -419,4 +426,73 @@ describe('createCloudflareVectorizeIntegration', () => {
})
})
})
+
+ describe('findByIds', () => {
+ test('returns full EmbeddingRecord including embedding values', async () => {
+ const mockBinding = createMockCloudflareBinding()
+ const { adapter } = createCloudflareVectorizeIntegration({
+ config: { default: { dims: DIMS } },
+ binding: mockBinding as any,
+ })
+ const mockPayload = createMockPayload(mockBinding)
+ const embedding = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]
+
+ await adapter.storeChunk(mockPayload, 'default', {
+ sourceCollection: 'posts',
+ docId: 'doc-1',
+ chunkIndex: 0,
+ chunkText: 'find me',
+ embeddingVersion: 'v1',
+ embedding,
+ extensionFields: { category: 'science' },
+ })
+
+ const id = 'default:posts:doc-1:0'
+ const records = await adapter.findByIds(mockPayload, 'default', [id])
+ expect(records).toHaveLength(1)
+ const [r] = records
+ expect(r.id).toBe(id)
+ expect(r.embedding).toEqual(embedding)
+ expect(r.sourceCollection).toBe('posts')
+ expect(r.docId).toBe('doc-1')
+ expect(r.chunkText).toBe('find me')
+ expect(r.embeddingVersion).toBe('v1')
+ expect((r as any).category).toBe('science')
+ })
+
+ test('drops misses', async () => {
+ const mockBinding = createMockCloudflareBinding()
+ const { adapter } = createCloudflareVectorizeIntegration({
+ config: { default: { dims: DIMS } },
+ binding: mockBinding as any,
+ })
+ const mockPayload = createMockPayload(mockBinding)
+ await adapter.storeChunk(mockPayload, 'default', {
+ sourceCollection: 'posts',
+ docId: 'doc-1',
+ chunkIndex: 0,
+ chunkText: 'x',
+ embeddingVersion: 'v1',
+ embedding: [0, 0, 0, 0, 0, 0, 0, 0],
+ extensionFields: {},
+ })
+ const records = await adapter.findByIds(mockPayload, 'default', [
+ 'default:posts:doc-1:0',
+ 'default:posts:nope:0',
+ ])
+ expect(records).toHaveLength(1)
+ expect(records[0].id).toBe('default:posts:doc-1:0')
+ })
+
+ test('empty ids returns []', async () => {
+ const mockBinding = createMockCloudflareBinding()
+ const { adapter } = createCloudflareVectorizeIntegration({
+ config: { default: { dims: DIMS } },
+ binding: mockBinding as any,
+ })
+ const mockPayload = createMockPayload(mockBinding)
+ const records = await adapter.findByIds(mockPayload, 'default', [])
+ expect(records).toEqual([])
+ })
+ })
})
diff --git a/adapters/cf/src/findByIds.ts b/adapters/cf/src/findByIds.ts
new file mode 100644
index 0000000..35638f5
--- /dev/null
+++ b/adapters/cf/src/findByIds.ts
@@ -0,0 +1,44 @@
+import { BasePayload } from 'payload'
+import { KnowledgePoolName, EmbeddingRecord } from 'payloadcms-vectorize'
+import { getVectorizeBinding } from './types.js'
+
+const RESERVED_METADATA = ['sourceCollection', 'docId', 'chunkIndex', 'chunkText', 'embeddingVersion']
+
+export default async (
+ payload: BasePayload,
+ _poolName: KnowledgePoolName,
+ ids: string[],
+): Promise> => {
+ if (ids.length === 0) return []
+
+ const binding = getVectorizeBinding(payload)
+
+ try {
+ const vectors = await binding.getByIds(ids)
+ if (!vectors) return []
+
+ return vectors.map((vector) => {
+ const metadata = (vector.metadata || {}) as Record
+ const extensionFields = Object.fromEntries(
+ Object.entries(metadata).filter(([k]) => !RESERVED_METADATA.includes(k)),
+ )
+ return {
+ id: vector.id,
+ sourceCollection: String(metadata.sourceCollection ?? ''),
+ docId: String(metadata.docId ?? ''),
+ chunkIndex:
+ typeof metadata.chunkIndex === 'number'
+ ? metadata.chunkIndex
+ : parseInt(String(metadata.chunkIndex ?? '0'), 10),
+ chunkText: String(metadata.chunkText ?? ''),
+ embeddingVersion: String(metadata.embeddingVersion ?? ''),
+ embedding: Array.from(vector.values ?? []),
+ ...extensionFields,
+ }
+ })
+ } catch (e) {
+ const errorMessage = e instanceof Error ? e.message : String(e)
+ payload.logger.error(`[@payloadcms-vectorize/cf] findByIds failed: ${errorMessage}`)
+ throw new Error(`[@payloadcms-vectorize/cf] findByIds failed: ${errorMessage}`)
+ }
+}
diff --git a/adapters/cf/src/index.ts b/adapters/cf/src/index.ts
index 8a6f23a..963530f 100644
--- a/adapters/cf/src/index.ts
+++ b/adapters/cf/src/index.ts
@@ -5,6 +5,7 @@ import type { CloudflareVectorizeBinding, KnowledgePoolsConfig } from './types.j
import cfMappingsCollection, { CF_MAPPINGS_SLUG } from './collections/cfMappings.js'
import embed from './embed.js'
import search from './search.js'
+import findByIds from './findByIds.js'
/**
* Configuration for Cloudflare Vectorize integration
@@ -117,6 +118,8 @@ export const createCloudflareVectorizeIntegration = (
}
},
+ findByIds,
+
hasEmbeddingVersion: async (payload, poolName, sourceCollection, docId, embeddingVersion) => {
const result = await payload.find({
collection: CF_MAPPINGS_SLUG as CollectionSlug,
From 2fe55bc91966437feca8ada649d8fbc28e10d9f3 Mon Sep 17 00:00:00 2001
From: techiejd <62455039+techiejd@users.noreply.github.com>
Date: Mon, 1 Jun 2026 13:35:21 +0700
Subject: [PATCH 08/17] docs: document findEmbeddingsByIds and findByIds
contract
---
README.md | 24 +++++++++++++++++++++++-
adapters/README.md | 35 +++++++++++++++++++++++++++++++++++
2 files changed, 58 insertions(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 1a5a625..384a33d 100644
--- a/README.md
+++ b/README.md
@@ -832,7 +832,7 @@ curl -X POST http://localhost:3000/api/vector-retry-failed-batch \
### Local API
-The plugin provides a `getVectorizedPayload(payload)` function which returns a `vectorizedPayload` object exposing `search`, `queueEmbed`, `bulkEmbed`, and `retryFailedBatch` methods.
+The plugin provides a `getVectorizedPayload(payload)` function which returns a `vectorizedPayload` object exposing `search`, `findEmbeddingsByIds`, `queueEmbed`, `bulkEmbed`, and `retryFailedBatch` methods.
#### Getting the Vectorized Payload Object
@@ -883,6 +883,28 @@ const results = await vectorizedPayload.search({
})
```
+#### `vectorizedPayload.findEmbeddingsByIds(params)`
+
+Fetch stored embedding records by primary key — **including the raw embedding vector**, which the normal search/query API never returns. The `id` of each record is whatever [`search()`](#vectorizedpayloadsearchparams) returns as `result.id`, so a search result round-trips directly. This is the building block for "more like this" flows.
+
+**Returns:** `Promise<Array<EmbeddingRecord>>` — `EmbeddingRecord` is the search result shape without `score` and with `embedding: number[]`.
+
+**Example:**
+
+```typescript
+const [record] = await vectorizedPayload.findEmbeddingsByIds({
+ knowledgePool: 'mainKnowledgePool',
+ ids: ['<id-from-a-search-result>'],
+})
+
+if (record) {
+ // record.embedding is the raw number[] vector — feed it back into search for "more like this"
+ console.log(record.embedding.length, record.chunkText)
+}
+```
+
+Misses are dropped (the result may be shorter than `ids`), order is not guaranteed, and an empty `ids` array returns `[]` without touching the backend.
+
#### `vectorizedPayload.queueEmbed(params)`
Manually queue a vectorization job for a document.
diff --git a/adapters/README.md b/adapters/README.md
index 096a6e4..3b28c84 100644
--- a/adapters/README.md
+++ b/adapters/README.md
@@ -110,6 +110,7 @@ import type {
KnowledgePoolDynamicConfig,
StoreChunkData,
VectorSearchResult,
+ EmbeddingRecord,
} from 'payloadcms-vectorize'
export type DbAdapter = {
@@ -150,6 +151,12 @@ export type DbAdapter = {
limit?: number,
where?: Where,
) => Promise>
+
+ findByIds: (
+ payload: BasePayload,
+ poolName: KnowledgePoolName,
+ ids: string[],
+ ) => Promise>
}
```
@@ -162,6 +169,7 @@ export type DbAdapter = {
| `deleteChunks` | After a source document is deleted. | Remove every chunk where `sourceCollection === ... && docId === ...`. Must be safe to call when no chunks exist (no-op, no throw). |
| `hasEmbeddingVersion` | During bulk-embed planning, per candidate document. | Return `true` iff at least one chunk exists with the matching `(sourceCollection, docId, embeddingVersion)` triple. Must filter on **all three** — older `0.7.0` adapters that ignored `embeddingVersion` caused stale embeddings on model bumps. |
| `search` | Per `/vector-search` request and per `getVectorizedPayload().search()` call. | Translate `where` (Payload-style) into your store's filter language, perform a vector search using `queryEmbedding`, and return up to `limit` results sorted by descending relevance. |
+| `findByIds` | Per `getVectorizedPayload().findEmbeddingsByIds()` call. | Fetch stored embedding records by primary key, **including the raw `embedding` vector** (which `search` never returns). Look up by the same `id` your `search` returns as `result.id`. Misses are dropped (result length may be `< ids.length`); order is not guaranteed; empty `ids` returns `[]` without a backend call; unknown or malformed ids are treated as misses (dropped), not raised as errors. |
### Error contract
@@ -286,6 +294,12 @@ export const createYourDbVectorIntegration = (
// Return Array sorted by descending score.
return []
},
+
+ findByIds: async (payload, poolName, ids) => {
+ // TODO: fetch stored records by primary key, including the raw `embedding` vector.
+ // Return Array. Unknown ids are misses (drop them, don't throw).
+ return []
+ },
}
return { adapter }
@@ -361,6 +375,25 @@ export interface VectorSearchResult {
/** Any extensionFields persisted via storeChunk must round-trip here. */
[key: string]: any
}
+
+export interface EmbeddingRecord {
+ /** Embedding record ID — the same value your adapter returns as VectorSearchResult.id. */
+ id: string
+ /** Source collection slug (echoed from StoreChunkData). */
+ sourceCollection: string
+ /** Source document ID (echoed from StoreChunkData). */
+ docId: string
+ /** Chunk index within the source document. */
+ chunkIndex: number
+ /** The original chunk text. */
+ chunkText: string
+ /** Embedding model/version string. */
+ embeddingVersion: string
+ /** The raw embedding vector — never returned by `search`. */
+ embedding: number[]
+ /** Any extensionFields persisted via storeChunk round-trip here. */
+ [key: string]: any
+}
```
| Field | Required | Notes |
@@ -371,6 +404,8 @@ export interface VectorSearchResult {
| `chunkText`, `embeddingVersion` | yes | Same. |
| `extensionFields.*` | optional | Whatever the user passed in `extensionFields` must be queryable via `where`. |
+> `EmbeddingRecord` (returned by `findByIds`) is `VectorSearchResult` without `score` and with the raw `embedding: number[]`.
+
## Testing your adapter
The dev harness in [`dev/`](../dev) runs the integration suite against any adapter you wire up. To test a new adapter:
From ffb58d34c101f65153817c4332ab3d3c225daaaf Mon Sep 17 00:00:00 2001
From: techiejd <62455039+techiejd@users.noreply.github.com>
Date: Mon, 1 Jun 2026 14:50:38 +0700
Subject: [PATCH 09/17] docs: scope findByIds malformed-id behavior per adapter
---
adapters/README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/adapters/README.md b/adapters/README.md
index 3b28c84..1d57eda 100644
--- a/adapters/README.md
+++ b/adapters/README.md
@@ -169,7 +169,7 @@ export type DbAdapter = {
| `deleteChunks` | After a source document is deleted. | Remove every chunk where `sourceCollection === ... && docId === ...`. Must be safe to call when no chunks exist (no-op, no throw). |
| `hasEmbeddingVersion` | During bulk-embed planning, per candidate document. | Return `true` iff at least one chunk exists with the matching `(sourceCollection, docId, embeddingVersion)` triple. Must filter on **all three** — older `0.7.0` adapters that ignored `embeddingVersion` caused stale embeddings on model bumps. |
| `search` | Per `/vector-search` request and per `getVectorizedPayload().search()` call. | Translate `where` (Payload-style) into your store's filter language, perform a vector search using `queryEmbedding`, and return up to `limit` results sorted by descending relevance. |
-| `findByIds` | Per `getVectorizedPayload().findEmbeddingsByIds()` call. | Fetch stored embedding records by primary key, **including the raw `embedding` vector** (which `search` never returns). Look up by the same `id` your `search` returns as `result.id`. Misses are dropped (result length may be `< ids.length`); order is not guaranteed; empty `ids` returns `[]` without a backend call; unknown or malformed ids are treated as misses (dropped), not raised as errors. |
+| `findByIds` | Per `getVectorizedPayload().findEmbeddingsByIds()` call. | Fetch stored embedding records by primary key, **including the raw `embedding` vector** (which `search` never returns). Look up by the same `id` your `search` returns as `result.id`. Unknown ids are dropped (result length may be `< ids.length`); order is not guaranteed; empty `ids` returns `[]` without a backend call. Adapters with a strict id format (pg integer PK, MongoDB `ObjectId`) also drop *malformed* ids as misses without erroring; adapters keyed on an opaque id (CF's composite vector id) forward ids to the backend as-is, so a backend that rejects a malformed id may surface that error. |
### Error contract
From fb98908ad183fdd5dbbc15169a17f305a85c971d Mon Sep 17 00:00:00 2001
From: techiejd <62455039+techiejd@users.noreply.github.com>
Date: Mon, 1 Jun 2026 17:22:04 +0700
Subject: [PATCH 10/17] docs(cf): restore JSDoc on retained types after binding
swap
---
adapters/cf/src/types.ts | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/adapters/cf/src/types.ts b/adapters/cf/src/types.ts
index e10e441..a0fd6c7 100644
--- a/adapters/cf/src/types.ts
+++ b/adapters/cf/src/types.ts
@@ -2,6 +2,10 @@
import type { BasePayload } from 'payload'
import { getVectorizedPayload } from 'payloadcms-vectorize'
+/**
+ * Retrieve the Cloudflare Vectorize binding from a Payload instance.
+ * Throws if the binding is not found.
+ */
export function getVectorizeBinding(payload: BasePayload): Vectorize {
const binding = getVectorizedPayload(payload)?.getDbAdapterCustom()
?._vectorizeBinding as Vectorize | undefined
@@ -11,10 +15,17 @@ export function getVectorizeBinding(payload: BasePayload): Vectorize {
return binding
}
+/**
+ * Configuration for a knowledge pool in Cloudflare Vectorize
+ */
export interface CloudflareVectorizePoolConfig {
+ /** Vector dimensions for this pool (must match embedding model output) */
dims: number
}
+/**
+ * All knowledge pools configuration for Cloudflare Vectorize
+ */
export type KnowledgePoolsConfig = Record
/** @deprecated Use the official `Vectorize` type from `@cloudflare/workers-types`. */
From 39c17dc372a291711519aaca9d30c943acf958a3 Mon Sep 17 00:00:00 2001
From: techiejd <62455039+techiejd@users.noreply.github.com>
Date: Mon, 1 Jun 2026 17:49:31 +0700
Subject: [PATCH 11/17] build: stop root tsconfig from typechecking adapter
sources
The CF adapter's '/// <reference types="@cloudflare/workers-types" />'
pulls in Workers ambient globals that redefine Request/Response (where
.json() returns unknown, not the DOM's any). The root tsconfig included
./adapters/*/src/**, so 'tsc --noEmit' (build:types:all) leaked those
globals into core endpoint/admin code and failed typecheck. Adapters are
already typechecked independently via their own tsconfig.build.json in the
CI build job, so root coverage of adapter sources was redundant.
---
tsconfig.json | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tsconfig.json b/tsconfig.json
index 8c2fdbc..2622313 100644
--- a/tsconfig.json
+++ b/tsconfig.json
@@ -22,5 +22,5 @@
}
]
},
- "include": ["./src/**/*.ts", "./src/**/*.tsx", "./adapters/*/src/**/*.ts", "./dev/next-env.d.ts"]
+ "include": ["./src/**/*.ts", "./src/**/*.tsx", "./dev/next-env.d.ts"]
}
From 963a5d1c47ac66de9df7becde477649dfd5f242e Mon Sep 17 00:00:00 2001
From: techiejd <62455039+techiejd@users.noreply.github.com>
Date: Wed, 3 Jun 2026 19:59:25 +0700
Subject: [PATCH 12/17] refactor(cf): narrow Vectorize binding to the methods
the adapter uses
Depend on Pick<Vectorize, ...> (only the methods the adapter actually calls)
via a named VectorizeBinding type instead of the full 8-method Vectorize
contract. env.VECTORIZE remains assignable; CloudflareVectorizeBinding is
kept as a deprecated alias for back-compat.
---
adapters/cf/src/index.ts | 6 +++---
adapters/cf/src/types.ts | 13 +++++++++----
2 files changed, 12 insertions(+), 7 deletions(-)
diff --git a/adapters/cf/src/index.ts b/adapters/cf/src/index.ts
index dd78a3e..f3a51bb 100644
--- a/adapters/cf/src/index.ts
+++ b/adapters/cf/src/index.ts
@@ -1,7 +1,7 @@
import type { CollectionSlug } from 'payload'
import type { DbAdapter } from 'payloadcms-vectorize'
import { getVectorizeBinding } from './types.js'
-import type { CloudflareVectorizeBinding, KnowledgePoolsConfig } from './types.js'
+import type { CloudflareVectorizeBinding, KnowledgePoolsConfig, VectorizeBinding } from './types.js'
import cfMappingsCollection, { CF_MAPPINGS_SLUG } from './collections/cfMappings.js'
import embed from './embed.js'
import search from './search.js'
@@ -13,7 +13,7 @@ interface CloudflareVectorizeConfig {
/** Knowledge pools configuration with their dimensions */
config: KnowledgePoolsConfig
/** Cloudflare Vectorize binding for vector storage */
- binding: CloudflareVectorizeBinding
+ binding: VectorizeBinding
}
/**
@@ -134,5 +134,5 @@ export const createCloudflareVectorizeIntegration = (
}
export { CF_MAPPINGS_SLUG } from './collections/cfMappings.js'
-export type { CloudflareVectorizeBinding, KnowledgePoolsConfig }
+export type { CloudflareVectorizeBinding, KnowledgePoolsConfig, VectorizeBinding }
export type { KnowledgePoolsConfig as KnowledgePoolConfig }
diff --git a/adapters/cf/src/types.ts b/adapters/cf/src/types.ts
index a0fd6c7..1cecab2 100644
--- a/adapters/cf/src/types.ts
+++ b/adapters/cf/src/types.ts
@@ -2,13 +2,18 @@
import type { BasePayload } from 'payload'
import { getVectorizedPayload } from 'payloadcms-vectorize'
+/**
+ * The subset of the Cloudflare `Vectorize` binding used by this adapter.
+ */
+export type VectorizeBinding = Pick
+
/**
* Retrieve the Cloudflare Vectorize binding from a Payload instance.
* Throws if the binding is not found.
*/
-export function getVectorizeBinding(payload: BasePayload): Vectorize {
+export function getVectorizeBinding(payload: BasePayload): VectorizeBinding {
const binding = getVectorizedPayload(payload)?.getDbAdapterCustom()
- ?._vectorizeBinding as Vectorize | undefined
+ ?._vectorizeBinding as VectorizeBinding | undefined
if (!binding) {
throw new Error('[@payloadcms-vectorize/cf] Cloudflare Vectorize binding not found')
}
@@ -28,5 +33,5 @@ export interface CloudflareVectorizePoolConfig {
*/
export type KnowledgePoolsConfig = Record
-/** @deprecated Use the official `Vectorize` type from `@cloudflare/workers-types`. */
-export type CloudflareVectorizeBinding = Vectorize
+/** @deprecated Use {@link VectorizeBinding}. */
+export type CloudflareVectorizeBinding = VectorizeBinding
From ba5e92f7a708a6fd8f73ccf29b5b912a88eb6a6b Mon Sep 17 00:00:00 2001
From: techiejd <62455039+techiejd@users.noreply.github.com>
Date: Sat, 6 Jun 2026 17:52:31 +0700
Subject: [PATCH 13/17] feat(find-by-ids): rename to findByIds with opt-in
populateEmbedding
Rename `payload.findEmbeddingsByIds` -> `payload.findByIds` and add an
opt-in `populateEmbedding?: boolean` (default false). `EmbeddingRecord.embedding`
is now optional and only returned when populateEmbedding is true.
Each backend honors the flag at the source where possible: pg skips selecting
the embedding column, mongodb uses { projection: { embedding: 0 } }, and CF
strips values post-fetch (getByIds always returns them). DbAdapter.findByIds
gains the populateEmbedding param; the shared mock and adapters README follow.
Specs split into a populateEmbedding:true case (keeps the full-vector
assertions) plus a default-omits-embedding case.
---
adapters/README.md | 10 +++---
adapters/cf/dev/specs/adapter.spec.ts | 32 ++++++++++++++++++--
adapters/cf/src/findByIds.ts | 3 +-
adapters/mongodb/dev/specs/findByIds.spec.ts | 18 ++++++++---
adapters/mongodb/src/findByIds.ts | 14 ++++++---
adapters/mongodb/src/index.ts | 3 +-
adapters/pg/dev/specs/findByIds.spec.ts | 17 ++++++++---
adapters/pg/src/findByIds.ts | 10 ++++--
dev/helpers/mockAdapter.ts | 3 +-
dev/specs/vectorizedPayload.spec.ts | 31 ++++++++++++++-----
src/index.ts | 13 ++++++--
src/types.ts | 6 ++--
12 files changed, 124 insertions(+), 36 deletions(-)
diff --git a/adapters/README.md b/adapters/README.md
index 1d57eda..a600310 100644
--- a/adapters/README.md
+++ b/adapters/README.md
@@ -156,6 +156,7 @@ export type DbAdapter = {
payload: BasePayload,
poolName: KnowledgePoolName,
ids: string[],
+ populateEmbedding?: boolean,
) => Promise>
}
```
@@ -169,7 +170,7 @@ export type DbAdapter = {
| `deleteChunks` | After a source document is deleted. | Remove every chunk where `sourceCollection === ... && docId === ...`. Must be safe to call when no chunks exist (no-op, no throw). |
| `hasEmbeddingVersion` | During bulk-embed planning, per candidate document. | Return `true` iff at least one chunk exists with the matching `(sourceCollection, docId, embeddingVersion)` triple. Must filter on **all three** — older `0.7.0` adapters that ignored `embeddingVersion` caused stale embeddings on model bumps. |
| `search` | Per `/vector-search` request and per `getVectorizedPayload().search()` call. | Translate `where` (Payload-style) into your store's filter language, perform a vector search using `queryEmbedding`, and return up to `limit` results sorted by descending relevance. |
-| `findByIds` | Per `getVectorizedPayload().findEmbeddingsByIds()` call. | Fetch stored embedding records by primary key, **including the raw `embedding` vector** (which `search` never returns). Look up by the same `id` your `search` returns as `result.id`. Unknown ids are dropped (result length may be `< ids.length`); order is not guaranteed; empty `ids` returns `[]` without a backend call. Adapters with a strict id format (pg integer PK, MongoDB `ObjectId`) also drop *malformed* ids as misses without erroring; adapters keyed on an opaque id (CF's composite vector id) forward ids to the backend as-is, so a backend that rejects a malformed id may surface that error. |
+| `findByIds` | Per `getVectorizedPayload().findByIds()` call. | Fetch stored embedding records by primary key. The raw `embedding` vector is **only included when `populateEmbedding` is `true`** (default `false`) — omit it otherwise so callers that only need text/metadata don't pay for it. Where possible, skip reading the vector at the source (pg: don't select the column; MongoDB: `{ projection: { embedding: 0 } }`); CF's `getByIds` always returns values, so omit them post-fetch. Look up by the same `id` your `search` returns as `result.id`. Unknown ids are dropped (result length may be `< ids.length`); order is not guaranteed; empty `ids` returns `[]` without a backend call. Adapters with a strict id format (pg integer PK, MongoDB `ObjectId`) also drop *malformed* ids as misses without erroring; adapters keyed on an opaque id (CF's composite vector id) forward ids to the backend as-is, so a backend that rejects a malformed id may surface that error. |
### Error contract
@@ -295,8 +296,9 @@ export const createYourDbVectorIntegration = (
return []
},
- findByIds: async (payload, poolName, ids) => {
- // TODO: fetch stored records by primary key, including the raw `embedding` vector.
+ findByIds: async (payload, poolName, ids, populateEmbedding = false) => {
+ // TODO: fetch stored records by primary key. Include the raw `embedding` vector
+ // only when `populateEmbedding` is true (default false); skip reading it otherwise.
// Return Array. Unknown ids are misses (drop them, don't throw).
return []
},
@@ -404,7 +406,7 @@ export interface EmbeddingRecord {
| `chunkText`, `embeddingVersion` | yes | Same. |
| `extensionFields.*` | optional | Whatever the user passed in `extensionFields` must be queryable via `where`. |
-> `EmbeddingRecord` (returned by `findByIds`) is `VectorSearchResult` without `score` and with the raw `embedding: number[]`.
+> `EmbeddingRecord` (returned by `findByIds`) is `VectorSearchResult` without `score` and with an optional raw `embedding?: number[]` — present only when `findByIds` is called with `populateEmbedding: true`.
## Testing your adapter
diff --git a/adapters/cf/dev/specs/adapter.spec.ts b/adapters/cf/dev/specs/adapter.spec.ts
index 32091d0..dac497f 100644
--- a/adapters/cf/dev/specs/adapter.spec.ts
+++ b/adapters/cf/dev/specs/adapter.spec.ts
@@ -440,7 +440,7 @@ describe('createCloudflareVectorizeIntegration', () => {
})
describe('findByIds', () => {
- test('returns full EmbeddingRecord including embedding values', async () => {
+ test('returns full EmbeddingRecord including embedding values when populateEmbedding is true', async () => {
const mockBinding = createMockCloudflareBinding()
const { adapter } = createCloudflareVectorizeIntegration({
config: { default: { dims: DIMS } },
@@ -460,7 +460,7 @@ describe('createCloudflareVectorizeIntegration', () => {
})
const id = 'default:posts:doc-1:0'
- const records = await adapter.findByIds(mockPayload, 'default', [id])
+ const records = await adapter.findByIds(mockPayload, 'default', [id], true)
expect(records).toHaveLength(1)
const [r] = records
expect(r.id).toBe(id)
@@ -472,6 +472,34 @@ describe('createCloudflareVectorizeIntegration', () => {
expect((r as any).category).toBe('science')
})
+ test('omits embedding values by default', async () => {
+ const mockBinding = createMockCloudflareBinding()
+ const { adapter } = createCloudflareVectorizeIntegration({
+ config: { default: { dims: DIMS } },
+ binding: mockBinding as any,
+ })
+ const mockPayload = createMockPayload(mockBinding)
+
+ await adapter.storeChunk(mockPayload, 'default', {
+ sourceCollection: 'posts',
+ docId: 'doc-1',
+ chunkIndex: 0,
+ chunkText: 'find me',
+ embeddingVersion: 'v1',
+ embedding: [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8],
+ extensionFields: { category: 'science' },
+ })
+
+ const id = 'default:posts:doc-1:0'
+ const records = await adapter.findByIds(mockPayload, 'default', [id])
+ expect(records).toHaveLength(1)
+ const [r] = records
+ expect(r.id).toBe(id)
+ expect(r.embedding).toBeUndefined()
+ expect(r.chunkText).toBe('find me')
+ expect((r as any).category).toBe('science')
+ })
+
test('drops misses', async () => {
const mockBinding = createMockCloudflareBinding()
const { adapter } = createCloudflareVectorizeIntegration({
diff --git a/adapters/cf/src/findByIds.ts b/adapters/cf/src/findByIds.ts
index 35638f5..de7cf62 100644
--- a/adapters/cf/src/findByIds.ts
+++ b/adapters/cf/src/findByIds.ts
@@ -8,6 +8,7 @@ export default async (
payload: BasePayload,
_poolName: KnowledgePoolName,
ids: string[],
+ populateEmbedding = false,
): Promise> => {
if (ids.length === 0) return []
@@ -32,7 +33,7 @@ export default async (
: parseInt(String(metadata.chunkIndex ?? '0'), 10),
chunkText: String(metadata.chunkText ?? ''),
embeddingVersion: String(metadata.embeddingVersion ?? ''),
- embedding: Array.from(vector.values ?? []),
+ ...(populateEmbedding ? { embedding: Array.from(vector.values ?? []) } : {}),
...extensionFields,
}
})
diff --git a/adapters/mongodb/dev/specs/findByIds.spec.ts b/adapters/mongodb/dev/specs/findByIds.spec.ts
index 87d04ac..7cea989 100644
--- a/adapters/mongodb/dev/specs/findByIds.spec.ts
+++ b/adapters/mongodb/dev/specs/findByIds.spec.ts
@@ -54,19 +54,29 @@ describe('mongodb findByIds', () => {
await teardownDbs(payload, MONGO_URI, DB)
})
- test('returns full EmbeddingRecord including numeric embedding array', async () => {
- const records = await adapter.findByIds(payload, 'default', [embeddingId])
+ test('returns full EmbeddingRecord including numeric embedding array when populateEmbedding is true', async () => {
+ const records = await adapter.findByIds(payload, 'default', [embeddingId], true)
expect(records).toHaveLength(1)
const [r] = records
expect(r.id).toBe(embeddingId)
expect(Array.isArray(r.embedding)).toBe(true)
- expect(r.embedding.length).toBe(DIMS)
- expect(r.embedding.every((n) => typeof n === 'number')).toBe(true)
+ expect(r.embedding!.length).toBe(DIMS)
+ expect(r.embedding!.every((n) => typeof n === 'number')).toBe(true)
expect(r.sourceCollection).toBe('posts')
expect(r.chunkText).toBe('find me')
expect(r.embeddingVersion).toBe(testEmbeddingVersion)
})
+ test('omits the embedding array by default', async () => {
+ const records = await adapter.findByIds(payload, 'default', [embeddingId])
+ expect(records).toHaveLength(1)
+ const [r] = records
+ expect(r.id).toBe(embeddingId)
+ expect(r.embedding).toBeUndefined()
+ expect(r.sourceCollection).toBe('posts')
+ expect(r.chunkText).toBe('find me')
+ })
+
test('includes extension fields', async () => {
const [r] = await adapter.findByIds(payload, 'default', [embeddingId])
expect((r as any).category).toBe('science')
diff --git a/adapters/mongodb/src/findByIds.ts b/adapters/mongodb/src/findByIds.ts
index 3704714..d8b3b03 100644
--- a/adapters/mongodb/src/findByIds.ts
+++ b/adapters/mongodb/src/findByIds.ts
@@ -18,6 +18,7 @@ export async function findByIdsImpl(
_payload: BasePayload,
poolName: string,
ids: string[],
+ populateEmbedding = false,
): Promise {
if (ids.length === 0) return []
@@ -35,13 +36,16 @@ export async function findByIdsImpl(
const docs = await client
.db(ctx.dbName)
.collection(cfg.collectionName)
- .find({ _id: { $in: objectIds } })
+ .find({ _id: { $in: objectIds } }, populateEmbedding ? {} : { projection: { embedding: 0 } })
.toArray()
- return docs.map((doc) => mapDocToRecord(doc as Record))
+ return docs.map((doc) => mapDocToRecord(doc as Record, populateEmbedding))
}
-function mapDocToRecord(doc: Record): EmbeddingRecord {
+function mapDocToRecord(
+ doc: Record,
+ populateEmbedding: boolean,
+): EmbeddingRecord {
const extensionFields = Object.fromEntries(
Object.entries(doc).filter(([k]) => !RESERVED_AND_META.has(k)),
)
@@ -53,7 +57,9 @@ function mapDocToRecord(doc: Record): EmbeddingRecord {
typeof doc.chunkIndex === 'number' ? doc.chunkIndex : Number(doc.chunkIndex ?? 0),
chunkText: String(doc.chunkText ?? ''),
embeddingVersion: String(doc.embeddingVersion ?? ''),
- embedding: Array.isArray(doc.embedding) ? (doc.embedding as number[]) : [],
+ ...(populateEmbedding
+ ? { embedding: Array.isArray(doc.embedding) ? (doc.embedding as number[]) : [] }
+ : {}),
...extensionFields,
}
}
diff --git a/adapters/mongodb/src/index.ts b/adapters/mongodb/src/index.ts
index b1c0914..5a0d2d2 100644
--- a/adapters/mongodb/src/index.ts
+++ b/adapters/mongodb/src/index.ts
@@ -91,7 +91,8 @@ export const createMongoVectorIntegration = (
search: (payload, queryEmbedding, poolName, limit, where) =>
searchImpl(getCtx(), payload, queryEmbedding, poolName, limit, where),
- findByIds: (payload, poolName, ids) => findByIdsImpl(getCtx(), payload, poolName, ids),
+ findByIds: (payload, poolName, ids, populateEmbedding) =>
+ findByIdsImpl(getCtx(), payload, poolName, ids, populateEmbedding),
}
return { adapter }
diff --git a/adapters/pg/dev/specs/findByIds.spec.ts b/adapters/pg/dev/specs/findByIds.spec.ts
index c1c2323..eeb5cf8 100644
--- a/adapters/pg/dev/specs/findByIds.spec.ts
+++ b/adapters/pg/dev/specs/findByIds.spec.ts
@@ -74,19 +74,28 @@ describe('pg findByIds', () => {
await destroyPayload(payload)
})
- test('returns full EmbeddingRecord including numeric embedding array', async () => {
- const records = await integration.adapter.findByIds(payload, 'default', [embeddingId])
+ test('returns full EmbeddingRecord including numeric embedding array when populateEmbedding is true', async () => {
+ const records = await integration.adapter.findByIds(payload, 'default', [embeddingId], true)
expect(records).toHaveLength(1)
const [r] = records
expect(r.id).toBe(embeddingId)
expect(Array.isArray(r.embedding)).toBe(true)
- expect(r.embedding.length).toBe(DIMS)
- expect(r.embedding.every((n) => typeof n === 'number')).toBe(true)
+ expect(r.embedding!.length).toBe(DIMS)
+ expect(r.embedding!.every((n) => typeof n === 'number')).toBe(true)
expect(r.sourceCollection).toBe('posts')
expect(typeof r.chunkText).toBe('string')
expect(r.embeddingVersion).toBe(testEmbeddingVersion)
})
+ test('omits the embedding array by default', async () => {
+ const records = await integration.adapter.findByIds(payload, 'default', [embeddingId])
+ expect(records).toHaveLength(1)
+ const [r] = records
+ expect(r.id).toBe(embeddingId)
+ expect(r.embedding).toBeUndefined()
+ expect(r.sourceCollection).toBe('posts')
+ })
+
test('includes extension fields when the pool defines them', async () => {
const [r] = await integration.adapter.findByIds(payload, 'default', [embeddingId])
expect((r as any).category).toBe('science')
diff --git a/adapters/pg/src/findByIds.ts b/adapters/pg/src/findByIds.ts
index 76a9e9d..5c2f4e3 100644
--- a/adapters/pg/src/findByIds.ts
+++ b/adapters/pg/src/findByIds.ts
@@ -8,6 +8,7 @@ export default async (
payload: BasePayload,
poolName: KnowledgePoolName,
ids: string[],
+ populateEmbedding = false,
): Promise> => {
if (ids.length === 0) return []
@@ -37,7 +38,9 @@ export default async (
const selectObj: Record = {
id: table.id,
- embedding: table.embedding,
+ }
+ if (populateEmbedding) {
+ selectObj.embedding = table.embedding
}
for (const field of collectionConfig.fields ?? []) {
if (typeof field === 'object' && 'name' in field) {
@@ -51,12 +54,13 @@ export default async (
}
const rows = await drizzle.select(selectObj).from(table).where(inArray(table.id, numericIds))
- return mapRowsToRecords(rows, collectionConfig)
+ return mapRowsToRecords(rows, collectionConfig, populateEmbedding)
}
function mapRowsToRecords(
rows: Record[],
collectionConfig: SanitizedCollectionConfig,
+ populateEmbedding: boolean,
): Array {
const numberFields = new Set()
for (const field of collectionConfig.fields) {
@@ -75,7 +79,7 @@ function mapRowsToRecords(
docId: String(rawDocId),
chunkIndex:
typeof rawChunkIndex === 'number' ? rawChunkIndex : parseInt(String(rawChunkIndex), 10),
- embedding: parseEmbedding(row.embedding),
+ ...(populateEmbedding ? { embedding: parseEmbedding(row.embedding) } : {}),
} as EmbeddingRecord
for (const fieldName of numberFields) {
diff --git a/dev/helpers/mockAdapter.ts b/dev/helpers/mockAdapter.ts
index 932a1bd..80053a8 100644
--- a/dev/helpers/mockAdapter.ts
+++ b/dev/helpers/mockAdapter.ts
@@ -200,6 +200,7 @@ export const createMockAdapter = (options: MockAdapterOptions = {}): DbAdapter =
payload: BasePayload,
poolName: KnowledgePoolName,
ids: string[],
+ populateEmbedding = false,
): Promise => {
const records: EmbeddingRecord[] = []
for (const id of ids) {
@@ -220,7 +221,7 @@ export const createMockAdapter = (options: MockAdapterOptions = {}): DbAdapter =
} = doc as any
records.push({
id: stored.id,
- embedding: stored.embedding,
+ ...(populateEmbedding ? { embedding: stored.embedding } : {}),
...docFields,
} as EmbeddingRecord)
} catch (_e) {
diff --git a/dev/specs/vectorizedPayload.spec.ts b/dev/specs/vectorizedPayload.spec.ts
index 7895bd5..4f986a8 100644
--- a/dev/specs/vectorizedPayload.spec.ts
+++ b/dev/specs/vectorizedPayload.spec.ts
@@ -200,7 +200,7 @@ describe('VectorizedPayload', () => {
})
})
- describe('findEmbeddingsByIds method', () => {
+ describe('findByIds method', () => {
let embeddingId: string
beforeAll(async () => {
@@ -217,29 +217,44 @@ describe('VectorizedPayload', () => {
embeddingId = String(rows.docs[0].id)
})
- test('payload has findEmbeddingsByIds method', () => {
+ test('payload has findByIds method', () => {
const vectorizedPayload = getVectorizedPayload(payload)
- expect(typeof vectorizedPayload!.findEmbeddingsByIds).toBe('function')
+ expect(typeof vectorizedPayload!.findByIds).toBe('function')
})
- test('returns the full EmbeddingRecord including the embedding vector', async () => {
+ test('returns the full EmbeddingRecord including the embedding vector when populateEmbedding is true', async () => {
const vectorizedPayload = getVectorizedPayload(payload)!
- const records = await vectorizedPayload.findEmbeddingsByIds({
+ const records = await vectorizedPayload.findByIds({
knowledgePool: 'default',
ids: [embeddingId],
+ populateEmbedding: true,
})
expect(records).toHaveLength(1)
const [record] = records
expect(record.id).toBe(embeddingId)
expect(Array.isArray(record.embedding)).toBe(true)
- expect(record.embedding.length).toBe(DIMS)
+ expect(record.embedding!.length).toBe(DIMS)
+ expect(typeof record.sourceCollection).toBe('string')
+ expect(typeof record.chunkText).toBe('string')
+ })
+
+ test('omits the embedding vector by default', async () => {
+ const vectorizedPayload = getVectorizedPayload(payload)!
+ const records = await vectorizedPayload.findByIds({
+ knowledgePool: 'default',
+ ids: [embeddingId],
+ })
+ expect(records).toHaveLength(1)
+ const [record] = records
+ expect(record.id).toBe(embeddingId)
+ expect(record.embedding).toBeUndefined()
expect(typeof record.sourceCollection).toBe('string')
expect(typeof record.chunkText).toBe('string')
})
test('drops unknown ids (result length < ids length)', async () => {
const vectorizedPayload = getVectorizedPayload(payload)!
- const records = await vectorizedPayload.findEmbeddingsByIds({
+ const records = await vectorizedPayload.findByIds({
knowledgePool: 'default',
ids: [embeddingId, 'definitely-not-an-id-999999'],
})
@@ -249,7 +264,7 @@ describe('VectorizedPayload', () => {
test('empty ids returns []', async () => {
const vectorizedPayload = getVectorizedPayload(payload)!
- const records = await vectorizedPayload.findEmbeddingsByIds({
+ const records = await vectorizedPayload.findByIds({
knowledgePool: 'default',
ids: [],
})
diff --git a/src/index.ts b/src/index.ts
index 6e77714..45a961e 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -357,9 +357,18 @@ export default (pluginOptions: PayloadcmsVectorizeConfig) =>
params.limit,
params.where,
),
- findEmbeddingsByIds: (params: { knowledgePool: KnowledgePoolName; ids: string[] }) => {
+ findByIds: (params: {
+ knowledgePool: KnowledgePoolName
+ ids: string[]
+ populateEmbedding?: boolean
+ }) => {
if (params.ids.length === 0) return Promise.resolve([])
- return pluginOptions.dbAdapter.findByIds(payload, params.knowledgePool, params.ids)
+ return pluginOptions.dbAdapter.findByIds(
+ payload,
+ params.knowledgePool,
+ params.ids,
+ params.populateEmbedding ?? false,
+ )
},
queueEmbed: async (
params:
diff --git a/src/types.ts b/src/types.ts
index 0701068..09f685b 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -57,9 +57,10 @@ export type VectorizedPayload = {
_isBulkEmbedEnabled: (knowledgePool: KnowledgePoolName) => boolean
getDbAdapterCustom: () => Record | undefined
search: (params: VectorSearchQuery) => Promise>
- findEmbeddingsByIds: (params: {
+ findByIds: (params: {
knowledgePool: KnowledgePoolName
ids: string[]
+ populateEmbedding?: boolean
}) => Promise>
queueEmbed: (
params:
@@ -333,7 +334,7 @@ export interface EmbeddingRecord {
chunkIndex: number
chunkText: string
embeddingVersion: string
- embedding: number[]
+ embedding?: number[]
[key: string]: any
}
@@ -449,5 +450,6 @@ export type DbAdapter = {
payload: BasePayload,
poolName: KnowledgePoolName,
ids: string[],
+ populateEmbedding?: boolean,
) => Promise>
}
From e72adf4b8f4d85a015d859600156c9682edc57be Mon Sep 17 00:00:00 2001
From: techiejd <62455039+techiejd@users.noreply.github.com>
Date: Sat, 6 Jun 2026 18:32:04 +0700
Subject: [PATCH 14/17] docs: align README method name + EmbeddingRecord with
shipped findByIds
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Root README advertised a `findEmbeddingsByIds` method that doesn't exist —
the shipped public method is `findByIds`. Rename the method reference and
example, document the `populateEmbedding?` param (default false), and fix the
adapters/README EmbeddingRecord interface block to `embedding?: number[]`
(optional, present only when populateEmbedding: true).
---
README.md | 15 +++++++++------
adapters/README.md | 5 +++--
2 files changed, 12 insertions(+), 8 deletions(-)
diff --git a/README.md b/README.md
index 384a33d..fb4fba5 100644
--- a/README.md
+++ b/README.md
@@ -832,7 +832,7 @@ curl -X POST http://localhost:3000/api/vector-retry-failed-batch \
### Local API
-The plugin provides a `getVectorizedPayload(payload)` function which returns a `vectorizedPayload` object exposing `search`, `findEmbeddingsByIds`, `queueEmbed`, `bulkEmbed`, and `retryFailedBatch` methods.
+The plugin provides a `getVectorizedPayload(payload)` function which returns a `vectorizedPayload` object exposing `search`, `findByIds`, `queueEmbed`, `bulkEmbed`, and `retryFailedBatch` methods.
#### Getting the Vectorized Payload Object
@@ -883,23 +883,26 @@ const results = await vectorizedPayload.search({
})
```
-#### `vectorizedPayload.findEmbeddingsByIds(params)`
+#### `vectorizedPayload.findByIds(params)`
-Fetch stored embedding records by primary key — **including the raw embedding vector**, which the normal search/query API never returns. The `id` of each record is whatever [`search()`](#vectorizedpayloadsearchparams) returns as `result.id`, so a search result round-trips directly. This is the building block for "more like this" flows.
+Fetch stored embedding records by primary key. The `id` of each record is whatever [`search()`](#vectorizedpayloadsearchparams) returns as `result.id`, so a search result round-trips directly. Pass `populateEmbedding: true` to also get the raw embedding vector back (the normal search/query API never returns it) — the building block for "more like this" flows. It defaults to `false`, so by default you get the record's text and metadata without the heavy vector.
-**Returns:** `Promise>` — `EmbeddingRecord` is the search result shape without `score` and with `embedding: number[]`.
+**Params:** `{ knowledgePool: string; ids: string[]; populateEmbedding?: boolean }` (`populateEmbedding` defaults to `false`).
+
+**Returns:** `Promise>` — `EmbeddingRecord` is the search result shape without `score` and with an optional `embedding?: number[]`, present only when `populateEmbedding: true`.
**Example:**
```typescript
-const [record] = await vectorizedPayload.findEmbeddingsByIds({
+const [record] = await vectorizedPayload.findByIds({
knowledgePool: 'mainKnowledgePool',
ids: [''],
+ populateEmbedding: true,
})
if (record) {
// record.embedding is the raw number[] vector — feed it back into search for "more like this"
- console.log(record.embedding.length, record.chunkText)
+ console.log(record.embedding!.length, record.chunkText)
}
```
diff --git a/adapters/README.md b/adapters/README.md
index a600310..d33a85a 100644
--- a/adapters/README.md
+++ b/adapters/README.md
@@ -391,8 +391,9 @@ export interface EmbeddingRecord {
chunkText: string
/** Embedding model/version string. */
embeddingVersion: string
- /** The raw embedding vector — never returned by `search`. */
- embedding: number[]
+ /** The raw embedding vector — never returned by `search`, and only present
+ * when `findByIds` is called with `populateEmbedding: true`. */
+ embedding?: number[]
/** Any extensionFields persisted via storeChunk round-trip here. */
[key: string]: any
}
From 4011e91b4d86f41ff8cf393cb97d4fe1b9ca7250 Mon Sep 17 00:00:00 2001
From: techiejd <62455039+techiejd@users.noreply.github.com>
Date: Sat, 6 Jun 2026 21:28:55 +0700
Subject: [PATCH 15/17] fix(pg): support non-integer (uuid) ids in findByIds
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
findByIds filtered ids through /^\d+$/ and mapped to Number, hardcoding an
integer primary key. The embeddings collection defines no custom id, so under
postgresAdapter({ idType: 'uuid' }) every embedding id is a uuid — the filter
dropped all of them and findByIds returned [] for ids that exist, while
search() round-tripped the same uuids fine.
Pass ids straight to inArray; Postgres casts the text params to the column
type, supporting both integer and uuid PKs. Well-formed but nonexistent ids
are still absent from results; a malformed id now surfaces a backend error
rather than being silently dropped (documented in adapters/README).
Adds a uuid-idType regression spec.
---
adapters/README.md | 2 +-
adapters/pg/dev/specs/findByIds.spec.ts | 94 +++++++++++++++++++++++++
adapters/pg/src/findByIds.ts | 5 +-
3 files changed, 96 insertions(+), 5 deletions(-)
diff --git a/adapters/README.md b/adapters/README.md
index d33a85a..37467fe 100644
--- a/adapters/README.md
+++ b/adapters/README.md
@@ -170,7 +170,7 @@ export type DbAdapter = {
| `deleteChunks` | After a source document is deleted. | Remove every chunk where `sourceCollection === ... && docId === ...`. Must be safe to call when no chunks exist (no-op, no throw). |
| `hasEmbeddingVersion` | During bulk-embed planning, per candidate document. | Return `true` iff at least one chunk exists with the matching `(sourceCollection, docId, embeddingVersion)` triple. Must filter on **all three** — older `0.7.0` adapters that ignored `embeddingVersion` caused stale embeddings on model bumps. |
| `search` | Per `/vector-search` request and per `getVectorizedPayload().search()` call. | Translate `where` (Payload-style) into your store's filter language, perform a vector search using `queryEmbedding`, and return up to `limit` results sorted by descending relevance. |
-| `findByIds` | Per `getVectorizedPayload().findByIds()` call. | Fetch stored embedding records by primary key. The raw `embedding` vector is **only included when `populateEmbedding` is `true`** (default `false`) — omit it otherwise so callers that only need text/metadata don't pay for it. Where possible, skip reading the vector at the source (pg: don't select the column; MongoDB: `{ projection: { embedding: 0 } }`); CF's `getByIds` always returns values, so omit them post-fetch. Look up by the same `id` your `search` returns as `result.id`. Unknown ids are dropped (result length may be `< ids.length`); order is not guaranteed; empty `ids` returns `[]` without a backend call. Adapters with a strict id format (pg integer PK, MongoDB `ObjectId`) also drop *malformed* ids as misses without erroring; adapters keyed on an opaque id (CF's composite vector id) forward ids to the backend as-is, so a backend that rejects a malformed id may surface that error. |
+| `findByIds` | Per `getVectorizedPayload().findByIds()` call. | Fetch stored embedding records by primary key. The raw `embedding` vector is **only included when `populateEmbedding` is `true`** (default `false`) — omit it otherwise so callers that only need text/metadata don't pay for it. Where possible, skip reading the vector at the source (pg: don't select the column; MongoDB: `{ projection: { embedding: 0 } }`); CF's `getByIds` always returns values, so omit them post-fetch. Look up by the same `id` your `search` returns as `result.id`. Well-formed but nonexistent ids are dropped (result length may be `< ids.length`); order is not guaranteed; empty `ids` returns `[]` without a backend call. Whether a *malformed* id (wrong shape for the key type) is dropped or surfaces an error is adapter-specific: an adapter that validates the id shape itself (MongoDB drops non-24-hex ids via an `ObjectId` guard) treats it as a miss; an adapter that forwards ids straight to the backend (pg passes them to the `IN` query, supporting both integer and `uuid` PKs; CF forwards its composite vector id) lets the backend reject a malformed id, which may surface as an error. |
### Error contract
diff --git a/adapters/pg/dev/specs/findByIds.spec.ts b/adapters/pg/dev/specs/findByIds.spec.ts
index eeb5cf8..67a2930 100644
--- a/adapters/pg/dev/specs/findByIds.spec.ts
+++ b/adapters/pg/dev/specs/findByIds.spec.ts
@@ -112,3 +112,97 @@ describe('pg findByIds', () => {
expect(records).toEqual([])
})
})
+
+describe('pg findByIds (uuid idType)', () => {
+ let payload: Payload
+ const dbName = 'pg_find_by_ids_uuid_test'
+ let embeddingId: string
+
+ beforeAll(async () => {
+ await createTestDb({ dbName })
+ const config = await buildDummyConfig({
+ jobs: { tasks: [], autoRun: [{ cron: '*/5 * * * * *', limit: 10 }] },
+ collections: [
+ { slug: 'posts', fields: [
+ { name: 'title', type: 'text' },
+ { name: 'category', type: 'text' },
+ ] },
+ ],
+ db: postgresAdapter({
+ idType: 'uuid',
+ extensions: ['vector'],
+ afterSchemaInit: [integration.afterSchemaInitHook],
+ pool: { connectionString: `postgresql://postgres:password@localhost:5433/${dbName}` },
+ }),
+ plugins: [
+ plugin({
+ knowledgePools: {
+ default: {
+ collections: {
+ posts: {
+ toKnowledgePool: async (doc) => {
+ const chunks: Array<{ chunk: string; category?: string }> = []
+ if (doc.title) {
+ for (const chunk of chunkText(doc.title)) {
+ chunks.push({ chunk, category: doc.category || 'general' })
+ }
+ }
+ return chunks
+ },
+ },
+ },
+ extensionFields: [{ name: 'category', type: 'text' }],
+ embeddingConfig: {
+ version: testEmbeddingVersion,
+ queryFn: makeDummyEmbedQuery(DIMS),
+ realTimeIngestionFn: makeDummyEmbedDocs(DIMS),
+ },
+ },
+ },
+ }),
+ ],
+ })
+ payload = await getPayload({ config, key: `pg-find-by-ids-uuid-${Date.now()}`, cron: true })
+
+ const post = await payload.create({
+ collection: 'posts',
+ data: { title: 'Find me by uuid', category: 'science' },
+ })
+ await waitForVectorizationJobs(payload)
+ const rows = await payload.find({
+ collection: 'default' as any,
+ where: { docId: { equals: String(post.id) } },
+ limit: 1,
+ })
+ embeddingId = String(rows.docs[0].id)
+ })
+
+ afterAll(async () => {
+ await destroyPayload(payload)
+ })
+
+ test('embedding id is a uuid, not a numeric PK', () => {
+ expect(embeddingId).toMatch(
+ /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i,
+ )
+ })
+
+ test('findByIds resolves a uuid id (regression: numeric-only filter dropped uuids)', async () => {
+ const records = await integration.adapter.findByIds(payload, 'default', [embeddingId], true)
+ expect(records).toHaveLength(1)
+ const [r] = records
+ expect(r.id).toBe(embeddingId)
+ expect(Array.isArray(r.embedding)).toBe(true)
+ expect(r.embedding!.length).toBe(DIMS)
+ expect((r as any).category).toBe('science')
+ })
+
+ test('drops a well-formed but nonexistent uuid', async () => {
+ const records = await integration.adapter.findByIds(payload, 'default', [
+ embeddingId,
+ '00000000-0000-0000-0000-000000000000',
+ ])
+ expect(records).toHaveLength(1)
+ expect(records[0].id).toBe(embeddingId)
+ })
+})
diff --git a/adapters/pg/src/findByIds.ts b/adapters/pg/src/findByIds.ts
index 5c2f4e3..3dc26b4 100644
--- a/adapters/pg/src/findByIds.ts
+++ b/adapters/pg/src/findByIds.ts
@@ -33,9 +33,6 @@ export default async (
)
}
- const numericIds = ids.filter((id) => /^\d+$/.test(id)).map(Number)
- if (numericIds.length === 0) return []
-
const selectObj: Record = {
id: table.id,
}
@@ -53,7 +50,7 @@ export default async (
}
}
- const rows = await drizzle.select(selectObj).from(table).where(inArray(table.id, numericIds))
+ const rows = await drizzle.select(selectObj).from(table).where(inArray(table.id, ids))
return mapRowsToRecords(rows, collectionConfig, populateEmbedding)
}
From 1a9063d059baeac248fb3a10e608f59a7632415f Mon Sep 17 00:00:00 2001
From: techiejd <62455039+techiejd@users.noreply.github.com>
Date: Sat, 6 Jun 2026 21:39:11 +0700
Subject: [PATCH 16/17] fix(pg): coerce nullable text fields to "" in findByIds
and search
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
chunkText and embeddingVersion are not required in the embeddings schema, so a
null column was spread through raw as `null`, violating EmbeddingRecord /
VectorSearchResult (`chunkText: string`). CF and MongoDB both coerce via
String(x ?? '') → '', so identical data round-tripped as '' on those adapters
but null on pg; a consumer doing record.chunkText.length crashed only on pg.
Coerce sourceCollection/chunkText/embeddingVersion via String(x ?? '') in both
mapRowsToRecords (findByIds) and mapRowsToResults (search) so pg matches the
declared types and the other adapters. Adds a regression test.
---
adapters/pg/dev/specs/findByIds.spec.ts | 17 +++++++++++++++++
adapters/pg/src/findByIds.ts | 5 ++++-
adapters/pg/src/search.ts | 5 ++++-
3 files changed, 25 insertions(+), 2 deletions(-)
diff --git a/adapters/pg/dev/specs/findByIds.spec.ts b/adapters/pg/dev/specs/findByIds.spec.ts
index 67a2930..beb35c2 100644
--- a/adapters/pg/dev/specs/findByIds.spec.ts
+++ b/adapters/pg/dev/specs/findByIds.spec.ts
@@ -1,6 +1,8 @@
import type { Payload } from 'payload'
import { afterAll, beforeAll, describe, expect, test } from 'vitest'
import { postgresAdapter } from '@payloadcms/db-postgres'
+import { eq } from '@payloadcms/db-postgres/drizzle'
+import { getEmbeddingsTable } from '../../src/drizzle.js'
import { buildDummyConfig, integration, plugin, DIMS } from './constants.js'
import { createTestDb, destroyPayload, waitForVectorizationJobs } from './utils.js'
import { getPayload } from 'payload'
@@ -111,6 +113,21 @@ describe('pg findByIds', () => {
const records = await integration.adapter.findByIds(payload, 'default', [])
expect(records).toEqual([])
})
+
+ test('coerces null chunkText/embeddingVersion to "" (EmbeddingRecord type)', async () => {
+ // These columns are not required in the embeddings schema, so a row can have
+ // nulls. Set them directly and confirm findByIds returns '' (parity with cf/mongo),
+ // not null — which would violate EmbeddingRecord's `chunkText: string`.
+ const table = getEmbeddingsTable('default')!
+ await (payload.db as any).drizzle
+ .update(table)
+ .set({ chunkText: null, embeddingVersion: null })
+ .where(eq(table.id, Number(embeddingId)))
+
+ const [r] = await integration.adapter.findByIds(payload, 'default', [embeddingId])
+ expect(r.chunkText).toBe('')
+ expect(r.embeddingVersion).toBe('')
+ })
})
describe('pg findByIds (uuid idType)', () => {
diff --git a/adapters/pg/src/findByIds.ts b/adapters/pg/src/findByIds.ts
index 3dc26b4..cf64dc3 100644
--- a/adapters/pg/src/findByIds.ts
+++ b/adapters/pg/src/findByIds.ts
@@ -73,9 +73,12 @@ function mapRowsToRecords(
const record = {
...row,
id: String(row.id),
- docId: String(rawDocId),
+ sourceCollection: String(row.sourceCollection ?? ''),
+ docId: String(rawDocId ?? ''),
chunkIndex:
typeof rawChunkIndex === 'number' ? rawChunkIndex : parseInt(String(rawChunkIndex), 10),
+ chunkText: String(row.chunkText ?? ''),
+ embeddingVersion: String(row.embeddingVersion ?? ''),
...(populateEmbedding ? { embedding: parseEmbedding(row.embedding) } : {}),
} as EmbeddingRecord
diff --git a/adapters/pg/src/search.ts b/adapters/pg/src/search.ts
index ce31c56..54dcc20 100644
--- a/adapters/pg/src/search.ts
+++ b/adapters/pg/src/search.ts
@@ -303,10 +303,13 @@ function mapRowsToResults(
const result = {
...row,
id: String(row.id),
- docId: String(rawDocId),
+ sourceCollection: String(row.sourceCollection ?? ''),
+ docId: String(rawDocId ?? ''),
score: typeof rawScore === 'number' ? rawScore : parseFloat(String(rawScore)),
chunkIndex:
typeof rawChunkIndex === 'number' ? rawChunkIndex : parseInt(String(rawChunkIndex), 10),
+ chunkText: String(row.chunkText ?? ''),
+ embeddingVersion: String(row.embeddingVersion ?? ''),
} as VectorSearchResult
// Ensure any number fields from the schema are numbers in the result
From c357d789eabe129e27dcbf3352865d1f4c336ee4 Mon Sep 17 00:00:00 2001
From: techiejd <62455039+techiejd@users.noreply.github.com>
Date: Wed, 10 Jun 2026 20:50:45 +0700
Subject: [PATCH 17/17] feat(find-by-ids): return a map keyed by id instead of
an array
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
findByIds now returns Record<string, EmbeddingRecord | undefined> instead of
Array<EmbeddingRecord>. Order isn't preserved by any backend and a lookup may
miss, so an array forced callers to re-join by id and made misses a silent gap.
Keying by the requested id makes the round-trip O(1) (records[searchHit.id]),
order irrelevant, and a miss an explicit undefined. Every requested id is a key.
Unify the malformed-id contract: unknown AND malformed ids map to undefined,
never throw. pg now filters ids that don't match the PK column type
(getSQLType: numeric for integer/serial, uuid-shaped for uuid) before the IN
query, so a bad id is a miss instead of a cast error that poisoned the batch —
matching mongo (non-24-hex drop) and cf (unknown ids absent from getByIds).
Stop the mock adapter from swallowing real errors: only Payload NotFound is
treated as a miss; everything else rethrows.
Docs + specs updated across all adapters; note that key order is not input
order (integer-like keys sort first) so callers must look up by id.
---
README.md | 10 ++--
adapters/README.md | 9 ++--
adapters/cf/dev/specs/adapter.spec.ts | 21 ++++----
adapters/cf/src/findByIds.ts | 15 +++---
adapters/mongodb/dev/specs/findByIds.spec.ts | 26 +++++-----
adapters/mongodb/src/findByIds.ts | 14 ++++--
adapters/pg/dev/specs/findByIds.spec.ts | 52 +++++++++++++-------
adapters/pg/src/findByIds.ts | 32 ++++++++++--
dev/helpers/mockAdapter.ts | 42 +++++++++-------
dev/specs/vectorizedPayload.spec.ts | 21 ++++----
src/index.ts | 2 +-
src/types.ts | 4 +-
12 files changed, 159 insertions(+), 89 deletions(-)
diff --git a/README.md b/README.md
index fb4fba5..5499040 100644
--- a/README.md
+++ b/README.md
@@ -889,24 +889,26 @@ Fetch stored embedding records by primary key. The `id` of each record is whatev
**Params:** `{ knowledgePool: string; ids: string[]; populateEmbedding?: boolean }` (`populateEmbedding` defaults to `false`).
-**Returns:** `Promise>` — `EmbeddingRecord` is the search result shape without `score` and with an optional `embedding?: number[]`, present only when `populateEmbedding: true`.
+**Returns:** `Promise<Record<string, EmbeddingRecord | undefined>>` — an object keyed by the ids you passed in. Each requested id is present as a key; a found record is the value, and an unknown or malformed id maps to `undefined`. `EmbeddingRecord` is the search result shape without `score` and with an optional `embedding?: number[]`, present only when `populateEmbedding: true`.
**Example:**
```typescript
-const [record] = await vectorizedPayload.findByIds({
+const id = ''
+const records = await vectorizedPayload.findByIds({
knowledgePool: 'mainKnowledgePool',
- ids: [''],
+ ids: [id],
populateEmbedding: true,
})
+const record = records[id]
if (record) {
// record.embedding is the raw number[] vector — feed it back into search for "more like this"
console.log(record.embedding!.length, record.chunkText)
}
```
-Misses are dropped (the result may be shorter than `ids`), order is not guaranteed, and an empty `ids` array returns `[]` without touching the backend.
+Because the result is keyed by id, a search result round-trips directly (`records[searchHit.id]`) and there's no positional alignment to worry about — look records up by id rather than relying on key order. Unknown or malformed ids map to `undefined` (never throw), and an empty `ids` array returns `{}` without touching the backend.
#### `vectorizedPayload.queueEmbed(params)`
diff --git a/adapters/README.md b/adapters/README.md
index 37467fe..9b9cbac 100644
--- a/adapters/README.md
+++ b/adapters/README.md
@@ -157,7 +157,7 @@ export type DbAdapter = {
poolName: KnowledgePoolName,
ids: string[],
populateEmbedding?: boolean,
- ) => Promise>
+ ) => Promise>
}
```
@@ -170,7 +170,7 @@ export type DbAdapter = {
| `deleteChunks` | After a source document is deleted. | Remove every chunk where `sourceCollection === ... && docId === ...`. Must be safe to call when no chunks exist (no-op, no throw). |
| `hasEmbeddingVersion` | During bulk-embed planning, per candidate document. | Return `true` iff at least one chunk exists with the matching `(sourceCollection, docId, embeddingVersion)` triple. Must filter on **all three** — older `0.7.0` adapters that ignored `embeddingVersion` caused stale embeddings on model bumps. |
| `search` | Per `/vector-search` request and per `getVectorizedPayload().search()` call. | Translate `where` (Payload-style) into your store's filter language, perform a vector search using `queryEmbedding`, and return up to `limit` results sorted by descending relevance. |
-| `findByIds` | Per `getVectorizedPayload().findByIds()` call. | Fetch stored embedding records by primary key. The raw `embedding` vector is **only included when `populateEmbedding` is `true`** (default `false`) — omit it otherwise so callers that only need text/metadata don't pay for it. Where possible, skip reading the vector at the source (pg: don't select the column; MongoDB: `{ projection: { embedding: 0 } }`); CF's `getByIds` always returns values, so omit them post-fetch. Look up by the same `id` your `search` returns as `result.id`. Well-formed but nonexistent ids are dropped (result length may be `< ids.length`); order is not guaranteed; empty `ids` returns `[]` without a backend call. Whether a *malformed* id (wrong shape for the key type) is dropped or surfaces an error is adapter-specific: an adapter that validates the id shape itself (MongoDB drops non-24-hex ids via an `ObjectId` guard) treats it as a miss; an adapter that forwards ids straight to the backend (pg passes them to the `IN` query, supporting both integer and `uuid` PKs; CF forwards its composite vector id) lets the backend reject a malformed id, which may surface as an error. |
+| `findByIds` | Per `getVectorizedPayload().findByIds()` call. | Fetch stored embedding records by primary key. **Return an object keyed by the ids you were given:** every requested id must be present as a key, with a found record as the value and `undefined` for any id that didn't resolve. The raw `embedding` vector is **only included when `populateEmbedding` is `true`** (default `false`) — omit it otherwise so callers that only need text/metadata don't pay for it. Where possible, skip reading the vector at the source (pg: don't select the column; MongoDB: `{ projection: { embedding: 0 } }`); CF's `getByIds` always returns values, so omit them post-fetch. Look up by the same `id` your `search` returns as `result.id`. Unknown **and** malformed ids must map to `undefined` — never throw for a bad id. Validate the id shape against your key type before querying so a malformed id can't error the whole batch (MongoDB drops non-24-hex ids; pg drops ids that don't match the PK column type — numeric for integer PKs, uuid-shaped for `uuid` PKs — before the `IN` query; CF's ids are arbitrary strings, so an unknown one is simply absent from `getByIds`). Empty `ids` returns `{}` without a backend call. |
### Error contract
@@ -299,8 +299,9 @@ export const createYourDbVectorIntegration = (
findByIds: async (payload, poolName, ids, populateEmbedding = false) => {
// TODO: fetch stored records by primary key. Include the raw `embedding` vector
// only when `populateEmbedding` is true (default false); skip reading it otherwise.
- // Return Array. Unknown ids are misses (drop them, don't throw).
- return []
+ // Return an object keyed by every requested id: a record for hits, `undefined`
+ // for unknown or malformed ids (never throw for a bad id).
+ return Object.fromEntries(ids.map((id) => [id, undefined]))
},
}
diff --git a/adapters/cf/dev/specs/adapter.spec.ts b/adapters/cf/dev/specs/adapter.spec.ts
index dac497f..ee988be 100644
--- a/adapters/cf/dev/specs/adapter.spec.ts
+++ b/adapters/cf/dev/specs/adapter.spec.ts
@@ -461,8 +461,8 @@ describe('createCloudflareVectorizeIntegration', () => {
const id = 'default:posts:doc-1:0'
const records = await adapter.findByIds(mockPayload, 'default', [id], true)
- expect(records).toHaveLength(1)
- const [r] = records
+ expect(Object.keys(records)).toEqual([id])
+ const r = records[id]!
expect(r.id).toBe(id)
expect(r.embedding).toEqual(embedding)
expect(r.sourceCollection).toBe('posts')
@@ -492,15 +492,15 @@ describe('createCloudflareVectorizeIntegration', () => {
const id = 'default:posts:doc-1:0'
const records = await adapter.findByIds(mockPayload, 'default', [id])
- expect(records).toHaveLength(1)
- const [r] = records
+ expect(Object.keys(records)).toEqual([id])
+ const r = records[id]!
expect(r.id).toBe(id)
expect(r.embedding).toBeUndefined()
expect(r.chunkText).toBe('find me')
expect((r as any).category).toBe('science')
})
- test('drops misses', async () => {
+ test('maps misses to undefined', async () => {
const mockBinding = createMockCloudflareBinding()
const { adapter } = createCloudflareVectorizeIntegration({
config: { default: { dims: DIMS } },
@@ -520,11 +520,14 @@ describe('createCloudflareVectorizeIntegration', () => {
'default:posts:doc-1:0',
'default:posts:nope:0',
])
- expect(records).toHaveLength(1)
- expect(records[0].id).toBe('default:posts:doc-1:0')
+ expect(Object.keys(records).sort()).toEqual(
+ ['default:posts:doc-1:0', 'default:posts:nope:0'].sort(),
+ )
+ expect(records['default:posts:doc-1:0']!.id).toBe('default:posts:doc-1:0')
+ expect(records['default:posts:nope:0']).toBeUndefined()
})
- test('empty ids returns []', async () => {
+ test('empty ids returns {}', async () => {
const mockBinding = createMockCloudflareBinding()
const { adapter } = createCloudflareVectorizeIntegration({
config: { default: { dims: DIMS } },
@@ -532,7 +535,7 @@ describe('createCloudflareVectorizeIntegration', () => {
})
const mockPayload = createMockPayload(mockBinding)
const records = await adapter.findByIds(mockPayload, 'default', [])
- expect(records).toEqual([])
+ expect(records).toEqual({})
})
})
})
diff --git a/adapters/cf/src/findByIds.ts b/adapters/cf/src/findByIds.ts
index de7cf62..c8f2104 100644
--- a/adapters/cf/src/findByIds.ts
+++ b/adapters/cf/src/findByIds.ts
@@ -9,21 +9,23 @@ export default async (
_poolName: KnowledgePoolName,
ids: string[],
populateEmbedding = false,
-): Promise> => {
- if (ids.length === 0) return []
+): Promise> => {
+ const result: Record = {}
+ for (const id of ids) result[id] = undefined
+ if (ids.length === 0) return result
const binding = getVectorizeBinding(payload)
try {
const vectors = await binding.getByIds(ids)
- if (!vectors) return []
+ if (!vectors) return result
- return vectors.map((vector) => {
+ for (const vector of vectors) {
const metadata = (vector.metadata || {}) as Record
const extensionFields = Object.fromEntries(
Object.entries(metadata).filter(([k]) => !RESERVED_METADATA.includes(k)),
)
- return {
+ result[vector.id] = {
id: vector.id,
sourceCollection: String(metadata.sourceCollection ?? ''),
docId: String(metadata.docId ?? ''),
@@ -36,7 +38,8 @@ export default async (
...(populateEmbedding ? { embedding: Array.from(vector.values ?? []) } : {}),
...extensionFields,
}
- })
+ }
+ return result
} catch (e) {
const errorMessage = e instanceof Error ? e.message : String(e)
payload.logger.error(`[@payloadcms-vectorize/cf] findByIds failed: ${errorMessage}`)
diff --git a/adapters/mongodb/dev/specs/findByIds.spec.ts b/adapters/mongodb/dev/specs/findByIds.spec.ts
index 7cea989..231a0e4 100644
--- a/adapters/mongodb/dev/specs/findByIds.spec.ts
+++ b/adapters/mongodb/dev/specs/findByIds.spec.ts
@@ -56,8 +56,8 @@ describe('mongodb findByIds', () => {
test('returns full EmbeddingRecord including numeric embedding array when populateEmbedding is true', async () => {
const records = await adapter.findByIds(payload, 'default', [embeddingId], true)
- expect(records).toHaveLength(1)
- const [r] = records
+ expect(Object.keys(records)).toEqual([embeddingId])
+ const r = records[embeddingId]!
expect(r.id).toBe(embeddingId)
expect(Array.isArray(r.embedding)).toBe(true)
expect(r.embedding!.length).toBe(DIMS)
@@ -69,8 +69,8 @@ describe('mongodb findByIds', () => {
test('omits the embedding array by default', async () => {
const records = await adapter.findByIds(payload, 'default', [embeddingId])
- expect(records).toHaveLength(1)
- const [r] = records
+ expect(Object.keys(records)).toEqual([embeddingId])
+ const r = records[embeddingId]!
expect(r.id).toBe(embeddingId)
expect(r.embedding).toBeUndefined()
expect(r.sourceCollection).toBe('posts')
@@ -78,22 +78,26 @@ describe('mongodb findByIds', () => {
})
test('includes extension fields', async () => {
- const [r] = await adapter.findByIds(payload, 'default', [embeddingId])
- expect((r as any).category).toBe('science')
+ const records = await adapter.findByIds(payload, 'default', [embeddingId])
+ expect((records[embeddingId] as any).category).toBe('science')
})
- test('drops misses and invalid ids without throwing', async () => {
+ test('maps misses and invalid ids to undefined without throwing', async () => {
const records = await adapter.findByIds(payload, 'default', [
embeddingId,
'000000000000000000000000',
'not-an-object-id',
])
- expect(records).toHaveLength(1)
- expect(records[0].id).toBe(embeddingId)
+ expect(Object.keys(records).sort()).toEqual(
+ [embeddingId, '000000000000000000000000', 'not-an-object-id'].sort(),
+ )
+ expect(records[embeddingId]!.id).toBe(embeddingId)
+ expect(records['000000000000000000000000']).toBeUndefined()
+ expect(records['not-an-object-id']).toBeUndefined()
})
- test('empty ids returns []', async () => {
+ test('empty ids returns {}', async () => {
const records = await adapter.findByIds(payload, 'default', [])
- expect(records).toEqual([])
+ expect(records).toEqual({})
})
})
diff --git a/adapters/mongodb/src/findByIds.ts b/adapters/mongodb/src/findByIds.ts
index d8b3b03..fe90203 100644
--- a/adapters/mongodb/src/findByIds.ts
+++ b/adapters/mongodb/src/findByIds.ts
@@ -19,8 +19,10 @@ export async function findByIdsImpl(
poolName: string,
ids: string[],
populateEmbedding = false,
-): Promise {
- if (ids.length === 0) return []
+): Promise> {
+ const result: Record = {}
+ for (const id of ids) result[id] = undefined
+ if (ids.length === 0) return result
const cfg = ctx.pools[poolName]
if (!cfg) {
@@ -30,7 +32,7 @@ export async function findByIdsImpl(
}
const objectIds = ids.filter((id) => HEX24.test(id)).map((id) => new ObjectId(id))
- if (objectIds.length === 0) return []
+ if (objectIds.length === 0) return result
const client = await getMongoClient(ctx.uri)
const docs = await client
@@ -39,7 +41,11 @@ export async function findByIdsImpl(
.find({ _id: { $in: objectIds } }, populateEmbedding ? {} : { projection: { embedding: 0 } })
.toArray()
- return docs.map((doc) => mapDocToRecord(doc as Record, populateEmbedding))
+ for (const doc of docs) {
+ const record = mapDocToRecord(doc as Record, populateEmbedding)
+ result[record.id] = record
+ }
+ return result
}
function mapDocToRecord(
diff --git a/adapters/pg/dev/specs/findByIds.spec.ts b/adapters/pg/dev/specs/findByIds.spec.ts
index beb35c2..f03a260 100644
--- a/adapters/pg/dev/specs/findByIds.spec.ts
+++ b/adapters/pg/dev/specs/findByIds.spec.ts
@@ -78,8 +78,8 @@ describe('pg findByIds', () => {
test('returns full EmbeddingRecord including numeric embedding array when populateEmbedding is true', async () => {
const records = await integration.adapter.findByIds(payload, 'default', [embeddingId], true)
- expect(records).toHaveLength(1)
- const [r] = records
+ expect(Object.keys(records)).toEqual([embeddingId])
+ const r = records[embeddingId]!
expect(r.id).toBe(embeddingId)
expect(Array.isArray(r.embedding)).toBe(true)
expect(r.embedding!.length).toBe(DIMS)
@@ -91,27 +91,35 @@ describe('pg findByIds', () => {
test('omits the embedding array by default', async () => {
const records = await integration.adapter.findByIds(payload, 'default', [embeddingId])
- expect(records).toHaveLength(1)
- const [r] = records
+ expect(Object.keys(records)).toEqual([embeddingId])
+ const r = records[embeddingId]!
expect(r.id).toBe(embeddingId)
expect(r.embedding).toBeUndefined()
expect(r.sourceCollection).toBe('posts')
})
test('includes extension fields when the pool defines them', async () => {
- const [r] = await integration.adapter.findByIds(payload, 'default', [embeddingId])
- expect((r as any).category).toBe('science')
+ const records = await integration.adapter.findByIds(payload, 'default', [embeddingId])
+ expect((records[embeddingId] as any).category).toBe('science')
})
- test('drops misses', async () => {
+ test('maps a well-formed but nonexistent id to undefined', async () => {
const records = await integration.adapter.findByIds(payload, 'default', [embeddingId, '999999'])
- expect(records).toHaveLength(1)
- expect(records[0].id).toBe(embeddingId)
+ expect(Object.keys(records).sort()).toEqual([embeddingId, '999999'].sort())
+ expect(records[embeddingId]!.id).toBe(embeddingId)
+ expect(records['999999']).toBeUndefined()
+ })
+
+ test('maps a malformed (non-numeric) id to undefined instead of throwing', async () => {
+ const records = await integration.adapter.findByIds(payload, 'default', [embeddingId, 'not-an-id'])
+ expect(Object.keys(records).sort()).toEqual([embeddingId, 'not-an-id'].sort())
+ expect(records[embeddingId]!.id).toBe(embeddingId)
+ expect(records['not-an-id']).toBeUndefined()
})
- test('empty ids returns []', async () => {
+ test('empty ids returns {}', async () => {
const records = await integration.adapter.findByIds(payload, 'default', [])
- expect(records).toEqual([])
+ expect(records).toEqual({})
})
test('coerces null chunkText/embeddingVersion to "" (EmbeddingRecord type)', async () => {
@@ -124,7 +132,7 @@ describe('pg findByIds', () => {
.set({ chunkText: null, embeddingVersion: null })
.where(eq(table.id, Number(embeddingId)))
- const [r] = await integration.adapter.findByIds(payload, 'default', [embeddingId])
+ const r = (await integration.adapter.findByIds(payload, 'default', [embeddingId]))[embeddingId]!
expect(r.chunkText).toBe('')
expect(r.embeddingVersion).toBe('')
})
@@ -206,20 +214,30 @@ describe('pg findByIds (uuid idType)', () => {
test('findByIds resolves a uuid id (regression: numeric-only filter dropped uuids)', async () => {
const records = await integration.adapter.findByIds(payload, 'default', [embeddingId], true)
- expect(records).toHaveLength(1)
- const [r] = records
+ expect(Object.keys(records)).toEqual([embeddingId])
+ const r = records[embeddingId]!
expect(r.id).toBe(embeddingId)
expect(Array.isArray(r.embedding)).toBe(true)
expect(r.embedding!.length).toBe(DIMS)
expect((r as any).category).toBe('science')
})
- test('drops a well-formed but nonexistent uuid', async () => {
+ test('maps a well-formed but nonexistent uuid to undefined', async () => {
const records = await integration.adapter.findByIds(payload, 'default', [
embeddingId,
'00000000-0000-0000-0000-000000000000',
])
- expect(records).toHaveLength(1)
- expect(records[0].id).toBe(embeddingId)
+ expect(Object.keys(records).sort()).toEqual(
+ [embeddingId, '00000000-0000-0000-0000-000000000000'].sort(),
+ )
+ expect(records[embeddingId]!.id).toBe(embeddingId)
+ expect(records['00000000-0000-0000-0000-000000000000']).toBeUndefined()
+ })
+
+ test('maps a malformed (non-uuid) id to undefined instead of throwing', async () => {
+ const records = await integration.adapter.findByIds(payload, 'default', [embeddingId, '999999'])
+ expect(Object.keys(records).sort()).toEqual([embeddingId, '999999'].sort())
+ expect(records[embeddingId]!.id).toBe(embeddingId)
+ expect(records['999999']).toBeUndefined()
})
})
diff --git a/adapters/pg/src/findByIds.ts b/adapters/pg/src/findByIds.ts
index cf64dc3..9e51aeb 100644
--- a/adapters/pg/src/findByIds.ts
+++ b/adapters/pg/src/findByIds.ts
@@ -9,8 +9,10 @@ export default async (
poolName: KnowledgePoolName,
ids: string[],
populateEmbedding = false,
-): Promise> => {
- if (ids.length === 0) return []
+): Promise> => {
+ const result: Record = {}
+ for (const id of ids) result[id] = undefined
+ if (ids.length === 0) return result
const isPostgres = payload.db?.pool?.query || payload.db?.drizzle
if (!isPostgres) {
@@ -33,6 +35,12 @@ export default async (
)
}
+ // Drop ids that can't match the primary-key column type before querying, so a
+ // malformed id is treated as a miss instead of making Postgres reject the cast
+ // and throw for the whole batch.
+ const queryableIds = ids.filter((id) => idMatchesPkType(table.id, id))
+ if (queryableIds.length === 0) return result
+
const selectObj: Record = {
id: table.id,
}
@@ -50,8 +58,47 @@ export default async (
}
}
- const rows = await drizzle.select(selectObj).from(table).where(inArray(table.id, ids))
- return mapRowsToRecords(rows, collectionConfig, populateEmbedding)
+ const rows = await drizzle.select(selectObj).from(table).where(inArray(table.id, queryableIds))
+ for (const record of mapRowsToRecords(rows, collectionConfig, populateEmbedding)) {
+ result[record.id] = record
+ }
+ return result
+}
+
+const UUID = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i
+const UNSIGNED_INT = /^\d+$/
+
+// Largest value each Postgres integer primary-key type can hold, keyed by the
+// SQL type name the id column reports via `getSQLType()`. `serial` and
+// `bigserial` share the ranges of `integer` and `bigint` respectively.
+const INT_PK_MAX = new Map<string, bigint>([
+  ['integer', BigInt('2147483647')],
+  ['serial', BigInt('2147483647')],
+  ['bigint', BigInt('9223372036854775807')],
+  ['bigserial', BigInt('9223372036854775807')],
+])
+
+// Reports whether `id` can be compared against the primary-key column without
+// Postgres rejecting the value.
+//
+// idColumn: the table's id column; its SQL type is read through the optional
+//   `getSQLType()` (a column without it is treated as an unrecognised type).
+// id: a caller-supplied id string.
+// Returns false when the id cannot be a value of the column's type, so the
+//   caller can treat it as a miss; true for ids that fit, and for any column
+//   type not recognised here (those ids reach the query unchanged).
+function idMatchesPkType(idColumn: { getSQLType?: () => string }, id: string): boolean {
+  const sqlType = idColumn.getSQLType?.() ?? ''
+  const max = INT_PK_MAX.get(sqlType)
+  if (max !== undefined) {
+    // An all-digit id above the column's range is still rejected by Postgres
+    // ("out of range") and would fail the whole batch, so it is a miss too.
+    return UNSIGNED_INT.test(id) && BigInt(id) <= max
+  }
+  if (sqlType === 'uuid') {
+    return UUID.test(id)
+  }
+  return true
}
function mapRowsToRecords(
diff --git a/dev/helpers/mockAdapter.ts b/dev/helpers/mockAdapter.ts
index 80053a8..0659f1d 100644
--- a/dev/helpers/mockAdapter.ts
+++ b/dev/helpers/mockAdapter.ts
@@ -201,31 +201,37 @@ export const createMockAdapter = (options: MockAdapterOptions = {}): DbAdapter =
poolName: KnowledgePoolName,
ids: string[],
populateEmbedding = false,
- ): Promise => {
- const records: EmbeddingRecord[] = []
+ ): Promise> => {
+ const records: Record = {}
for (const id of ids) {
+ records[id] = undefined
const stored = storage.get(`${poolName}:${id}`)
if (!stored) continue
+ let doc: Record | null
try {
- const doc = await payload.findByID({
+ doc = (await payload.findByID({
collection: poolName as CollectionSlug,
id: stored.id,
- })
- if (!doc) continue
- const {
- id: _id,
- createdAt: _createdAt,
- updatedAt: _updatedAt,
- embedding: _embedding,
- ...docFields
- } = doc as any
- records.push({
- id: stored.id,
- ...(populateEmbedding ? { embedding: stored.embedding } : {}),
- ...docFields,
- } as EmbeddingRecord)
- } catch (_e) {
+ })) as Record | null
+ } catch (e) {
+ if (e instanceof Error && e.name === 'NotFound') {
+ continue
+ }
+ throw e
}
+ if (!doc) continue
+ const {
+ id: _id,
+ createdAt: _createdAt,
+ updatedAt: _updatedAt,
+ embedding: _embedding,
+ ...docFields
+ } = doc
+ records[id] = {
+ id: stored.id,
+ ...(populateEmbedding ? { embedding: stored.embedding } : {}),
+ ...docFields,
+ } as EmbeddingRecord
}
return records
},
diff --git a/dev/specs/vectorizedPayload.spec.ts b/dev/specs/vectorizedPayload.spec.ts
index 4f986a8..f4c36b4 100644
--- a/dev/specs/vectorizedPayload.spec.ts
+++ b/dev/specs/vectorizedPayload.spec.ts
@@ -229,8 +229,8 @@ describe('VectorizedPayload', () => {
ids: [embeddingId],
populateEmbedding: true,
})
- expect(records).toHaveLength(1)
- const [record] = records
+ expect(Object.keys(records)).toEqual([embeddingId])
+ const record = records[embeddingId]!
expect(record.id).toBe(embeddingId)
expect(Array.isArray(record.embedding)).toBe(true)
expect(record.embedding!.length).toBe(DIMS)
@@ -244,31 +244,34 @@ describe('VectorizedPayload', () => {
knowledgePool: 'default',
ids: [embeddingId],
})
- expect(records).toHaveLength(1)
- const [record] = records
+ expect(Object.keys(records)).toEqual([embeddingId])
+ const record = records[embeddingId]!
expect(record.id).toBe(embeddingId)
expect(record.embedding).toBeUndefined()
expect(typeof record.sourceCollection).toBe('string')
expect(typeof record.chunkText).toBe('string')
})
- test('drops unknown ids (result length < ids length)', async () => {
+ test('maps unknown ids to undefined (every requested id is a key)', async () => {
const vectorizedPayload = getVectorizedPayload(payload)!
const records = await vectorizedPayload.findByIds({
knowledgePool: 'default',
ids: [embeddingId, 'definitely-not-an-id-999999'],
})
- expect(records).toHaveLength(1)
- expect(records[0].id).toBe(embeddingId)
+ expect(Object.keys(records).sort()).toEqual(
+ [embeddingId, 'definitely-not-an-id-999999'].sort(),
+ )
+ expect(records[embeddingId]!.id).toBe(embeddingId)
+ expect(records['definitely-not-an-id-999999']).toBeUndefined()
})
- test('empty ids returns []', async () => {
+ test('empty ids returns {}', async () => {
const vectorizedPayload = getVectorizedPayload(payload)!
const records = await vectorizedPayload.findByIds({
knowledgePool: 'default',
ids: [],
})
- expect(records).toEqual([])
+ expect(records).toEqual({})
})
})
diff --git a/src/index.ts b/src/index.ts
index 45a961e..7621b9c 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -362,7 +362,7 @@ export default (pluginOptions: PayloadcmsVectorizeConfig) =>
ids: string[]
populateEmbedding?: boolean
}) => {
- if (params.ids.length === 0) return Promise.resolve([])
+ if (params.ids.length === 0) return Promise.resolve({})
return pluginOptions.dbAdapter.findByIds(
payload,
params.knowledgePool,
diff --git a/src/types.ts b/src/types.ts
index 09f685b..2fb10b1 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -61,7 +61,7 @@ export type VectorizedPayload = {
knowledgePool: KnowledgePoolName
ids: string[]
populateEmbedding?: boolean
- }) => Promise>
+ }) => Promise>
queueEmbed: (
params:
| {
@@ -451,5 +451,5 @@ export type DbAdapter = {
poolName: KnowledgePoolName,
ids: string[],
populateEmbedding?: boolean,
- ) => Promise>
+ ) => Promise>
}