diff --git a/adapters/README.md b/adapters/README.md index 096a6e4..10b9d07 100644 --- a/adapters/README.md +++ b/adapters/README.md @@ -5,6 +5,7 @@ > **If you just want to use an existing adapter**, jump to the package READMEs: > - [`@payloadcms-vectorize/pg`](./pg/README.md) — PostgreSQL + pgvector > - [`@payloadcms-vectorize/cf`](./cf/README.md) — Cloudflare Vectorize +> - [`@payloadcms-vectorize/mongodb`](./mongodb/README.md) — MongoDB Atlas + self-hosted 8.2+ > - Or see the [main README](../README.md) for end-to-end setup. --- @@ -380,7 +381,7 @@ The dev harness in [`dev/`](../dev) runs the integration suite against any adapt 3. Run the suites that matter: - `pnpm test:int` — full integration suite (real Payload, real DB). - `pnpm test:e2e` — Playwright E2E. - - `pnpm vitest adapters/pg/dev/specs/vectorSearchWhere.spec.ts` — the `where` operator suite (31 tests). **Every adapter should pass this.** + - `pnpm vitest adapters/pg/dev/specs/vectorSearchWhere.spec.ts` — the `where` operator suite (31 tests). **Every adapter should pass this** (or an equivalent — CF's equivalent is `where.spec.ts`). If your store doesn't support an operator, document it in [Adapter feature parity](#adapter-feature-parity) and have your `where` translation throw a clear error rather than silently returning wrong results. @@ -398,14 +399,14 @@ If your store doesn't support an operator, document it in [Adapter feature parit ## Adapter feature parity -| Feature | PG | CF | Notes | -|---|---|---|---| -| Real-time ingest (`storeChunk`) | ✅ | ✅ | | -| Bulk ingest (`hasEmbeddingVersion` + `storeChunk`) | ✅ | ✅ | | -| `where` operators | full | full | Both pass `vectorSearchWhere.spec.ts`. | -| Server-side `like` regex | ✅ | ✅ (with regex escape — see CHANGELOG 0.7.1) | | -| Migration CLI bin | ✅ (`vectorize:migrate`) | ❌ | CF uses indexes managed via Cloudflare API. | -| Score range | `[0, 1]` (cosine similarity) | varies by index metric | Document yours. | +| Feature | PG | CF | MongoDB | Notes | +|---|---|---|---|---| +| Real-time ingest (`storeChunk`) | ✅ | ✅ | ✅ | | +| Bulk ingest (`hasEmbeddingVersion` + `storeChunk`) | ✅ | ✅ | ✅ | | +| `where` operators | full | full | full | PG & MongoDB pass `vectorSearchWhere.spec.ts` (31 tests each); CF covers `where` in `where.spec.ts` (59 tests). MongoDB has documented pre/post-filter behavior — see [Limitations](./mongodb/README.md#limitations). | +| Server-side `like` regex | ✅ | ✅ (with regex escape — see CHANGELOG 0.7.1) | ✅ | | +| Migration CLI bin | ✅ (`vectorize:migrate`) | ❌ | ❌ | CF uses indexes managed via Cloudflare API. MongoDB creates search indexes at runtime via `ensureSearchIndex` (no bin). | +| Score range | `[0, 1]` (cosine similarity) | varies by index metric | `vectorSearchScore`; range depends on the index similarity metric | Document yours. | If something here is out of date, please [open an issue](https://github.com/techiejd/payloadcms-vectorize/issues) — adapter parity drift is exactly what this table exists to surface. diff --git a/adapters/mongodb/dev/specs/extensionFields.spec.ts b/adapters/mongodb/dev/specs/extensionFields.spec.ts index ac5d1f2..938fbe1 100644 --- a/adapters/mongodb/dev/specs/extensionFields.spec.ts +++ b/adapters/mongodb/dev/specs/extensionFields.spec.ts @@ -106,4 +106,24 @@ describe('Extension fields (mongodb)', () => { expect((hit as any).category).toBe('cat-a') expect((hit as any).priority).toBe(7) }, 90_000) + + test('stored fields outside filterableFields are also returned by search (CF/PG parity)', async () => { + const target = Array(DIMS).fill(0.66) + await adapter.storeChunk(payload, 'default', { + sourceCollection: 'posts', + docId: 'doc-nonfilterable', + chunkIndex: 0, + chunkText: 'parity', + embeddingVersion: testEmbeddingVersion, + embedding: target, + extensionFields: { category: 'cat-a', priority: 9, note: 'not-a-filterable-field' }, + }) + + await new Promise((r) => setTimeout(r, 1500)) + + const r = await adapter.search(payload, target, 'default', 5) + const hit = r.find((x) => x.docId === 'doc-nonfilterable') + expect(hit).toBeDefined() + expect((hit as any).note).toBe('not-a-filterable-field') + }, 90_000) }) diff --git a/adapters/mongodb/src/search.ts b/adapters/mongodb/src/search.ts index 02c5392..508c944 100644 --- a/adapters/mongodb/src/search.ts +++ b/adapters/mongodb/src/search.ts @@ -5,6 +5,14 @@ import { convertWhereToMongo, evaluatePostFilter } from './convertWhere.js' import { ensureSearchIndex } from './indexes.js' import { RESERVED_FIELDS, type ResolvedPoolConfig } from './types.js' +const RESERVED_AND_META = new Set([ + ...RESERVED_FIELDS, + '_id', + 'score', + 'createdAt', + 'updatedAt', +]) + export interface MongoSearchCtx { uri: string dbName: string @@ -53,20 +61,10 @@ export async function searchImpl( if (pool.forceExact) vectorSearchStage.exact = true if (preFilter) vectorSearchStage.filter = preFilter - const projection: Record = { - _id: 1, - score: { $meta: 'vectorSearchScore' }, - sourceCollection: 1, - docId: 1, - chunkIndex: 1, - chunkText: 1, - embeddingVersion: 1, - } - for (const f of pool.filterableFields) projection[f] = 1 - const pipeline: Record[] = [ { $vectorSearch: vectorSearchStage }, - { $project: projection }, + { $addFields: { score: { $meta: 'vectorSearchScore' } } }, + { $project: { embedding: 0 } }, ] const collection = client.db(ctx.dbName).collection(pool.collectionName) @@ -76,19 +74,19 @@ export async function searchImpl( ? rawDocs.filter((d) => evaluatePostFilter(d as Record, postFilter!)) : rawDocs - return filtered.map((d) => mapDocToResult(d as Record, pool.filterableFields)) + return filtered.map((d) => mapDocToResult(d as Record)) } -function mapDocToResult( - doc: Record, - filterable: string[], -): VectorSearchResult { +function mapDocToResult(doc: Record): VectorSearchResult { if (typeof doc.score !== 'number') { throw new Error( - `[@payloadcms-vectorize/mongodb] Search result is missing numeric "score" field; ensure $project includes { score: { $meta: 'vectorSearchScore' } }`, + `[@payloadcms-vectorize/mongodb] Search result is missing numeric "score" field; ensure the pipeline adds { score: { $meta: 'vectorSearchScore' } }`, ) } - const result: Record = { + const extensionFields = Object.fromEntries( + Object.entries(doc).filter(([k]) => !RESERVED_AND_META.has(k)), + ) + return { id: String(doc._id), score: doc.score, sourceCollection: String(doc.sourceCollection ?? ''), @@ -97,11 +95,6 @@ function mapDocToResult( typeof doc.chunkIndex === 'number' ? doc.chunkIndex : Number(doc.chunkIndex ?? 0), chunkText: String(doc.chunkText ?? ''), embeddingVersion: String(doc.embeddingVersion ?? ''), - } - for (const f of filterable) { - if (f in doc && !(RESERVED_FIELDS as readonly string[]).includes(f)) { - result[f] = doc[f] - } - } - return result as VectorSearchResult + ...extensionFields, + } as VectorSearchResult }