Skip to content

Commit 578171c

Browse files
committed
fix: normalize cross-provider scores per-provider instead of globally
1 parent 97c0c71 commit 578171c

1 file changed

Lines changed: 16 additions & 10 deletions

File tree

  • apps/sim/app/api/knowledge/search

apps/sim/app/api/knowledge/search/route.ts

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -420,19 +420,25 @@ export async function POST(request: NextRequest) {
420420
}
421421

422422
// When mixing results from different embedding spaces (OpenAI + Ollama), raw
423-
// cosine distances are not directly comparable. Normalize to [0, 1] only in
424-
// that case so existing consumers of single-provider similarity scores are
425-
// unaffected.
423+
// cosine distances are not directly comparable. Normalize each provider's
424+
// results independently to [0, 1] so neither provider dominates based on its
425+
// own absolute distance range, then merge. Single-provider searches are
426+
// returned unchanged to preserve the original score semantics.
426427
const isMixedProviders = openaiKbIds.length > 0 && ollamaKbIds.length > 0
427-
const normalizeScores = (items: SearchResult[]): SearchResult[] => {
428-
if (items.length <= 1) return items
429-
const min = Math.min(...items.map((r) => r.distance))
430-
const max = Math.max(...items.map((r) => r.distance))
431-
const range = max - min || 1
432-
return items.map((r) => ({ ...r, distance: (r.distance - min) / range }))
428+
/**
 * Rescales result distances to [0, 1] separately for each embedding provider
 * and returns the OpenAI group followed by the Ollama group.
 *
 * Within a provider, the closest result maps to 0 and the farthest to 1. A
 * provider that contributes a single result (or results that all share the
 * same distance) maps to 0, so every returned distance is on the normalized
 * scale and raw and rescaled values are never mixed in the merged ranking.
 *
 * Provider membership is decided by `openaiKbIds` / `ollamaKbIds` from the
 * enclosing scope. A result is assigned to at most one group (OpenAI is
 * checked first), so it is never emitted twice. Results whose knowledge base
 * appears in neither list are omitted.
 *
 * @param items - Combined search results from all queried knowledge bases.
 * @returns New result objects with rescaled `distance`; inputs are not mutated.
 */
const normalizeByProvider = (items: SearchResult[]): SearchResult[] => {
  // Min-max rescale of one provider's results.
  const normalizeGroup = (group: SearchResult[]): SearchResult[] => {
    if (group.length === 0) return group

    // Single pass instead of Math.min(...spread) / Math.max(...spread), which
    // builds two temporary arrays and passes every element as a call argument.
    let min = Number.POSITIVE_INFINITY
    let max = Number.NEGATIVE_INFINITY
    for (const { distance } of group) {
      if (distance < min) min = distance
      if (distance > max) max = distance
    }

    // A zero range (one result, or all distances equal) falls back to 1 so the
    // division is defined and every such result lands on 0.
    const range = max - min || 1
    return group.map((r) => ({ ...r, distance: (r.distance - min) / range }))
  }

  const openaiIds = new Set(openaiKbIds)
  const ollamaIds = new Set(ollamaKbIds)
  const openaiGroup: SearchResult[] = []
  const ollamaGroup: SearchResult[] = []

  // Partition in one pass; `else if` guarantees a result joins only one group.
  for (const item of items) {
    if (openaiIds.has(item.knowledgeBaseId)) {
      openaiGroup.push(item)
    } else if (ollamaIds.has(item.knowledgeBaseId)) {
      ollamaGroup.push(item)
    }
  }

  return [...normalizeGroup(openaiGroup), ...normalizeGroup(ollamaGroup)]
}
434440

435-
const results: SearchResult[] = (isMixedProviders ? normalizeScores(allResults) : allResults)
441+
const results: SearchResult[] = (isMixedProviders ? normalizeByProvider(allResults) : allResults)
436442
.sort((a, b) => a.distance - b.distance)
437443
.slice(0, validatedData.topK)
438444

0 commit comments

Comments
 (0)