Skip to content

Commit 65eb582

Browse files
committed
Vector search implementation - prompts 3 to 7 completed
1 parent 2872b4b commit 65eb582

11 files changed

Lines changed: 600 additions & 3 deletions

File tree

app/api/v1/endpoints/search_catalog.py

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
from __future__ import annotations
22

3-
from typing import Annotated, List
3+
from typing import Annotated, List, Literal
44

55
from fastapi import APIRouter, Depends, Query
66

77
from app.models.video import VideoSummary, TagSuggestion
88
from app.models.common import PaginatedResponse, Pagination
99
from app.api.v1.dependencies import PaginationParams
1010
from app.services import video_service
11+
from app.core.config import settings
1112

1213
router = APIRouter(prefix="/search", tags=["Search"])
1314

@@ -34,11 +35,25 @@ def _build_paginated_response(
3435
)
3536
async def search_videos(
3637
query: Annotated[str, Query(min_length=1, description="Search query term")],
38+
mode: Literal["semantic", "keyword"] = Query(
39+
default="keyword", description="Search mode: semantic or keyword"
40+
),
3741
pagination: PaginationParams = Depends(),
3842
):
39-
summaries, total = await video_service.search_videos_by_keyword(
40-
query=query, page=pagination.page, page_size=pagination.pageSize
43+
# Decide search strategy based on mode & feature flag
44+
use_semantic = mode == "semantic" and getattr(
45+
settings, "VECTOR_SEARCH_ENABLED", False
4146
)
47+
48+
if use_semantic:
49+
summaries, total = await video_service.search_videos_by_semantic(
50+
query=query, page=pagination.page, page_size=pagination.pageSize
51+
)
52+
else:
53+
summaries, total = await video_service.search_videos_by_keyword(
54+
query=query, page=pagination.page, page_size=pagination.pageSize
55+
)
56+
4257
return _build_paginated_response(summaries, total, pagination)
4358

4459

app/services/video_service.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
MetadataFetchError,
3838
)
3939
from app.core.config import settings
40+
from app.utils.text import clip_to_512_tokens
4041

4142
from astrapy.exceptions.data_api_exceptions import DataAPIResponseException
4243

@@ -195,6 +196,24 @@ async def submit_new_video(
195196

196197
full_doc = new_video.model_dump(by_alias=False, exclude_none=True)
197198

199+
# ------------------------------------------------------------------
200+
# Build semantic embedding input string for NV-Embed auto-vectorisation.
201+
# The Data API embeds *strings* via the `$vectorize` operator when they are
202+
# stored in a ``vector`` column. We therefore concatenate title,
203+
# description, and tags into a single text blob and store it directly in
204+
# the ``content_features`` field. The vector will be generated
205+
# server-side during the insert/update operation.
206+
# ------------------------------------------------------------------
207+
208+
components: list[str] = [resolved_name]
209+
if new_video.description:
210+
components.append(new_video.description)
211+
if new_video.tags:
212+
components.append(" ".join(new_video.tags))
213+
214+
embedding_raw = "\n".join(components)
215+
full_doc["content_features"] = clip_to_512_tokens(embedding_raw)
216+
198217
# Ensure any HttpUrl instances are converted to plain strings so AstraDB
199218
# JSON encoder does not choke. We purposely *do not* strip unknown
200219
# columns here because unit-tests rely on seeing them; schema filtering
@@ -708,6 +727,49 @@ async def search_videos_by_keyword(
708727
)
709728

710729

730+
# ---------------------------------------------------------------------------
731+
# Semantic (vector) search
732+
# ---------------------------------------------------------------------------
733+
734+
735+
async def search_videos_by_semantic(
    query: str,
    page: int,
    page_size: int,
    db_table: Optional[AstraDBCollection] = None,
) -> Tuple[List[VideoSummary], int]:
    """Search videos by semantic similarity using Astra ``$vectorize``.

    The raw query text is passed as a ``$vectorize`` sort clause to
    ``list_videos_with_query`` with an empty filter; this function does not
    compute an embedding itself.

    Parameters
    ----------
    query
        Free-text search string supplied by the caller.
    page, page_size
        Pagination arguments, forwarded unchanged.
    db_table
        Optional table/collection handle, forwarded unchanged.

    Returns
    -------
    Tuple[List[VideoSummary], int]
        Whatever ``list_videos_with_query`` returns for the sorted query.

    Raises
    ------
    HTTPException
        With status ``400`` if the query exceeds the NV-Embed 512-token limit.
    """
    import re as _re

    # Approximate the provider's tokenisation: every run of word characters
    # and every standalone punctuation mark counts as one token.
    token_count = sum(
        1 for _ in _re.finditer(r"\w+|[^\w\s]", query, flags=_re.UNICODE)
    )
    if token_count > 512:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Query exceeds 512-token limit for semantic search.",
        )

    # Sorting by ``$vectorize`` asks the backend to rank rows by similarity
    # to the query text; no additional filter is applied.
    return await list_videos_with_query(
        query_filter={},
        page=page,
        page_size=page_size,
        sort_options={"$vectorize": query},
        db_table=db_table,
    )
771+
772+
711773
# ---------------------------------------------------------------------------
712774
# Tag suggestions
713775
# ---------------------------------------------------------------------------

docs/killrvideo_openapi.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -620,6 +620,18 @@ paths:
620620
description: Search query term
621621
title: Query
622622
description: Search query term
623+
- name: mode
624+
in: query
625+
required: false
626+
schema:
627+
enum:
628+
- semantic
629+
- keyword
630+
type: string
631+
description: 'Search mode: semantic or keyword'
632+
default: keyword
633+
title: Mode
634+
description: 'Search mode: semantic or keyword'
623635
- name: page
624636
in: query
625637
required: false

migrations/2025_08_vector.cql

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
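-- Increase the vector dimension to 4096.
-- NOTE: this statement only changes the column type; the NVIDIA embedding
-- service is attached by the companion migration (2025_08_vector.json).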
2+
ALTER TABLE killrvideo.videos ALTER content_features TYPE vector<float, 4096>;
3+
4+
-- Drop the existing vector index if present
5+
DROP INDEX IF EXISTS videos_content_features_idx;
6+
7+
-- Recreate the SAI index for the enlarged vector column using cosine similarity
8+
CREATE CUSTOM INDEX videos_content_features_idx
9+
ON killrvideo.videos (content_features)
10+
USING 'StorageAttachedIndex'
11+
WITH OPTIONS = {
12+
'similarity_function': 'COSINE'
13+
};

migrations/2025_08_vector.json

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
{
2+
"commands": [
3+
{
4+
"alterTable": {
5+
"name": "videos",
6+
"alterColumns": {
7+
"content_features": {
8+
"type": "vector",
9+
"dimension": 4096,
10+
"service": {
11+
"provider": "nvidia",
12+
"modelName": "NV-Embed-QA"
13+
}
14+
}
15+
}
16+
}
17+
},
18+
{
19+
"dropIndex": {
20+
"name": "videos_content_features_idx",
21+
"table": "videos"
22+
}
23+
},
24+
{
25+
"createIndex": {
26+
"name": "videos_content_features_idx",
27+
"table": "videos",
28+
"column": "content_features",
29+
"type": "vector",
30+
"options": {
31+
"similarity_function": "COSINE"
32+
}
33+
}
34+
}
35+
]
36+
}

out.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
INFO: 127.0.0.1:50137 - "GET /api/v1/videos/by-uploader/80b9fa2f-ca43-4812-b50c-2a53b0758ae6?page=1&pageSize=20 HTTP/1.1" 200 OK

0 commit comments

Comments
 (0)