|
37 | 37 | MetadataFetchError, |
38 | 38 | ) |
39 | 39 | from app.core.config import settings |
| 40 | +from app.utils.text import clip_to_512_tokens |
40 | 41 |
|
41 | 42 | from astrapy.exceptions.data_api_exceptions import DataAPIResponseException |
42 | 43 |
|
@@ -195,6 +196,24 @@ async def submit_new_video( |
195 | 196 |
|
196 | 197 | full_doc = new_video.model_dump(by_alias=False, exclude_none=True) |
197 | 198 |
|
| 199 | + # ------------------------------------------------------------------ |
| 200 | + # Build semantic embedding input string for NV-Embed auto-vectorisation. |
| 201 | + # The Data API embeds *strings* via the `$vectorize` operator when they are |
| 202 | + # stored in a ``vector`` column. We therefore concatenate title, |
| 203 | + # description, and tags into a single text blob and store it directly in |
| 204 | + # the ``content_features`` field. The vector will be generated |
| 205 | + # server-side during the insert/update operation. |
| 206 | + # ------------------------------------------------------------------ |
| 207 | + |
| 208 | + components: list[str] = [resolved_name] |
| 209 | + if new_video.description: |
| 210 | + components.append(new_video.description) |
| 211 | + if new_video.tags: |
| 212 | + components.append(" ".join(new_video.tags)) |
| 213 | + |
| 214 | + embedding_raw = "\n".join(components) |
| 215 | + full_doc["content_features"] = clip_to_512_tokens(embedding_raw) |
| 216 | + |
198 | 217 | # Ensure any HttpUrl instances are converted to plain strings so AstraDB |
199 | 218 | # JSON encoder does not choke. We purposely *do not* strip unknown |
200 | 219 | # columns here because unit-tests rely on seeing them; schema filtering |
@@ -708,6 +727,49 @@ async def search_videos_by_keyword( |
708 | 727 | ) |
709 | 728 |
|
710 | 729 |
|
| 730 | +# --------------------------------------------------------------------------- |
| 731 | +# Semantic (vector) search |
| 732 | +# --------------------------------------------------------------------------- |
| 733 | + |
| 734 | + |
async def search_videos_by_semantic(
    query: str,
    page: int,
    page_size: int,
    db_table: Optional[AstraDBCollection] = None,
) -> Tuple[List[VideoSummary], int]:
    """Return videos ranked by semantic similarity using Astra ``$vectorize``.

    The raw query string is handed to the Data API's ``$vectorize`` sort
    operator, which embeds it server-side (NV-Embed) and orders results by
    vector similarity against the stored embeddings.

    Parameters
    ----------
    query:
        Free-text search string to embed and match against.
    page, page_size:
        Pagination parameters, forwarded unchanged to
        ``list_videos_with_query``.
    db_table:
        Optional collection override (e.g. for tests); forwarded unchanged.

    Raises
    ------
    HTTPException
        With status ``400`` if the query exceeds the NV-Embed 512-token
        limit.
    """
    # Approximate the provider's token count locally so we fail fast with a
    # clear 400 instead of an opaque embedding-service error downstream.
    # NOTE(review): splitting on word runs / single punctuation marks is only
    # a heuristic for the real NV-Embed tokenizer — confirm it is
    # conservative enough in practice.
    import re

    # ``re.findall`` is used directly: the pattern runs once per call and the
    # ``re`` module caches compiled patterns, so a manual ``compile`` step
    # (and the ``_re`` alias the original carried) adds nothing.
    if len(re.findall(r"\w+|[^\w\s]", query, flags=re.UNICODE)) > 512:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Query exceeds 512-token limit for semantic search.",
        )

    # ``$vectorize`` instructs the Data API to embed the string server-side
    # and sort by similarity; the empty filter means all videos compete.
    return await list_videos_with_query(
        query_filter={},
        page=page,
        page_size=page_size,
        sort_options={"$vectorize": query},
        db_table=db_table,
    )
| 771 | + |
| 772 | + |
711 | 773 | # --------------------------------------------------------------------------- |
712 | 774 | # Tag suggestions |
713 | 775 | # --------------------------------------------------------------------------- |
|
0 commit comments