|
| 1 | +from datetime import timedelta |
| 2 | + |
| 3 | +from temporalio import activity, workflow |
| 4 | +from temporalio.common import RetryPolicy |
| 5 | +from temporalio.workflow import execute_activity |
| 6 | +from .schema import IngestionRequest |
| 7 | + |
| 8 | +with workflow.unsafe.imports_passed_through(): |
| 9 | + from tc_hivemind_backend.ingest_qdrant import CustomIngestionPipeline |
| 10 | + from llama_index.core import Document |
| 11 | + |
| 12 | + |
@workflow.defn
class IngestionWorkflow:
    """Temporal workflow that orchestrates a single document ingestion.

    The workflow does no processing itself: it schedules the
    ``process_document`` activity with the retry and timeout options
    configured in ``run`` and waits for it to finish.
    """

    @workflow.run
    async def run(self, ingestion_request: IngestionRequest) -> None:
        """Schedule the document-processing activity and await its completion.

        Parameters
        ----------
        ingestion_request : IngestionRequest
            The ingestion request, passed unchanged as the single argument
            of the ``process_document`` activity.

        Notes
        -----
        Activity options used by this workflow:
        - Initial retry interval: 1 second
        - Maximum retry interval: 1 minute
        - Maximum attempts: 3
        - Start-to-close timeout: 5 minutes
        """
        activity_timeout = timedelta(minutes=5)

        await execute_activity(
            process_document,
            ingestion_request,
            start_to_close_timeout=activity_timeout,
            retry_policy=RetryPolicy(
                initial_interval=timedelta(seconds=1),
                maximum_interval=timedelta(minutes=1),
                maximum_attempts=3,
            ),
        )
| 51 | + |
| 52 | + |
@activity.defn
async def process_document(
    ingestion_request: IngestionRequest,
) -> None:
    """Ingest the request's document through ``CustomIngestionPipeline``.

    Parameters
    ----------
    ingestion_request : IngestionRequest
        Supplies ``communityId``, ``platformId``, an optional
        ``collectionName``, and the ``docId``, ``text`` and ``metadata``
        used to build the document.

    Notes
    -----
    When ``collectionName`` is ``None`` the collection name falls back to
    ``"{communityId}_{platformId}"``.

    ``run_pipeline`` is invoked as a plain (non-awaited) callable, so it is
    assumed to be a blocking call. It is executed in a worker thread so the
    event loop that runs this async activity is not blocked while the
    pipeline works.
    """
    # Function-scope import keeps this block self-contained; requires
    # Python 3.9+ for ``asyncio.to_thread``.
    import asyncio

    collection_name = ingestion_request.collectionName
    if collection_name is None:
        # No explicit collection requested: derive one from the IDs.
        collection_name = (
            f"{ingestion_request.communityId}_{ingestion_request.platformId}"
        )

    # Initialize the ingestion pipeline
    pipeline = CustomIngestionPipeline(
        community_id=ingestion_request.communityId,
        collection_name=collection_name,
    )

    document = Document(
        doc_id=ingestion_request.docId,
        text=ingestion_request.text,
        metadata=ingestion_request.metadata,
    )

    # NOTE(review): assumes run_pipeline is synchronous and safe to call from
    # a worker thread — confirm against tc_hivemind_backend.
    await asyncio.to_thread(pipeline.run_pipeline, docs=[document])
0 commit comments