From 95ac5b4ecb0acc4eccdc824e34d6172cc16af892 Mon Sep 17 00:00:00 2001 From: kerthcet Date: Wed, 20 May 2026 14:35:33 +0100 Subject: [PATCH 1/5] update semantic kind name Signed-off-by: kerthcet --- alphatrion/tracing/clickhouse_exporter.py | 70 +++++++++++++++++------ alphatrion/tracing/span_processor.py | 17 +++++- 2 files changed, 70 insertions(+), 17 deletions(-) diff --git a/alphatrion/tracing/clickhouse_exporter.py b/alphatrion/tracing/clickhouse_exporter.py index 9a843626..114fd1ac 100644 --- a/alphatrion/tracing/clickhouse_exporter.py +++ b/alphatrion/tracing/clickhouse_exporter.py @@ -248,30 +248,68 @@ def force_flush(self, timeout_millis: int = 30000) -> bool: def determine_semantic_kind(attributes: dict[str, str]) -> str: """Determine the semantic kind of a span. + Priority order: + 1. Extended thinking/reasoning (LLM with reasoning tokens) + 2. Traceloop decorators (workflow, task, tool, agent) + 3. LLM operations (chat, completion, embeddings) + 4. Database operations + 5. HTTP operations + 6. Message queue operations + 7. Unknown fallback + Args: attributes: Span attributes Returns: - Semantic kind string + Semantic kind string (workflow, task, tool, chat, completion, + reasoning, db, http, messaging, unknown) """ + if not attributes: + return SEMANTIC_KIND_UNKNOWN - if ( - "gen_ai.usage.reasoning_tokens" in attributes - and int(attributes["gen_ai.usage.reasoning_tokens"]) > 0 - ): - return SEMANTIC_KIND_REASONING - - if "llm.request.type" in attributes: - return attributes["llm.request.type"] - - # Check for database operations + # Priority 1: Extended thinking/reasoning + # Check for LLM operations with reasoning tokens (o1, Claude extended thinking) + if "gen_ai.usage.reasoning_tokens" in attributes: + try: + reasoning_tokens = int(attributes["gen_ai.usage.reasoning_tokens"]) + if reasoning_tokens > 0: + return SEMANTIC_KIND_REASONING + except (ValueError, TypeError): + pass + + # Priority 2: Traceloop decorators (@workflow, @task, @tool) + # These are explicitly decorated by developers and should take precedence + if "traceloop.span.kind" in attributes: + traceloop_kind = attributes["traceloop.span.kind"] + # Valid values: workflow, task, tool, agent + if traceloop_kind in ("workflow", "task", "tool", "agent"): + return traceloop_kind + + # Priority 3: LLM operations (auto-instrumented by Traceloop) + # Check for GenAI operations from OpenTelemetry semantic conventions + if "gen_ai.operation.name" in attributes: + operation = attributes["gen_ai.operation.name"] + # Common values: chat, completion, embeddings + return operation + + # Priority 4: Database operations + # Auto-instrumented by OpenTelemetry (psycopg2, SQLAlchemy, etc.) if "db.system" in attributes or "db.statement" in attributes: return SEMANTIC_KIND_DB - # One of workflow, task, agent, tool - if "traceloop.span.kind" in attributes: - traceloop_kind = attributes["traceloop.span.kind"] - return traceloop_kind + # Priority 5: HTTP operations + # Auto-instrumented by OpenTelemetry (requests, httpx, urllib3, etc.) + if "http.method" in attributes or "http.request.method" in attributes: + return "http" + + # Priority 6: Messaging/Queue operations + # Auto-instrumented by OpenTelemetry (RabbitMQ, Kafka, SQS, etc.) + if "messaging.system" in attributes: + return "messaging" + + # Priority 7: RPC operations + if "rpc.system" in attributes: + return "rpc" - # Default to unknown + # Default: unknown return SEMANTIC_KIND_UNKNOWN diff --git a/alphatrion/tracing/span_processor.py b/alphatrion/tracing/span_processor.py index 48e59660..bdaa8885 100644 --- a/alphatrion/tracing/span_processor.py +++ b/alphatrion/tracing/span_processor.py @@ -11,10 +11,25 @@ logger = logging.getLogger(__name__) # Semantic kind enums: +# Core application spans (decorated with @workflow, @task, @tool) +SEMANTIC_KIND_WORKFLOW = "workflow" +SEMANTIC_KIND_TASK = "task" SEMANTIC_KIND_TOOL = "tool" -SEMANTIC_KIND_REASONING = "reasoning" +SEMANTIC_KIND_AGENT = "agent" + +# LLM operations SEMANTIC_KIND_CHAT = "chat" +SEMANTIC_KIND_COMPLETION = "completion" +SEMANTIC_KIND_EMBEDDINGS = "embeddings" +SEMANTIC_KIND_REASONING = "reasoning" + +# Infrastructure operations SEMANTIC_KIND_DB = "db" +SEMANTIC_KIND_HTTP = "http" +SEMANTIC_KIND_MESSAGING = "messaging" +SEMANTIC_KIND_RPC = "rpc" + +# Fallback SEMANTIC_KIND_UNKNOWN = "unknown" From b86d943d0525e57e77813e2f5c8d8e88479fcfaa Mon Sep 17 00:00:00 2001 From: kerthcet Date: Wed, 20 May 2026 14:42:11 +0100 Subject: [PATCH 2/5] do not use magic string Signed-off-by: kerthcet --- alphatrion/tracing/clickhouse_exporter.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/alphatrion/tracing/clickhouse_exporter.py b/alphatrion/tracing/clickhouse_exporter.py index 114fd1ac..fb4029e0 100644 --- a/alphatrion/tracing/clickhouse_exporter.py +++ b/alphatrion/tracing/clickhouse_exporter.py @@ -10,7 +10,10 @@ from alphatrion.storage.tracestore import TraceStore from alphatrion.tracing.span_processor import ( SEMANTIC_KIND_DB, + SEMANTIC_KIND_HTTP, + SEMANTIC_KIND_MESSAGING, SEMANTIC_KIND_REASONING, + SEMANTIC_KIND_RPC, SEMANTIC_KIND_UNKNOWN, ) @@ -300,16 +303,16 @@ def determine_semantic_kind(attributes: dict[str, str]) -> str: # Priority 5: HTTP operations # Auto-instrumented by OpenTelemetry (requests, httpx, urllib3, etc.) if "http.method" in attributes or "http.request.method" in attributes: - return "http" + return SEMANTIC_KIND_HTTP # Priority 6: Messaging/Queue operations # Auto-instrumented by OpenTelemetry (RabbitMQ, Kafka, SQS, etc.) if "messaging.system" in attributes: - return "messaging" + return SEMANTIC_KIND_MESSAGING # Priority 7: RPC operations if "rpc.system" in attributes: - return "rpc" + return SEMANTIC_KIND_RPC # Default: unknown return SEMANTIC_KIND_UNKNOWN From 677949a2787a16b716aa0f9c3d957b50290b2b01 Mon Sep 17 00:00:00 2001 From: kerthcet Date: Wed, 20 May 2026 14:44:17 +0100 Subject: [PATCH 3/5] fix reasoning semantic kind Signed-off-by: kerthcet --- alphatrion/tracing/clickhouse_exporter.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/alphatrion/tracing/clickhouse_exporter.py b/alphatrion/tracing/clickhouse_exporter.py index fb4029e0..93b0a0c7 100644 --- a/alphatrion/tracing/clickhouse_exporter.py +++ b/alphatrion/tracing/clickhouse_exporter.py @@ -273,12 +273,7 @@ def determine_semantic_kind(attributes: dict[str, str]) -> str: # Priority 1: Extended thinking/reasoning # Check for LLM operations with reasoning tokens (o1, Claude extended thinking) if "gen_ai.usage.reasoning_tokens" in attributes: - try: - reasoning_tokens = int(attributes["gen_ai.usage.reasoning_tokens"]) - if reasoning_tokens > 0: - return SEMANTIC_KIND_REASONING - except (ValueError, TypeError): - pass + return SEMANTIC_KIND_REASONING # Priority 2: Traceloop decorators (@workflow, @task, @tool) # These are explicitly decorated by developers and should take precedence From f0991511d08131f3f986fa784cc601ddfb4a0c68 Mon Sep 17 00:00:00 2001 From: kerthcet Date: Wed, 20 May 2026 14:45:08 +0100 Subject: [PATCH 4/5] update comment Signed-off-by: kerthcet --- alphatrion/tracing/clickhouse_exporter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/alphatrion/tracing/clickhouse_exporter.py b/alphatrion/tracing/clickhouse_exporter.py index 93b0a0c7..557eb9c5 100644 --- a/alphatrion/tracing/clickhouse_exporter.py +++ b/alphatrion/tracing/clickhouse_exporter.py @@ -264,8 +264,8 @@ def determine_semantic_kind(attributes: dict[str, str]) -> str: attributes: Span attributes Returns: - Semantic kind string (workflow, task, tool, chat, completion, - reasoning, db, http, messaging, unknown) + Semantic kind string (workflow, task, tool, agent, chat, completion, + embeddings, reasoning, db, http, messaging, rpc, unknown) """ if not attributes: return SEMANTIC_KIND_UNKNOWN From a30dc1a79a12f980fd73f0ac7b16628b493a048f Mon Sep 17 00:00:00 2001 From: kerthcet Date: Wed, 20 May 2026 14:53:09 +0100 Subject: [PATCH 5/5] fix tests Signed-off-by: kerthcet --- tests/integration/test_tracking.py | 40 ------------------------------ 1 file changed, 40 deletions(-) diff --git a/tests/integration/test_tracking.py b/tests/integration/test_tracking.py index 56b8697a..538d962c 100644 --- a/tests/integration/test_tracking.py +++ b/tests/integration/test_tracking.py @@ -125,46 +125,6 @@ async def token_workflow(): # Query spans with token data (use tracestore database name) database = tracestore.database - # Debug: Check if any spans exist for this experiment - debug_query = f""" - SELECT COUNT(*) as count - FROM {database}.otel_spans - WHERE ExperimentId = '{experiment_id}' - """ - total_spans = tracestore.client.query(debug_query).result_rows[0][0] - print(f"DEBUG: Total spans for experiment {experiment_id}: {total_spans}") - - # Debug: Check what span names we have - debug_query_names = f""" - SELECT SpanName, COUNT(*) as count - FROM {database}.otel_spans - WHERE ExperimentId = '{experiment_id}' - GROUP BY SpanName - ORDER BY count DESC - """ - span_names = tracestore.client.query(debug_query_names).result_rows - print(f"DEBUG: Span names: {span_names}") - - # Debug: Check what attributes exist in spans - debug_query_attrs = f""" - SELECT SpanName, mapKeys(SpanAttributes) as attr_keys - FROM {database}.otel_spans - WHERE ExperimentId = '{experiment_id}' - LIMIT 5 - """ - span_attrs = tracestore.client.query(debug_query_attrs).result_rows - print(f"DEBUG: Sample span attributes: {span_attrs}") - - # Debug: Check spans with gen_ai.usage attributes - debug_query2 = f""" - SELECT COUNT(*) as count - FROM {database}.otel_spans - WHERE ExperimentId = '{experiment_id}' - AND mapContains(SpanAttributes, 'gen_ai.usage.input_tokens') - """ - llm_spans = tracestore.client.query(debug_query2).result_rows[0][0] - print(f"DEBUG: Spans with gen_ai.usage.input_tokens: {llm_spans}") - query = f""" SELECT SpanId as span_id,