From 95ac5b4ecb0acc4eccdc824e34d6172cc16af892 Mon Sep 17 00:00:00 2001
From: kerthcet <kerthcet@gmail.com>
Date: Wed, 20 May 2026 14:35:33 +0100
Subject: [PATCH 1/5] reorder semantic kind detection; add http, messaging and rpc kinds

Signed-off-by: kerthcet <kerthcet@gmail.com>
---
 alphatrion/tracing/clickhouse_exporter.py | 70 +++++++++++++++++------
 alphatrion/tracing/span_processor.py      | 17 +++++-
 2 files changed, 70 insertions(+), 17 deletions(-)

diff --git a/alphatrion/tracing/clickhouse_exporter.py b/alphatrion/tracing/clickhouse_exporter.py
index 9a843626..114fd1ac 100644
--- a/alphatrion/tracing/clickhouse_exporter.py
+++ b/alphatrion/tracing/clickhouse_exporter.py
@@ -248,30 +248,68 @@ def force_flush(self, timeout_millis: int = 30000) -> bool:
 def determine_semantic_kind(attributes: dict[str, str]) -> str:
     """Determine the semantic kind of a span.
 
+    Priority order:
+    1. Extended thinking/reasoning (LLM with reasoning tokens)
+    2. Traceloop decorators (workflow, task, tool, agent)
+    3. LLM operations (chat, completion, embeddings)
+    4. Database operations
+    5. HTTP operations
+    6. Message queue operations
+    7. RPC operations (otherwise falls back to unknown)
+
     Args:
         attributes: Span attributes
 
     Returns:
-        Semantic kind string
+        Semantic kind string (workflow, task, tool, chat, completion,
+        reasoning, db, http, messaging, unknown)
     """
+    if not attributes:
+        return SEMANTIC_KIND_UNKNOWN
 
-    if (
-        "gen_ai.usage.reasoning_tokens" in attributes
-        and int(attributes["gen_ai.usage.reasoning_tokens"]) > 0
-    ):
-        return SEMANTIC_KIND_REASONING
-
-    if "llm.request.type" in attributes:
-        return attributes["llm.request.type"]
-
-    # Check for database operations
+    # Priority 1: Extended thinking/reasoning
+    # Check for LLM operations with reasoning tokens (o1, Claude extended thinking)
+    if "gen_ai.usage.reasoning_tokens" in attributes:
+        try:
+            reasoning_tokens = int(attributes["gen_ai.usage.reasoning_tokens"])
+            if reasoning_tokens > 0:
+                return SEMANTIC_KIND_REASONING
+        except (ValueError, TypeError):
+            pass
+
+    # Priority 2: Traceloop decorators (@workflow, @task, @tool)
+    # These are explicitly decorated by developers and should take precedence
+    if "traceloop.span.kind" in attributes:
+        traceloop_kind = attributes["traceloop.span.kind"]
+        # Valid values: workflow, task, tool, agent
+        if traceloop_kind in ("workflow", "task", "tool", "agent"):
+            return traceloop_kind
+
+    # Priority 3: LLM operations (auto-instrumented by Traceloop)
+    # Check for GenAI operations from OpenTelemetry semantic conventions
+    if "gen_ai.operation.name" in attributes:
+        operation = attributes["gen_ai.operation.name"]
+        # Common values: chat, completion, embeddings
+        return operation
+
+    # Priority 4: Database operations
+    # Auto-instrumented by OpenTelemetry (psycopg2, SQLAlchemy, etc.)
     if "db.system" in attributes or "db.statement" in attributes:
         return SEMANTIC_KIND_DB
 
-    # One of workflow, task, agent, tool
-    if "traceloop.span.kind" in attributes:
-        traceloop_kind = attributes["traceloop.span.kind"]
-        return traceloop_kind
+    # Priority 5: HTTP operations
+    # Auto-instrumented by OpenTelemetry (requests, httpx, urllib3, etc.)
+    if "http.method" in attributes or "http.request.method" in attributes:
+        return "http"
+
+    # Priority 6: Messaging/Queue operations
+    # Auto-instrumented by OpenTelemetry (RabbitMQ, Kafka, SQS, etc.)
+    if "messaging.system" in attributes:
+        return "messaging"
+
+    # Priority 7: RPC operations
+    if "rpc.system" in attributes:
+        return "rpc"
 
-    # Default to unknown
+    # Default: unknown
     return SEMANTIC_KIND_UNKNOWN
diff --git a/alphatrion/tracing/span_processor.py b/alphatrion/tracing/span_processor.py
index 48e59660..bdaa8885 100644
--- a/alphatrion/tracing/span_processor.py
+++ b/alphatrion/tracing/span_processor.py
@@ -11,10 +11,25 @@
 logger = logging.getLogger(__name__)
 
 # Semantic kind enums:
+# Core application spans (decorated with @workflow, @task, @tool)
+SEMANTIC_KIND_WORKFLOW = "workflow"
+SEMANTIC_KIND_TASK = "task"
 SEMANTIC_KIND_TOOL = "tool"
-SEMANTIC_KIND_REASONING = "reasoning"
+SEMANTIC_KIND_AGENT = "agent"
+
+# LLM operations
 SEMANTIC_KIND_CHAT = "chat"
+SEMANTIC_KIND_COMPLETION = "completion"
+SEMANTIC_KIND_EMBEDDINGS = "embeddings"
+SEMANTIC_KIND_REASONING = "reasoning"
+
+# Infrastructure operations
 SEMANTIC_KIND_DB = "db"
+SEMANTIC_KIND_HTTP = "http"
+SEMANTIC_KIND_MESSAGING = "messaging"
+SEMANTIC_KIND_RPC = "rpc"
+
+# Fallback
 SEMANTIC_KIND_UNKNOWN = "unknown"
 
 

From b86d943d0525e57e77813e2f5c8d8e88479fcfaa Mon Sep 17 00:00:00 2001
From: kerthcet <kerthcet@gmail.com>
Date: Wed, 20 May 2026 14:42:11 +0100
Subject: [PATCH 2/5] do not use magic string

Signed-off-by: kerthcet <kerthcet@gmail.com>
---
 alphatrion/tracing/clickhouse_exporter.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/alphatrion/tracing/clickhouse_exporter.py b/alphatrion/tracing/clickhouse_exporter.py
index 114fd1ac..fb4029e0 100644
--- a/alphatrion/tracing/clickhouse_exporter.py
+++ b/alphatrion/tracing/clickhouse_exporter.py
@@ -10,7 +10,10 @@
 from alphatrion.storage.tracestore import TraceStore
 from alphatrion.tracing.span_processor import (
     SEMANTIC_KIND_DB,
+    SEMANTIC_KIND_HTTP,
+    SEMANTIC_KIND_MESSAGING,
     SEMANTIC_KIND_REASONING,
+    SEMANTIC_KIND_RPC,
     SEMANTIC_KIND_UNKNOWN,
 )
 
@@ -300,16 +303,16 @@ def determine_semantic_kind(attributes: dict[str, str]) -> str:
     # Priority 5: HTTP operations
     # Auto-instrumented by OpenTelemetry (requests, httpx, urllib3, etc.)
     if "http.method" in attributes or "http.request.method" in attributes:
-        return "http"
+        return SEMANTIC_KIND_HTTP
 
     # Priority 6: Messaging/Queue operations
     # Auto-instrumented by OpenTelemetry (RabbitMQ, Kafka, SQS, etc.)
     if "messaging.system" in attributes:
-        return "messaging"
+        return SEMANTIC_KIND_MESSAGING
 
     # Priority 7: RPC operations
     if "rpc.system" in attributes:
-        return "rpc"
+        return SEMANTIC_KIND_RPC
 
     # Default: unknown
     return SEMANTIC_KIND_UNKNOWN

From 677949a2787a16b716aa0f9c3d957b50290b2b01 Mon Sep 17 00:00:00 2001
From: kerthcet <kerthcet@gmail.com>
Date: Wed, 20 May 2026 14:44:17 +0100
Subject: [PATCH 3/5] treat any span with a reasoning_tokens attribute as reasoning

Signed-off-by: kerthcet <kerthcet@gmail.com>
---
 alphatrion/tracing/clickhouse_exporter.py | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/alphatrion/tracing/clickhouse_exporter.py b/alphatrion/tracing/clickhouse_exporter.py
index fb4029e0..93b0a0c7 100644
--- a/alphatrion/tracing/clickhouse_exporter.py
+++ b/alphatrion/tracing/clickhouse_exporter.py
@@ -273,12 +273,7 @@ def determine_semantic_kind(attributes: dict[str, str]) -> str:
     # Priority 1: Extended thinking/reasoning
     # Check for LLM operations with reasoning tokens (o1, Claude extended thinking)
     if "gen_ai.usage.reasoning_tokens" in attributes:
-        try:
-            reasoning_tokens = int(attributes["gen_ai.usage.reasoning_tokens"])
-            if reasoning_tokens > 0:
-                return SEMANTIC_KIND_REASONING
-        except (ValueError, TypeError):
-            pass
+        return SEMANTIC_KIND_REASONING
 
     # Priority 2: Traceloop decorators (@workflow, @task, @tool)
     # These are explicitly decorated by developers and should take precedence

From f0991511d08131f3f986fa784cc601ddfb4a0c68 Mon Sep 17 00:00:00 2001
From: kerthcet <kerthcet@gmail.com>
Date: Wed, 20 May 2026 14:45:08 +0100
Subject: [PATCH 4/5] update comment

Signed-off-by: kerthcet <kerthcet@gmail.com>
---
 alphatrion/tracing/clickhouse_exporter.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/alphatrion/tracing/clickhouse_exporter.py b/alphatrion/tracing/clickhouse_exporter.py
index 93b0a0c7..557eb9c5 100644
--- a/alphatrion/tracing/clickhouse_exporter.py
+++ b/alphatrion/tracing/clickhouse_exporter.py
@@ -264,8 +264,8 @@ def determine_semantic_kind(attributes: dict[str, str]) -> str:
         attributes: Span attributes
 
     Returns:
-        Semantic kind string (workflow, task, tool, chat, completion,
-        reasoning, db, http, messaging, unknown)
+        Semantic kind string (workflow, task, tool, agent, chat, completion,
+        embeddings, reasoning, db, http, messaging, rpc, unknown)
     """
     if not attributes:
         return SEMANTIC_KIND_UNKNOWN

From a30dc1a79a12f980fd73f0ac7b16628b493a048f Mon Sep 17 00:00:00 2001
From: kerthcet <kerthcet@gmail.com>
Date: Wed, 20 May 2026 14:53:09 +0100
Subject: [PATCH 5/5] remove debug queries from tracking integration test

Signed-off-by: kerthcet <kerthcet@gmail.com>
---
 tests/integration/test_tracking.py | 40 ------------------------------
 1 file changed, 40 deletions(-)

diff --git a/tests/integration/test_tracking.py b/tests/integration/test_tracking.py
index 56b8697a..538d962c 100644
--- a/tests/integration/test_tracking.py
+++ b/tests/integration/test_tracking.py
@@ -125,46 +125,6 @@ async def token_workflow():
     # Query spans with token data (use tracestore database name)
     database = tracestore.database
 
-    # Debug: Check if any spans exist for this experiment
-    debug_query = f"""
-    SELECT COUNT(*) as count
-    FROM {database}.otel_spans
-    WHERE ExperimentId = '{experiment_id}'
-    """
-    total_spans = tracestore.client.query(debug_query).result_rows[0][0]
-    print(f"DEBUG: Total spans for experiment {experiment_id}: {total_spans}")
-
-    # Debug: Check what span names we have
-    debug_query_names = f"""
-    SELECT SpanName, COUNT(*) as count
-    FROM {database}.otel_spans
-    WHERE ExperimentId = '{experiment_id}'
-    GROUP BY SpanName
-    ORDER BY count DESC
-    """
-    span_names = tracestore.client.query(debug_query_names).result_rows
-    print(f"DEBUG: Span names: {span_names}")
-
-    # Debug: Check what attributes exist in spans
-    debug_query_attrs = f"""
-    SELECT SpanName, mapKeys(SpanAttributes) as attr_keys
-    FROM {database}.otel_spans
-    WHERE ExperimentId = '{experiment_id}'
-    LIMIT 5
-    """
-    span_attrs = tracestore.client.query(debug_query_attrs).result_rows
-    print(f"DEBUG: Sample span attributes: {span_attrs}")
-
-    # Debug: Check spans with gen_ai.usage attributes
-    debug_query2 = f"""
-    SELECT COUNT(*) as count
-    FROM {database}.otel_spans
-    WHERE ExperimentId = '{experiment_id}'
-        AND mapContains(SpanAttributes, 'gen_ai.usage.input_tokens')
-    """
-    llm_spans = tracestore.client.query(debug_query2).result_rows[0][0]
-    print(f"DEBUG: Spans with gen_ai.usage.input_tokens: {llm_spans}")
-
     query = f"""
     SELECT
         SpanId as span_id,