Skip to content

Commit 800088a

Browse files
committed
feat(observability): enhance AstraDB instrumentation and update dependencies
- Added runtime patching for AstraDBCollection methods to instrument database operations with OpenTelemetry spans and Prometheus histograms. - Updated observability settings in `config.py` to include new OTLP protocol options and headers. - Enhanced logging configuration to support JSON formatting and improved error handling for logging setup. - Updated dependencies in `pyproject.toml` and `poetry.lock` for FastAPI, cryptography, and other packages to their latest versions. - Introduced a GitHub workflow for validating JSON files in the Grafana dashboard directory.
1 parent 92286c4 commit 800088a

9 files changed

Lines changed: 908 additions & 140 deletions

File tree

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
name: Grafana JSON lint
2+
3+
on:
4+
push:
5+
paths:
6+
- 'docs/grafana/*.json'
7+
pull_request:
8+
paths:
9+
- 'docs/grafana/*.json'
10+
11+
jobs:
12+
json-lint:
13+
runs-on: ubuntu-latest
14+
steps:
15+
- uses: actions/checkout@v4
16+
- name: Validate JSON
17+
run: |
18+
sudo apt-get update -y
19+
sudo apt-get install -y jq
20+
for file in docs/grafana/*.json; do
21+
echo "Linting $file";
22+
jq type "$file" > /dev/null;
23+
done

app/core/config.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,10 @@ def _sanitize_bool_tokens(cls, data): # type: ignore[return-value]
156156
# initialised. This is useful for local development or extremely
157157
# resource-constrained environments where you don't want the overhead of
158158
# telemetry.
159-
OBSERVABILITY_ENABLED: bool = Field(default=True, description="Globally enable/disable all extra observability (metrics/traces/log shipping).")
159+
OBSERVABILITY_ENABLED: bool = Field(
160+
default=True,
161+
description="Globally enable/disable all extra observability (metrics/traces/log shipping).",
162+
)
160163

161164
# --- OpenTelemetry ---------------------------------------------------
162165

@@ -177,12 +180,21 @@ def _sanitize_bool_tokens(cls, data): # type: ignore[return-value]
177180
# pull metrics if desired.
178181
OTEL_METRICS_ENABLED: bool = False
179182

183+
# Protocol for OTLP export – "grpc" (default) or "http". Allows integration with collectors that only expose the HTTP/JSON OTLP endpoint (4318).
184+
OTEL_EXPORTER_OTLP_PROTOCOL: str = Field(default="grpc")
185+
186+
# Optional additional headers to send along OTLP requests (for auth tokens, etc.).
187+
# Provide as comma-separated key=value list, e.g. "mcp-token=abcd123,env=dev".
188+
OTEL_EXPORTER_OTLP_HEADERS: str | None = Field(default=None)
189+
180190
# Sample ratio (0.0-1.0) for traces – 1.0 = always.
181191
OTEL_TRACES_SAMPLER_RATIO: float = Field(default=1.0, ge=0.0, le=1.0)
182192

183193
# --- Centralised logging (Loki) --------------------------------------
184194

185-
LOKI_ENABLED: bool = Field(default=False, description="Enable structured log shipping to Loki.")
195+
LOKI_ENABLED: bool = Field(
196+
default=False, description="Enable structured log shipping to Loki."
197+
)
186198
LOKI_ENDPOINT: str | None = Field(
187199
default=None,
188200
description="Loki push API endpoint, e.g. http://loki:3100/loki/api/v1/push.",

app/utils/db_instrumentation.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
"""Runtime patching helpers to instrument AstraDBCollection methods.
2+
3+
Call `instrument_astra_collection()` early during application startup (done in
4+
`app.utils.observability`) and every `insert_one` / `update_one` will be
5+
surrounded by an OpenTelemetry span and Prometheus histogram sample. This
6+
provides visibility into **all** Data-API mutations without having to wrap
7+
each individual call site.
8+
"""
9+
from __future__ import annotations
10+
11+
import time
12+
from typing import Any, Awaitable
13+
14+
from opentelemetry import trace
15+
16+
from app.metrics import ASTRA_DB_QUERY_DURATION_SECONDS
17+
18+
_tracer = trace.get_tracer(__name__)
19+
20+
21+
async def _observe(op: str, coro: Awaitable[Any]): # noqa: D401
22+
"""Await *coro* while recording span + histogram for DB *op*."""
23+
24+
start = time.perf_counter()
25+
with _tracer.start_as_current_span(f"astra.{op}") as span:
26+
try:
27+
result = await coro
28+
return result
29+
finally:
30+
duration = time.perf_counter() - start
31+
ASTRA_DB_QUERY_DURATION_SECONDS.labels(operation=op).observe(duration)
32+
span.set_attribute("duration_ms", int(duration * 1000))
33+
34+
35+
def instrument_astra_collection() -> None: # noqa: D401
36+
"""Monkey-patch AstraDBCollection once per process."""
37+
38+
try:
39+
from app.db.astra_client import AstraDBCollection # type: ignore
40+
except Exception:
41+
return # Library not available in unit-test mode
42+
43+
if getattr(AstraDBCollection, "_kv_instrumented", False):
44+
return # Already patched
45+
46+
# ---------------------------- insert_one ----------------------------
47+
if hasattr(AstraDBCollection, "insert_one"):
48+
_orig_insert = AstraDBCollection.insert_one
49+
50+
async def _insert_wrap(self, *args, **kwargs): # type: ignore
51+
return await _observe("insert", _orig_insert(self, *args, **kwargs))
52+
53+
AstraDBCollection.insert_one = _insert_wrap # type: ignore[attr-defined]
54+
55+
# ---------------------------- update_one ----------------------------
56+
if hasattr(AstraDBCollection, "update_one"):
57+
_orig_update = AstraDBCollection.update_one
58+
59+
async def _update_wrap(self, *args, **kwargs): # type: ignore
60+
return await _observe("update", _orig_update(self, *args, **kwargs))
61+
62+
AstraDBCollection.update_one = _update_wrap # type: ignore[attr-defined]
63+
64+
# Mark as patched
65+
AstraDBCollection._kv_instrumented = True # type: ignore[attr-defined]

0 commit comments

Comments
 (0)