1+ # Observability utilities – OpenTelemetry instrumentation, Prometheus metrics
2+ # exposition and structured log shipping.
3+ #
4+ # This module is imported by main.py during application start-up. All
5+ # instrumentation is performed in a best-effort fashion: if a particular
6+ # dependency is missing or an endpoint is unreachable we log a warning but
7+ # allow the application to continue running.
8+
9+ from __future__ import annotations
10+
11+ import logging
12+ import os
13+ import time
14+ from typing import Any
15+
16+ from fastapi import FastAPI
17+
18+ from app .core .config import settings
19+
20+ _logger = logging .getLogger (__name__ )
21+
22+
23+ # ---------------------------------------------------------------------------
24+ # Prometheus – latency histogram per route via prometheus-fastapi-instrumentator
25+ # ---------------------------------------------------------------------------
26+
27+ _PROMETHEUS_READY = False
28+
29+ try :
30+ from prometheus_fastapi_instrumentator import Instrumentator # type: ignore
31+ except ModuleNotFoundError : # pragma: no cover
32+ Instrumentator = None # type: ignore
33+ _logger .debug ("prometheus_fastapi_instrumentator not available – metrics disabled" )
34+
35+
36+ # ---------------------------------------------------------------------------
37+ # OpenTelemetry – traces + optional OTLP metric export
38+ # ---------------------------------------------------------------------------
39+
40+ _OTEL_READY = False
41+ try :
42+ from opentelemetry import trace
43+ from opentelemetry .instrumentation .fastapi import FastAPIInstrumentor # type: ignore
44+ from opentelemetry .instrumentation .logging import LoggingInstrumentor # type: ignore
45+ from opentelemetry .sdk .resources import Resource
46+ from opentelemetry .sdk .trace import TracerProvider
47+ from opentelemetry .sdk .trace .export import BatchSpanProcessor
48+ from opentelemetry .exporter .otlp .proto .grpc .trace_exporter import OTLPSpanExporter # type: ignore
49+ from opentelemetry .sdk .trace .sampling import TraceIdRatioBased
50+
51+ # Metrics are optional – only import if feature flag enabled
52+ if settings .OTEL_METRICS_ENABLED :
53+ from opentelemetry .sdk .metrics import MeterProvider # type: ignore
54+ from opentelemetry .exporter .otlp .proto .grpc .metric_exporter import ( # type: ignore
55+ OTLPMetricExporter ,
56+ )
57+ from opentelemetry .sdk .metrics .export import (
58+ PeriodicExportingMetricReader ,
59+ )
60+
61+ _OTEL_READY = True
62+ except ModuleNotFoundError as exc : # pragma: no cover
63+ _logger .debug ("OpenTelemetry libraries missing – traces disabled (%s)" , exc )
64+
65+
66+ # ---------------------------------------------------------------------------
67+ # Loki logging handler
68+ # ---------------------------------------------------------------------------
69+
70+ _LOKI_READY = False
71+ if settings .LOKI_ENABLED and settings .LOKI_ENDPOINT :
72+ try :
73+ import logging_loki # type: ignore
74+
75+ _LOKI_READY = True
76+ except ModuleNotFoundError : # pragma: no cover
77+ _logger .warning ("LOKI_ENABLED but logging_loki dependency missing; skipping Loki handler" )
78+
79+
80+ # ---------------------------------------------------------------------------
81+ # Public API
82+ # ---------------------------------------------------------------------------
83+
84+ def configure_observability (app : FastAPI ) -> None : # noqa: D401
85+ """Initialise optional observability integrations.
86+
87+ The function is safe to call multiple times – it keeps track of internal
88+ state to ensure instrumentation happens only once.
89+ """
90+
91+ if not settings .OBSERVABILITY_ENABLED :
92+ _logger .info ("Observability explicitly disabled via settings" )
93+ return
94+
95+ _setup_prometheus (app )
96+ _setup_opentelemetry (app )
97+ _setup_loki_logging ()
98+
99+
100+ # ---------------------------------------------------------------------------
101+ # Internal helpers
102+ # ---------------------------------------------------------------------------
103+
104+
105+ _prometheus_instrumented = False
106+
107+ def _setup_prometheus (app : FastAPI ) -> None :
108+ global _prometheus_instrumented
109+ if _prometheus_instrumented or Instrumentator is None :
110+ return
111+
112+ try :
113+ start_time = time .perf_counter ()
114+ Instrumentator ().instrument (app ).expose (app , include_in_schema = False , should_gzip = True )
115+ _prometheus_instrumented = True
116+ _logger .info ("Prometheus instrumentation initialised in %.2f ms" , (time .perf_counter () - start_time ) * 1000 )
117+ except Exception as exc : # pragma: no cover
118+ _logger .warning ("Failed to initialise Prometheus instrumentation: %s" , exc )
119+
120+
121+ _otel_instrumented = False
122+
123+ def _setup_opentelemetry (app : FastAPI ) -> None :
124+ global _otel_instrumented
125+ if _otel_instrumented or not _OTEL_READY or not settings .OTEL_TRACES_ENABLED :
126+ return
127+
128+ if not settings .OTEL_EXPORTER_OTLP_ENDPOINT :
129+ _logger .info ("OTEL_TRACES_ENABLED but no OTEL_EXPORTER_OTLP_ENDPOINT set – skipping" )
130+ return
131+
132+ try :
133+ start = time .perf_counter ()
134+
135+ resource_attrs : dict [str , Any ] = {
136+ "service.name" : settings .PROJECT_NAME ,
137+ "service.version" : settings .APP_VERSION ,
138+ "deployment.environment" : settings .ENVIRONMENT ,
139+ }
140+ resource = Resource .create (resource_attrs )
141+
142+ sampler = TraceIdRatioBased (settings .OTEL_TRACES_SAMPLER_RATIO )
143+ provider = TracerProvider (resource = resource , sampler = sampler )
144+ trace .set_tracer_provider (provider )
145+
146+ span_exporter = OTLPSpanExporter (endpoint = settings .OTEL_EXPORTER_OTLP_ENDPOINT , insecure = True )
147+ span_processor = BatchSpanProcessor (span_exporter )
148+ provider .add_span_processor (span_processor )
149+
150+ # Instrument FastAPI request handling
151+ FastAPIInstrumentor ().instrument_app (app , tracer_provider = provider )
152+
153+ # Correlate application logs with active spans
154+ LoggingInstrumentor ().instrument (set_logging_format = True )
155+
156+ # Optional metrics export via OTLP
157+ if settings .OTEL_METRICS_ENABLED :
158+ metric_exporter = OTLPMetricExporter (endpoint = settings .OTEL_EXPORTER_OTLP_ENDPOINT , insecure = True )
159+ reader = PeriodicExportingMetricReader (metric_exporter )
160+ meter_provider = MeterProvider (resource = resource , metric_readers = [reader ])
161+ from opentelemetry import metrics # local import to avoid missing module issue
162+
163+ metrics .set_meter_provider (meter_provider )
164+
165+ _otel_instrumented = True
166+ _logger .info ("OpenTelemetry tracing initialised (%.2f ms)" , (time .perf_counter () - start ) * 1000 )
167+ except Exception as exc : # pragma: no cover
168+ _logger .warning ("Failed to initialise OpenTelemetry tracing: %s" , exc )
169+
170+
171+ _loki_handler_added = False
172+
173+ def _setup_loki_logging () -> None :
174+ global _loki_handler_added
175+ if _loki_handler_added or not _LOKI_READY :
176+ return
177+
178+ try :
179+ import logging_loki # type: ignore
180+
181+ tags = {
182+ "service" : settings .PROJECT_NAME ,
183+ "environment" : settings .ENVIRONMENT ,
184+ }
185+ if settings .LOKI_EXTRA_LABELS :
186+ for pair in settings .LOKI_EXTRA_LABELS .split ("," ):
187+ if "=" in pair :
188+ k , v = pair .split ("=" , 1 )
189+ tags [k .strip ()] = v .strip ()
190+
191+ handler = logging_loki .LokiHandler (
192+ url = settings .LOKI_ENDPOINT , # type: ignore[arg-type]
193+ tags = tags ,
194+ version = "1" ,
195+ )
196+ logging .getLogger ().addHandler (handler )
197+ _loki_handler_added = True
198+ _logger .info ("Loki logging handler attached (endpoint=%s)" , settings .LOKI_ENDPOINT )
199+ except Exception as exc : # pragma: no cover
200+ _logger .warning ("Failed to attach Loki logging handler: %s" , exc )
0 commit comments