// Package telemetry provides OpenTelemetry integration for the rdev API. // // It initializes a tracer provider with OTLP exporter for distributed tracing. // Traces are exported to an OpenTelemetry collector (e.g., otel-collector in k8s). // // Configuration via environment variables: // - OTEL_EXPORTER_OTLP_ENDPOINT: Collector endpoint (default: otel-collector.observability.svc:4317) // - OTEL_SERVICE_NAME: Service name for traces (default: rdev-api) // - OTEL_SERVICE_VERSION: Service version (default: unknown) // - OTEL_SERVICE_NAMESPACE: Namespace (default: rdev) // - OTEL_ENABLED: Enable/disable telemetry (default: true) package telemetry import ( "context" "fmt" "log/slog" "os" "strings" "time" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" "go.opentelemetry.io/otel/propagation" "go.opentelemetry.io/otel/sdk/resource" sdktrace "go.opentelemetry.io/otel/sdk/trace" semconv "go.opentelemetry.io/otel/semconv/v1.26.0" "go.opentelemetry.io/otel/trace" "go.opentelemetry.io/otel/trace/noop" ) // Config holds telemetry configuration. type Config struct { // Endpoint is the OTLP collector endpoint (gRPC). // Default: otel-collector.observability.svc:4317 Endpoint string // ServiceName identifies this service in traces. // Default: rdev-api ServiceName string // ServiceVersion is the version of this service. // Default: unknown ServiceVersion string // ServiceNamespace groups related services. // Default: rdev ServiceNamespace string // Enabled controls whether telemetry is active. // Default: true Enabled bool // Insecure disables TLS for the gRPC connection. // Default: true (for internal k8s communication) Insecure bool // Logger for telemetry initialization messages. Logger *slog.Logger } // DefaultConfig returns configuration with defaults applied. func DefaultConfig() Config { return Config{ Endpoint: getEnv("OTEL_EXPORTER_OTLP_ENDPOINT", "otel-collector.observability.svc:4317"), ServiceName: getEnv("OTEL_SERVICE_NAME", "rdev-api"), ServiceVersion: getEnv("OTEL_SERVICE_VERSION", "unknown"), ServiceNamespace: getEnv("OTEL_SERVICE_NAMESPACE", "rdev"), Enabled: getEnvBool("OTEL_ENABLED", true), Insecure: true, } } // Telemetry manages OpenTelemetry resources. type Telemetry struct { config Config tracerProvider *sdktrace.TracerProvider tracer trace.Tracer logger *slog.Logger } // New creates and initializes a new Telemetry instance. // Call Shutdown() when done to flush pending traces. func New(ctx context.Context, cfg Config) (*Telemetry, error) { logger := cfg.Logger if logger == nil { logger = slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{ Level: slog.LevelInfo, })) } t := &Telemetry{ config: cfg, logger: logger, } if !cfg.Enabled { logger.Info("telemetry disabled, using noop tracer") t.tracer = noop.NewTracerProvider().Tracer(cfg.ServiceName) return t, nil } // Create OTLP exporter opts := []otlptracegrpc.Option{ otlptracegrpc.WithEndpoint(cfg.Endpoint), } if cfg.Insecure { opts = append(opts, otlptracegrpc.WithInsecure()) } exporter, err := otlptracegrpc.New(ctx, opts...) if err != nil { return nil, fmt.Errorf("failed to create OTLP exporter: %w", err) } // Create resource with service information // Note: We create a new resource instead of merging with Default() to avoid // schema URL conflicts between different semconv versions res := resource.NewWithAttributes( semconv.SchemaURL, semconv.ServiceName(cfg.ServiceName), semconv.ServiceVersion(cfg.ServiceVersion), semconv.ServiceNamespace(cfg.ServiceNamespace), attribute.String("deployment.environment", getEnv("ENVIRONMENT", "production")), ) // Create tracer provider with batch span processor tp := sdktrace.NewTracerProvider( sdktrace.WithBatcher(exporter, sdktrace.WithBatchTimeout(5*time.Second), sdktrace.WithMaxExportBatchSize(512), ), sdktrace.WithResource(res), sdktrace.WithSampler(sdktrace.AlwaysSample()), ) // Set as global tracer provider otel.SetTracerProvider(tp) // Set up propagation (W3C Trace Context + Baggage) otel.SetTextMapPropagator(propagation.NewCompositeTextMapPropagator( propagation.TraceContext{}, propagation.Baggage{}, )) t.tracerProvider = tp t.tracer = tp.Tracer(cfg.ServiceName) logger.Info("telemetry initialized", "endpoint", cfg.Endpoint, "service", cfg.ServiceName, "version", cfg.ServiceVersion, "namespace", cfg.ServiceNamespace, ) return t, nil } // Tracer returns the tracer for creating spans. func (t *Telemetry) Tracer() trace.Tracer { return t.tracer } // Shutdown gracefully shuts down the telemetry, flushing any pending traces. // Should be called during application shutdown. func (t *Telemetry) Shutdown(ctx context.Context) error { if t.tracerProvider == nil { return nil } t.logger.Info("shutting down telemetry") // Create a timeout context if none provided if _, hasDeadline := ctx.Deadline(); !hasDeadline { var cancel context.CancelFunc ctx, cancel = context.WithTimeout(ctx, 10*time.Second) defer cancel() } if err := t.tracerProvider.Shutdown(ctx); err != nil { return fmt.Errorf("telemetry shutdown failed: %w", err) } t.logger.Info("telemetry shutdown complete") return nil } // StartSpan starts a new span with the given name. // Returns the span and a new context containing the span. func (t *Telemetry) StartSpan(ctx context.Context, name string, opts ...trace.SpanStartOption) (context.Context, trace.Span) { return t.tracer.Start(ctx, name, opts...) } // AddSpanEvent adds an event to the current span in the context. func AddSpanEvent(ctx context.Context, name string, attrs ...attribute.KeyValue) { span := trace.SpanFromContext(ctx) span.AddEvent(name, trace.WithAttributes(attrs...)) } // SetSpanError records an error on the current span. func SetSpanError(ctx context.Context, err error) { span := trace.SpanFromContext(ctx) span.RecordError(err) } // SetSpanAttributes sets attributes on the current span. func SetSpanAttributes(ctx context.Context, attrs ...attribute.KeyValue) { span := trace.SpanFromContext(ctx) span.SetAttributes(attrs...) } // getEnv returns the environment variable value or the default. func getEnv(key, defaultVal string) string { if v := os.Getenv(key); v != "" { return v } return defaultVal } // getEnvBool returns the environment variable as bool or the default. func getEnvBool(key string, defaultVal bool) bool { v := os.Getenv(key) if v == "" { return defaultVal } v = strings.ToLower(v) return v == "true" || v == "1" || v == "yes" }