rdev/internal/metrics/metrics.go
jordan 72d16929ca feat: Implement hexagonal architecture with services, webhooks, queue, and telemetry
Major refactoring to hexagonal (ports & adapters) architecture:

- Add service layer (apikey_service, project_service) for business logic
- Add webhook system with dispatcher and delivery tracking
- Add command queue with priority-based processing
- Add rate limiting with sliding window algorithm
- Add audit logging for command execution
- Add OpenTelemetry integration (traces, metrics, spans)
- Add circuit breaker for fault tolerance
- Add cached repository wrapper for performance
- Add comprehensive validation package
- Add Kubernetes client integration for pod management
- Add database migrations (allowed_ips, audit_log, rate_limiting, queue, webhooks)
- Add network policy and PodDisruptionBudget for k8s
- Remove legacy executor and projects/registry packages
- Untrack secrets.yaml (now managed via envault)
- Add coverage.out to .gitignore
- Add e2e test infrastructure with docker-compose
- Add comprehensive documentation (API, architecture, operations, plans)
- Add golangci-lint config and pre-commit hook

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-25 19:57:46 -07:00

143 lines
4.5 KiB
Go

// Package metrics provides Prometheus metrics for the rdev API.
package metrics
import (
"net/http"
"regexp"
"strconv"
"time"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/prometheus/client_golang/prometheus/promhttp"
)
// Command execution metrics.
var (
	// commandsTotal counts executed commands, labelled by project, command
	// type and outcome status.
	commandsTotal = promauto.NewCounterVec(
		prometheus.CounterOpts{
			Name: "rdev_commands_total",
			Help: "Total number of commands executed",
		},
		[]string{"project", "type", "status"},
	)

	// commandDuration observes command run time in seconds. The 15 buckets
	// double from 0.1s, so the largest finite bound is 0.1 * 2^14 = 1638.4s
	// (roughly 27 minutes).
	commandDuration = promauto.NewHistogramVec(
		prometheus.HistogramOpts{
			Name:    "rdev_command_duration_seconds",
			Help:    "Duration of command execution in seconds",
			Buckets: prometheus.ExponentialBuckets(0.1, 2, 15),
		},
		[]string{"project", "type"},
	)
)

// SSE stream metrics.
var (
	// activeStreams tracks how many SSE streams are currently open per project.
	activeStreams = promauto.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "rdev_active_streams",
			Help: "Number of active SSE streams",
		},
		[]string{"project"},
	)

	// streamReconnects counts SSE stream reconnections per project.
	streamReconnects = promauto.NewCounterVec(
		prometheus.CounterOpts{
			Name: "rdev_stream_reconnects_total",
			Help: "Total number of SSE stream reconnections",
		},
		[]string{"project"},
	)
)

// Authentication metrics.
var (
	// authFailures counts failed authentication attempts by failure reason.
	authFailures = promauto.NewCounterVec(
		prometheus.CounterOpts{
			Name: "rdev_auth_failures_total",
			Help: "Total number of authentication failures",
		},
		[]string{"reason"},
	)
)

// API request metrics. Both share the same label set so they can be joined.
var (
	// requestDuration observes API request latency in seconds using the
	// Prometheus default buckets.
	requestDuration = promauto.NewHistogramVec(
		prometheus.HistogramOpts{
			Name:    "rdev_api_request_duration_seconds",
			Help:    "Duration of API requests in seconds",
			Buckets: prometheus.DefBuckets,
		},
		[]string{"method", "path", "status"},
	)

	// requestsTotal counts API requests by method, normalized path and
	// response status.
	requestsTotal = promauto.NewCounterVec(
		prometheus.CounterOpts{
			Name: "rdev_api_requests_total",
			Help: "Total number of API requests",
		},
		[]string{"method", "path", "status"},
	)
)
// RecordCommand counts one executed command and observes how long it took.
//
// project, cmdType and status become the label values of the command counter;
// project and cmdType label the duration histogram. durationMs is given in
// milliseconds and is converted to seconds before being observed.
func RecordCommand(project, cmdType, status string, durationMs int64) {
	seconds := float64(durationMs) / 1000.0

	executed := commandsTotal.WithLabelValues(project, cmdType, status)
	executed.Inc()

	elapsed := commandDuration.WithLabelValues(project, cmdType)
	elapsed.Observe(seconds)
}
// IncActiveStreams adds one to the active SSE stream gauge of the given project.
func IncActiveStreams(project string) {
	gauge := activeStreams.WithLabelValues(project)
	gauge.Inc()
}
// DecActiveStreams subtracts one from the active SSE stream gauge of the given
// project.
func DecActiveStreams(project string) {
	gauge := activeStreams.WithLabelValues(project)
	gauge.Dec()
}
// RecordStreamReconnect counts one SSE stream reconnection for the given project.
func RecordStreamReconnect(project string) {
	reconnects := streamReconnects.WithLabelValues(project)
	reconnects.Inc()
}
// RecordAuthFailure counts one authentication failure under the given reason
// label.
func RecordAuthFailure(reason string) {
	failures := authFailures.WithLabelValues(reason)
	failures.Inc()
}
// Handler returns the HTTP handler that exposes the collected metrics, as
// provided by promhttp.Handler.
func Handler() http.Handler {
	exposition := promhttp.Handler()
	return exposition
}
// Middleware wraps next so that every request it serves is timed and counted.
//
// After next returns, the request's method, normalized URL path and response
// status are used as label values for both the request duration histogram and
// the request counter.
func Middleware(next http.Handler) http.Handler {
	instrumented := func(w http.ResponseWriter, r *http.Request) {
		began := time.Now()

		// Start from 200 so a handler that never calls WriteHeader is still
		// reported with a status.
		recorder := &responseWriter{ResponseWriter: w, statusCode: http.StatusOK}
		next.ServeHTTP(recorder, r)
		elapsed := time.Since(began).Seconds()

		// Both metrics take the same label values, in declaration order:
		// method, path, status.
		labels := []string{
			r.Method,
			normalizePath(r.URL.Path),
			strconv.Itoa(recorder.statusCode),
		}
		requestDuration.WithLabelValues(labels...).Observe(elapsed)
		requestsTotal.WithLabelValues(labels...).Inc()
	}
	return http.HandlerFunc(instrumented)
}
// responseWriter wraps an http.ResponseWriter and remembers the status code
// that was actually committed to the client, so Middleware can use it as a
// metric label.
//
// It also forwards Flush and exposes Unwrap. Without these, wrapping a writer
// hides its http.Flusher implementation, which breaks streaming handlers such
// as SSE endpoints, and http.ResponseController cannot reach the underlying
// writer.
type responseWriter struct {
	http.ResponseWriter
	// statusCode is the status reported to metrics. Middleware initializes it
	// to http.StatusOK, the status net/http sends when WriteHeader is never
	// called explicitly.
	statusCode int
	// wroteHeader is true once the final (non-informational) status has been
	// committed; after that statusCode no longer changes.
	wroteHeader bool
}

// Compile-time check that the wrapper keeps streaming support.
var _ http.Flusher = (*responseWriter)(nil)

// WriteHeader records the first final status code and forwards the call.
//
// Informational 1xx codes other than 101 Switching Protocols may be followed
// by a final status, so they are forwarded but not recorded. Calls made after
// the final status was committed are still forwarded (the underlying writer
// decides how to treat them) but do not overwrite the recorded status, since
// they do not change what the client received.
func (rw *responseWriter) WriteHeader(code int) {
	informational := code >= 100 && code <= 199 && code != http.StatusSwitchingProtocols
	if !rw.wroteHeader && !informational {
		rw.statusCode = code
		rw.wroteHeader = true
	}
	rw.ResponseWriter.WriteHeader(code)
}

// Write forwards b to the underlying writer. Writing a body before calling
// WriteHeader commits an implicit 200 OK, so that status is recorded first.
func (rw *responseWriter) Write(b []byte) (int, error) {
	rw.commitImplicitOK()
	return rw.ResponseWriter.Write(b)
}

// Flush implements http.Flusher by delegating to the underlying writer when it
// supports flushing; otherwise it is a no-op. Flushing before WriteHeader
// commits an implicit 200 OK, which is recorded.
func (rw *responseWriter) Flush() {
	flusher, ok := rw.ResponseWriter.(http.Flusher)
	if !ok {
		return
	}
	rw.commitImplicitOK()
	flusher.Flush()
}

// Unwrap returns the wrapped writer so http.ResponseController and similar
// helpers can access optional interfaces of the original writer.
func (rw *responseWriter) Unwrap() http.ResponseWriter {
	return rw.ResponseWriter
}

// commitImplicitOK marks the header as sent with 200 OK if no final status has
// been committed yet.
func (rw *responseWriter) commitImplicitOK() {
	if rw.wroteHeader {
		return
	}
	rw.statusCode = http.StatusOK
	rw.wroteHeader = true
}
// pathNormalizers lists the rewrite rules tried by normalizePath, in order.
// The first rule whose pattern matches is applied, so narrower patterns must
// be listed before broader ones.
var pathNormalizers = []struct {
	pattern *regexp.Regexp // anchored expression matched against the request path
	replace string         // replacement template; $1 expands to the first capture group
}{
	{
		// /keys/<key id> -> /keys/{id}
		pattern: regexp.MustCompile(`^/keys/[^/]+$`),
		replace: "/keys/{id}",
	},
	{
		// /projects/<project id>/claude-config/<type>/<name>
		//   -> /projects/{id}/claude-config/<type>/{name}
		// The type segment (commands, skills or agents) is kept verbatim.
		pattern: regexp.MustCompile(`^/projects/[^/]+/claude-config/(commands|skills|agents)/[^/]+$`),
		replace: "/projects/{id}/claude-config/$1/{name}",
	},
	{
		// /projects/<project id>[/<sub-path>] -> /projects/{id}[/<sub-path>]
		// Catch-all for project routes; the sub-path is kept as is. Must stay
		// last because it also matches the claude-config paths above.
		pattern: regexp.MustCompile(`^/projects/[^/]+(/.*)?$`),
		replace: "/projects/{id}$1",
	},
}
// normalizePath maps a request path to the value used for the "path" metric
// label. Variable segments are replaced with placeholders by the first
// matching rule in pathNormalizers, which keeps the number of distinct label
// values down. A path that matches no rule is returned unchanged.
func normalizePath(path string) string {
	for i := range pathNormalizers {
		rule := &pathNormalizers[i]
		if !rule.pattern.MatchString(path) {
			continue
		}
		// First match wins; later, broader rules are not consulted.
		return rule.pattern.ReplaceAllString(path, rule.replace)
	}
	return path
}