- Add ListPipelines/GetPipeline to CIProvider port with Woodpecker adapter
- Add DNS alias endpoints: GET/POST/DELETE /projects/{id}/domains
- Implement worker executor daemon, build executor, and git operations
- Add build service, worker service, and build audit tracking
- Add worker registry with PostgreSQL adapter and migration
- Add multi-provider code agent interface (Claude Code + OpenCode)
- Add create-and-build combo endpoint
- Update landing-page cookbook to reflect all gaps closed
- Fix tech debt: unified validation, auth scopes, error wrapping, slog patterns
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
241 lines
8.1 KiB
Go
241 lines
8.1 KiB
Go
// Package metrics provides Prometheus metrics for the rdev API.
|
|
package metrics
|
|
|
|
import (
|
|
"net/http"
|
|
"regexp"
|
|
"strconv"
|
|
"time"
|
|
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
"github.com/prometheus/client_golang/prometheus/promauto"
|
|
"github.com/prometheus/client_golang/prometheus/promhttp"
|
|
)
|
|
|
|
var (
|
|
// Commands
|
|
commandsTotal = promauto.NewCounterVec(prometheus.CounterOpts{
|
|
Name: "rdev_commands_total",
|
|
Help: "Total number of commands executed",
|
|
}, []string{"project", "type", "status"})
|
|
|
|
commandDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
|
|
Name: "rdev_command_duration_seconds",
|
|
Help: "Duration of command execution in seconds",
|
|
Buckets: prometheus.ExponentialBuckets(0.1, 2, 15), // 0.1s to ~27min
|
|
}, []string{"project", "type"})
|
|
|
|
// Code Agents
|
|
agentRequestsTotal = promauto.NewCounterVec(prometheus.CounterOpts{
|
|
Name: "rdev_agent_requests_total",
|
|
Help: "Total number of code agent requests",
|
|
}, []string{"provider", "status"})
|
|
|
|
agentRequestDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
|
|
Name: "rdev_agent_request_duration_seconds",
|
|
Help: "Duration of code agent requests in seconds",
|
|
Buckets: prometheus.ExponentialBuckets(0.1, 2, 15), // 0.1s to ~27min
|
|
}, []string{"provider"})
|
|
|
|
agentToolUse = promauto.NewCounterVec(prometheus.CounterOpts{
|
|
Name: "rdev_agent_tool_use_total",
|
|
Help: "Total number of tool invocations by code agents",
|
|
}, []string{"provider", "tool"})
|
|
|
|
agentAvailability = promauto.NewGaugeVec(prometheus.GaugeOpts{
|
|
Name: "rdev_agent_available",
|
|
Help: "Whether the code agent is available (1) or not (0)",
|
|
}, []string{"provider"})
|
|
|
|
// Worker Pool
|
|
workersTotal = promauto.NewGaugeVec(prometheus.GaugeOpts{
|
|
Name: "rdev_workers_total",
|
|
Help: "Number of registered workers by status",
|
|
}, []string{"status"})
|
|
|
|
workerHeartbeatAge = promauto.NewGaugeVec(prometheus.GaugeOpts{
|
|
Name: "rdev_worker_heartbeat_age_seconds",
|
|
Help: "Age of the most recent worker heartbeat in seconds",
|
|
}, []string{"worker_id"})
|
|
|
|
// Builds
|
|
buildsTotal = promauto.NewCounterVec(prometheus.CounterOpts{
|
|
Name: "rdev_builds_total",
|
|
Help: "Total number of build tasks by status",
|
|
}, []string{"project", "status"})
|
|
|
|
buildDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
|
|
Name: "rdev_build_duration_seconds",
|
|
Help: "Duration of build executions in seconds",
|
|
Buckets: prometheus.ExponentialBuckets(1, 2, 12), // 1s to ~34min
|
|
}, []string{"project"})
|
|
|
|
// Work Queue
|
|
workQueueDepth = promauto.NewGaugeVec(prometheus.GaugeOpts{
|
|
Name: "rdev_work_queue_depth",
|
|
Help: "Number of tasks in the work queue by status",
|
|
}, []string{"status"})
|
|
|
|
// Streams
|
|
activeStreams = promauto.NewGaugeVec(prometheus.GaugeOpts{
|
|
Name: "rdev_active_streams",
|
|
Help: "Number of active SSE streams",
|
|
}, []string{"project"})
|
|
|
|
streamReconnects = promauto.NewCounterVec(prometheus.CounterOpts{
|
|
Name: "rdev_stream_reconnects_total",
|
|
Help: "Total number of SSE stream reconnections",
|
|
}, []string{"project"})
|
|
|
|
// Authentication
|
|
authFailures = promauto.NewCounterVec(prometheus.CounterOpts{
|
|
Name: "rdev_auth_failures_total",
|
|
Help: "Total number of authentication failures",
|
|
}, []string{"reason"})
|
|
|
|
// API Requests
|
|
requestDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
|
|
Name: "rdev_api_request_duration_seconds",
|
|
Help: "Duration of API requests in seconds",
|
|
Buckets: prometheus.DefBuckets,
|
|
}, []string{"method", "path", "status"})
|
|
|
|
requestsTotal = promauto.NewCounterVec(prometheus.CounterOpts{
|
|
Name: "rdev_api_requests_total",
|
|
Help: "Total number of API requests",
|
|
}, []string{"method", "path", "status"})
|
|
)
|
|
|
|
// RecordCommand records a command execution.
|
|
func RecordCommand(project, cmdType, status string, durationMs int64) {
|
|
commandsTotal.WithLabelValues(project, cmdType, status).Inc()
|
|
commandDuration.WithLabelValues(project, cmdType).Observe(float64(durationMs) / 1000.0)
|
|
}
|
|
|
|
// IncActiveStreams increments the active stream count for a project.
|
|
func IncActiveStreams(project string) {
|
|
activeStreams.WithLabelValues(project).Inc()
|
|
}
|
|
|
|
// DecActiveStreams decrements the active stream count for a project.
|
|
func DecActiveStreams(project string) {
|
|
activeStreams.WithLabelValues(project).Dec()
|
|
}
|
|
|
|
// RecordStreamReconnect records a stream reconnection.
|
|
func RecordStreamReconnect(project string) {
|
|
streamReconnects.WithLabelValues(project).Inc()
|
|
}
|
|
|
|
// RecordAuthFailure records an authentication failure.
|
|
func RecordAuthFailure(reason string) {
|
|
authFailures.WithLabelValues(reason).Inc()
|
|
}
|
|
|
|
// RecordAgentRequest records a code agent request execution.
|
|
func RecordAgentRequest(provider, status string, durationMs int64) {
|
|
agentRequestsTotal.WithLabelValues(provider, status).Inc()
|
|
agentRequestDuration.WithLabelValues(provider).Observe(float64(durationMs) / 1000.0)
|
|
}
|
|
|
|
// RecordAgentToolUse records a tool invocation by a code agent.
|
|
func RecordAgentToolUse(provider, tool string) {
|
|
agentToolUse.WithLabelValues(provider, tool).Inc()
|
|
}
|
|
|
|
// SetAgentAvailability sets the availability status of a code agent.
|
|
func SetAgentAvailability(provider string, available bool) {
|
|
val := 0.0
|
|
if available {
|
|
val = 1.0
|
|
}
|
|
agentAvailability.WithLabelValues(provider).Set(val)
|
|
}
|
|
|
|
// SetWorkerCount sets the number of workers for a given status.
|
|
func SetWorkerCount(status string, count int) {
|
|
workersTotal.WithLabelValues(status).Set(float64(count))
|
|
}
|
|
|
|
// RecordWorkerHeartbeat sets the age of a worker's most recent heartbeat.
|
|
func RecordWorkerHeartbeat(workerID string, ageSeconds float64) {
|
|
workerHeartbeatAge.WithLabelValues(workerID).Set(ageSeconds)
|
|
}
|
|
|
|
// RecordBuild records a build task completion.
|
|
func RecordBuild(project, status string, durationMs int64) {
|
|
buildsTotal.WithLabelValues(project, status).Inc()
|
|
if durationMs > 0 {
|
|
buildDuration.WithLabelValues(project).Observe(float64(durationMs) / 1000.0)
|
|
}
|
|
}
|
|
|
|
// SetWorkQueueDepth sets the current depth of the work queue for a status.
|
|
func SetWorkQueueDepth(status string, count int64) {
|
|
workQueueDepth.WithLabelValues(status).Set(float64(count))
|
|
}
|
|
|
|
// Handler returns the Prometheus HTTP handler.
|
|
func Handler() http.Handler {
|
|
return promhttp.Handler()
|
|
}
|
|
|
|
// Middleware returns an HTTP middleware that records request metrics.
|
|
func Middleware(next http.Handler) http.Handler {
|
|
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
start := time.Now()
|
|
|
|
// Wrap the response writer to capture status code
|
|
rw := &responseWriter{ResponseWriter: w, statusCode: http.StatusOK}
|
|
|
|
next.ServeHTTP(rw, r)
|
|
|
|
duration := time.Since(start).Seconds()
|
|
status := strconv.Itoa(rw.statusCode)
|
|
path := normalizePath(r.URL.Path)
|
|
|
|
requestDuration.WithLabelValues(r.Method, path, status).Observe(duration)
|
|
requestsTotal.WithLabelValues(r.Method, path, status).Inc()
|
|
})
|
|
}
|
|
|
|
// responseWriter wraps an http.ResponseWriter so the status code sent by a
// handler can be read back after the handler returns.
//
// The creator is expected to initialise statusCode to http.StatusOK, which is
// the value reported when the handler never calls WriteHeader explicitly.
type responseWriter struct {
	http.ResponseWriter
	statusCode int
	// wroteHeader is set once the final response header has been committed;
	// later WriteHeader calls no longer change statusCode.
	wroteHeader bool
}

// WriteHeader records the status code and forwards the call to the wrapped
// writer. Only the first final status is recorded, because that is the one
// actually sent to the client; a repeated call is still forwarded so the
// underlying server keeps its own handling of it.
func (rw *responseWriter) WriteHeader(code int) {
	// Informational 1xx headers (other than 101 Switching Protocols) may be
	// followed by the real response header, so they are not the final status.
	informational := code >= 100 && code <= 199 && code != http.StatusSwitchingProtocols
	if !rw.wroteHeader && !informational {
		rw.statusCode = code
		rw.wroteHeader = true
	}
	rw.ResponseWriter.WriteHeader(code)
}

// Write forwards the data to the wrapped writer. Writing a body commits the
// header with the current status, so a later WriteHeader call must not change
// the recorded status code.
func (rw *responseWriter) Write(b []byte) (int, error) {
	rw.wroteHeader = true
	return rw.ResponseWriter.Write(b)
}

// Flush implements http.Flusher so streaming handlers (for example SSE) keep
// working when wrapped. It is a no-op when the wrapped writer cannot flush.
func (rw *responseWriter) Flush() {
	flusher, ok := rw.ResponseWriter.(http.Flusher)
	if !ok {
		return
	}
	// The standard library writers commit the header on the first flush.
	rw.wroteHeader = true
	flusher.Flush()
}

// Unwrap returns the wrapped writer so http.ResponseController can reach
// optional interfaces of the original http.ResponseWriter.
func (rw *responseWriter) Unwrap() http.ResponseWriter {
	return rw.ResponseWriter
}
|
|
|
|
// pathNormalizers lists the rewrite rules used by normalizePath. Rules are
// tried in order and the first matching rule wins, so narrower patterns must
// come before the more general ones.
var pathNormalizers = []struct {
	pattern *regexp.Regexp
	replace string
}{
	{
		// A single key: /keys/<id>.
		pattern: regexp.MustCompile(`^/keys/[^/]+$`),
		replace: "/keys/{id}",
	},
	{
		// A worker and anything below it; the sub-path is kept verbatim.
		pattern: regexp.MustCompile(`^/workers/[^/]+(/.*)?$`),
		replace: "/workers/{id}$1",
	},
	{
		// A single build: /builds/<id>.
		pattern: regexp.MustCompile(`^/builds/[^/]+$`),
		replace: "/builds/{id}",
	},
	{
		// A named claude-config entry; the entry type (commands, skills or
		// agents) is preserved while project ID and name are replaced.
		pattern: regexp.MustCompile(`^/projects/[^/]+/claude-config/(commands|skills|agents)/[^/]+$`),
		replace: "/projects/{id}/claude-config/$1/{name}",
	},
	{
		// Any other project path. Must stay last because it also matches the
		// claude-config paths above; the sub-path is kept verbatim.
		pattern: regexp.MustCompile(`^/projects/[^/]+(/.*)?$`),
		replace: "/projects/{id}$1",
	},
}

// normalizePath maps a request path to the value used for the "path" metric
// label. Variable segments recognised by pathNormalizers are replaced with
// placeholders such as {id} to limit label cardinality. A path that matches
// no rule is returned unchanged.
func normalizePath(path string) string {
	for i := range pathNormalizers {
		rule := &pathNormalizers[i]
		if !rule.pattern.MatchString(path) {
			continue
		}
		return rule.pattern.ReplaceAllString(path, rule.replace)
	}
	return path
}
|