rdev/pkg/api/health.go
jordan a9ad3d8304
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
chore: accumulated platform hardening and CI fixes
CI / Woodpecker:
- Add explicit depends_on to all .woodpecker.yml steps (rdev + templates)
- Fix skip_tls_verify -> skip-tls-verify (correct Kaniko flag name)
- Add replicasets get/list to deployer RBAC for rollout status
- Skeleton template: add failure:ignore on docs steps, Traefik TLS
  annotations on ingress, depends_on on verify step

Component templates:
- Fix container name in deploy steps (PROJECT_NAME-COMPONENT_NAME)
- Replace kubectl scale with kubectl patch for replicas
- Add post-deploy image verification and rollout status checks
- Applied consistently across all 5 component templates

Adapters:
- gitea: Add HTTP client timeout (30s), context cancellation checks,
  handle 404 on GetRepo/DeleteRepo
- zot: Add retry with exponential backoff (doWithRetry), limit response
  body reads to 10MB
- cockroach: Use net.JoinHostPort for IPv6-safe DSN construction
- woodpecker: Fix error wrapping (%v -> %w)
- redis: Fix error wrapping (%v -> %w)
- deployer: Add context cancellation checks

Services:
- apikey_service: Fix error wrapping (%v -> %w)
- component_deploy: Fix error wrapping (%v -> %w)
- project_infra: Fix error wrapping (%v -> %w)
- webhook/dispatcher: Fix error wrapping (%v -> %w)

Other:
- CLAUDE.md: Add guide links for Gitea, Go 1.25, Woodpecker v3,
  Traefik v3, Zot registry
- circuitbreaker: Add test for error wrapping
- docs: Update deployment, troubleshooting, and runbook docs
- health: Fix error wrapping (%v -> %w)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-10 23:16:56 -07:00

215 lines
5.5 KiB
Go

package api
import (
"context"
"net/http"
"sync"
"time"
)
// HealthChecker is a function that checks the health of a dependency.
// Returns nil if healthy, an error describing the issue otherwise.
//
// The handler built by NewHealthHandler calls each checker with a context
// derived from the incoming request and bounded by HealthConfig.Timeout, and
// it waits for every checker to return before responding. Implementations
// should therefore honor ctx cancellation instead of blocking indefinitely.
type HealthChecker func(ctx context.Context) error
// HealthCheckResult represents the result of a single health check.
// One value is produced per entry in HealthConfig.Checks on every request.
type HealthCheckResult struct {
// Name is the key the checker was registered under in HealthConfig.Checks.
Name string `json:"name"`
// Status is "unhealthy" when the checker returned a non-nil error.
Status string `json:"status"` // "healthy" or "unhealthy"
// Latency is the checker's run time, rounded to the millisecond and
// formatted as a Go duration string. Omitted from the JSON when empty.
Latency string `json:"latency,omitempty"`
// Error carries the message of the checker's error. It is left empty (and
// omitted from the JSON) for healthy checks.
Error string `json:"error,omitempty"`
}
// HealthResponse is the response structure for health endpoints.
type HealthResponse struct {
// Status is the aggregate result: "unhealthy" as soon as any single check
// is unhealthy.
Status string `json:"status"` // "healthy" or "unhealthy"
// Service is the service name taken from HealthConfig.Service.
Service string `json:"service"`
// Checks lists the individual check results. Omitted from the JSON when no
// checks are configured.
Checks []HealthCheckResult `json:"checks,omitempty"`
// Duration is the total time spent handling the request, rounded to the
// millisecond and formatted as a Go duration string. It is not populated
// when no checks are configured.
Duration string `json:"duration,omitempty"`
}
// HealthConfig configures health check behavior.
// It is the input to NewHealthHandler and NewReadinessHandler.
type HealthConfig struct {
// Service name for identification. It is copied into the Service field of
// every HealthResponse.
Service string
// Timeout for individual health checks (default: 5s).
// All checks of one request run concurrently under a single context that
// carries this deadline. The zero value selects the default.
Timeout time.Duration
// Checks is a map of check names to checker functions.
// When it is empty the handler reports healthy without running anything.
Checks map[string]HealthChecker
}
// NewHealthHandler creates an HTTP handler that runs health checks concurrently.
// Returns 200 if all checks pass, 503 if any check fails.
//
// Every checker in cfg.Checks runs in its own goroutine under one shared
// context that is derived from the request context and bounded by cfg.Timeout.
// A non-positive cfg.Timeout falls back to 5 seconds. The handler waits for
// all checkers to return before it responds, so checkers should honor context
// cancellation. A nil checker is never invoked; it is reported as an
// unhealthy check instead.
//
// The order of the entries in the response's Checks list is unspecified.
// When no checks are configured the handler answers through WriteSuccess with
// a healthy response that carries only the service name.
//
// Example:
//
//	healthHandler := api.NewHealthHandler(api.HealthConfig{
//		Service: "my-service",
//		Timeout: 5 * time.Second,
//		Checks: map[string]api.HealthChecker{
//			"database": func(ctx context.Context) error {
//				return db.PingContext(ctx)
//			},
//			"redis": func(ctx context.Context) error {
//				return redis.Ping(ctx).Err()
//			},
//		},
//	})
//
//	r.Get("/health", healthHandler)
func NewHealthHandler(cfg HealthConfig) http.HandlerFunc {
	// Zero means "unset". A negative value would hand every checker an
	// already-expired context and make each probe fail, so it is treated the
	// same way.
	if cfg.Timeout <= 0 {
		cfg.Timeout = 5 * time.Second
	}

	return func(w http.ResponseWriter, r *http.Request) {
		start := time.Now()

		// If no checks configured, return simple healthy response
		if len(cfg.Checks) == 0 {
			WriteSuccess(w, r, HealthResponse{
				Status:  "healthy",
				Service: cfg.Service,
			})
			return
		}

		// One deadline shared by all checks of this request.
		ctx, cancel := context.WithTimeout(r.Context(), cfg.Timeout)
		defer cancel()

		// Each check owns exactly one slot, so the goroutines never touch the
		// same element and no lock is required; wg.Wait publishes the writes.
		results := make([]HealthCheckResult, len(cfg.Checks))
		var wg sync.WaitGroup
		slot := 0
		for name, checker := range cfg.Checks {
			out := &results[slot]
			slot++
			out.Name = name

			// Calling a nil func would panic inside the spawned goroutine,
			// outside net/http's per-request recovery, and take the whole
			// process down. Surface the misconfiguration as a failed check.
			if checker == nil {
				out.Status = "unhealthy"
				out.Error = "health checker is nil"
				continue
			}

			wg.Add(1) // TODO: Migrate to wg.Go() (Go 1.25)
			go func(out *HealthCheckResult, checker HealthChecker) {
				defer wg.Done()

				checkStart := time.Now()
				err := checker(ctx)
				out.Latency = time.Since(checkStart).Round(time.Millisecond).String()

				if err != nil {
					out.Status = "unhealthy"
					out.Error = err.Error()
					return
				}
				out.Status = "healthy"
			}(out, checker)
		}
		wg.Wait()

		// A single unhealthy check makes the whole response unhealthy.
		status := "healthy"
		httpStatus := http.StatusOK
		for i := range results {
			if results[i].Status == "unhealthy" {
				status = "unhealthy"
				httpStatus = http.StatusServiceUnavailable
				break
			}
		}

		WriteJSON(w, r, httpStatus, HealthResponse{
			Status:   status,
			Service:  cfg.Service,
			Checks:   results,
			Duration: time.Since(start).Round(time.Millisecond).String(),
		})
	}
}
// NewLivenessHandler builds the handler for a liveness probe.
// It performs no dependency checks: every request is answered through
// WriteSuccess with a payload holding the status "ok" and the given service
// name, so the probe succeeds for as long as the process can serve HTTP.
// Intended for Kubernetes liveness probes.
//
// Example:
//
//	r.Get("/health/live", api.NewLivenessHandler("my-service"))
func NewLivenessHandler(service string) http.HandlerFunc {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// Built per request so no map is shared between concurrent calls.
		payload := map[string]string{
			"status":  "ok",
			"service": service,
		}
		WriteSuccess(w, r, payload)
	})
}
// NewReadinessHandler builds the handler for a readiness probe.
// It delegates entirely to NewHealthHandler, so the behavior is the same:
// 200 when every configured check passes, 503 when any check fails.
// Intended for Kubernetes readiness probes.
//
// Example:
//
//	r.Get("/health/ready", api.NewReadinessHandler(api.HealthConfig{
//		Service: "my-service",
//		Checks: map[string]api.HealthChecker{
//			"database": dbHealthCheck,
//		},
//	}))
func NewReadinessHandler(cfg HealthConfig) http.HandlerFunc {
	readiness := NewHealthHandler(cfg)
	return readiness
}
// -----------------------------------------------------------------------------
// Common Health Checkers
// -----------------------------------------------------------------------------
// PingChecker adapts a context-aware Ping function to a HealthChecker.
// The function is returned as-is under the HealthChecker type; no wrapping
// or extra behavior is added. Many database clients expose a suitable Ping
// or PingContext method.
//
// Example:
//
//	checks := map[string]api.HealthChecker{
//		"postgres": api.PingChecker(db.PingContext),
//	}
func PingChecker(pingFn func(context.Context) error) HealthChecker {
	return HealthChecker(pingFn)
}
// HTTPChecker creates a health checker that makes an HTTP GET request.
// Returns error if status is not 2xx.
//
// Each probe issues a GET to url bound to the context passed to the checker.
// Request-construction and transport errors are returned unchanged; a
// response outside the 2xx range yields an error built with Internalf that
// includes the status code. Independently of the context, every request is
// capped by a 5 second client timeout.
//
// Example:
//
//	checks := map[string]api.HealthChecker{
//		"external-api": api.HTTPChecker("https://api.example.com/health"),
//	}
func HTTPChecker(url string) HealthChecker {
	// Build the client once per checker instead of once per probe: an
	// http.Client is safe for concurrent use and is meant to be reused.
	client := &http.Client{Timeout: 5 * time.Second}

	return func(ctx context.Context) error {
		req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
		if err != nil {
			return err
		}

		resp, err := client.Do(req)
		if err != nil {
			return err
		}
		// The body is never read; a close error carries no useful signal here.
		defer func() { _ = resp.Body.Close() }()

		if resp.StatusCode < 200 || resp.StatusCode >= 300 {
			return Internalf("unhealthy: status %d", resp.StatusCode)
		}
		return nil
	}
}