rdev/internal/handlers/workers.go
jordan 9226454b85
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
feat: label-based undeploy, GC reconciliation, checkout/sessions, pool status
- Add UndeployAll() using label selectors to clean up monorepo components
  on project deletion (replaces name-based Undeploy in DeleteProject and
  the direct undeploy handler)
- Add ResourceGC background worker that periodically finds K8s resources
  whose project label has no matching DB record, deletes after 1h safety
  window
- Widen deployer client type from *kubernetes.Clientset to
  kubernetes.Interface for testability
- UndeployAll accumulates errors via errors.Join instead of failing fast
- Add checkout/checkin sidecar dev flow: temporary git tokens, branch
  checkout, review on checkin with cleanup workers
- Add interactive sessions: pod binding, command execution, SSE streaming,
  ephemeral preview URLs with session cleanup workers
- Add GET /workers/pool endpoint for aggregate capacity and queue depth
- Add sessions:read and sessions:execute auth scopes

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-09 19:11:28 -07:00

450 lines
13 KiB
Go

// Package handlers provides HTTP handlers for the rdev API.
package handlers
import (
"errors"
"net/http"
"github.com/go-chi/chi/v5"
"github.com/orchard9/rdev/internal/auth"
"github.com/orchard9/rdev/internal/domain"
"github.com/orchard9/rdev/internal/port"
"github.com/orchard9/rdev/internal/service"
"github.com/orchard9/rdev/pkg/api"
)
// WorkersHandler handles worker pool management endpoints.
// It is created with NewWorkersHandler; the work service and work queue are
// optional collaborators attached afterwards through WithWorkService and
// WithWorkQueue.
type WorkersHandler struct {
// workerService backs every endpoint: listing, lookup, registration,
// heartbeat, drain, and the task claim/complete/fail operations.
workerService *service.WorkerService
// workService is passed to workerService.FailTask by the FailTask endpoint,
// which answers with an internal error while this field is nil.
workService service.WorkServiceFailer
// workQueue supplies the pending-task count for PoolStatus; when it is nil
// the queue depth is reported as zero.
workQueue port.WorkQueue
}
// NewWorkersHandler creates a new workers handler.
// Only the worker service is set here; the work service and work queue stay
// nil until WithWorkService / WithWorkQueue are called on the result.
func NewWorkersHandler(workerService *service.WorkerService) *WorkersHandler {
return &WorkersHandler{
workerService: workerService,
}
}
// WithWorkService adds a work service for task failure handling.
// This is required for standalone worker endpoints: FailTask responds with an
// internal error until a work service has been set.
// The receiver is modified in place and returned so calls can be chained.
func (h *WorkersHandler) WithWorkService(ws service.WorkServiceFailer) *WorkersHandler {
h.workService = ws
return h
}
// WithWorkQueue adds a work queue for pool status endpoint.
// Without one, PoolStatus reports a queue depth of zero.
// The receiver is modified in place and returned so calls can be chained.
func (h *WorkersHandler) WithWorkQueue(wq port.WorkQueue) *WorkersHandler {
h.workQueue = wq
return h
}
// Mount registers the worker pool routes under the /workers prefix.
//
// Read endpoints are guarded by auth.RequireScope(ScopeWorkersRead, ScopeAdmin)
// and every mutating endpoint by auth.RequireScope(ScopeWorkersWrite, ScopeAdmin).
func (h *WorkersHandler) Mount(r api.Router) {
	// Build each scope guard once and share it across the routes it protects.
	readGuard := auth.RequireScope(auth.ScopeWorkersRead, auth.ScopeAdmin)
	writeGuard := auth.RequireScope(auth.ScopeWorkersWrite, auth.ScopeAdmin)

	r.Route("/workers", func(sub chi.Router) {
		readers := sub.With(readGuard)
		writers := sub.With(writeGuard)

		// Read operations
		readers.Get("/", h.List)
		readers.Get("/pool", h.PoolStatus)
		readers.Get("/{workerId}", h.Get)

		// Write operations
		writers.Post("/register", h.Register)
		writers.Post("/{workerId}/heartbeat", h.Heartbeat)
		writers.Post("/{workerId}/drain", h.Drain)

		// Standalone worker task operations
		writers.Post("/{workerId}/claim", h.ClaimTask)
		writers.Post("/{workerId}/complete/{taskId}", h.CompleteTask)
		writers.Post("/{workerId}/fail/{taskId}", h.FailTask)
	})
}
// WorkerDTO is the data transfer object for workers.
// It is the JSON shape returned by the list, get, and register endpoints and
// is produced from a domain.Worker by toWorkerDTO.
type WorkerDTO struct {
// ID and Hostname are copied verbatim from the domain worker.
ID string `json:"id"`
Hostname string `json:"hostname"`
// Status is the string form of the worker's domain.WorkerStatus.
Status string `json:"status"`
// CurrentTask mirrors the domain worker's CurrentTask field; omitted when empty.
CurrentTask string `json:"current_task,omitempty"`
// Capabilities mirrors the domain worker's capability list; omitted when empty.
Capabilities []string `json:"capabilities,omitempty"`
// RegisteredAt and LastHeartbeat are timestamps rendered by toWorkerDTO with
// the layout "2006-01-02T15:04:05Z07:00".
RegisteredAt string `json:"registered_at"`
LastHeartbeat string `json:"last_heartbeat"`
// Version mirrors the domain worker's Version field; omitted when empty.
Version string `json:"version,omitempty"`
}
// toWorkerDTO converts a domain worker into its API representation.
// A nil worker yields a nil DTO. Both timestamps are rendered as strings using
// a single shared layout.
func toWorkerDTO(w *domain.Worker) *WorkerDTO {
	// Same value as the standard library's time.RFC3339 layout.
	const timestampLayout = "2006-01-02T15:04:05Z07:00"

	if w == nil {
		return nil
	}

	dto := new(WorkerDTO)
	dto.ID = w.ID
	dto.Hostname = w.Hostname
	dto.Status = string(w.Status)
	dto.CurrentTask = w.CurrentTask
	dto.Capabilities = w.Capabilities
	dto.RegisteredAt = w.RegisteredAt.Format(timestampLayout)
	dto.LastHeartbeat = w.LastHeartbeat.Format(timestampLayout)
	dto.Version = w.Version
	return dto
}
// List returns all workers with optional status filter.
// GET /workers?status=idle
//
// An unrecognised status value is rejected with a bad-request response, and a
// failure of the worker service produces an internal-error response. On
// success the payload carries the worker DTOs, their total, and a per-status
// summary. The summary is computed over the returned (already filtered)
// workers, so with a status filter only that status can be non-zero.
func (h *WorkersHandler) List(w http.ResponseWriter, r *http.Request) {
	filter := port.WorkerFilter{}
	if s := r.URL.Query().Get("status"); s != "" {
		st := domain.WorkerStatus(s)
		if !st.IsValid() {
			api.WriteBadRequest(w, r, "invalid status: must be idle, busy, draining, or offline")
			return
		}
		filter.Status = &st
	}

	workers, err := h.workerService.ListWorkers(r.Context(), filter)
	if err != nil {
		api.WriteInternalError(w, r, "failed to list workers")
		return
	}

	// Convert and count in a single pass over the result.
	dtos := make([]*WorkerDTO, len(workers))
	idle, busy, draining, offline := 0, 0, 0, 0
	for i, wkr := range workers {
		dtos[i] = toWorkerDTO(wkr)
		if wkr == nil {
			// toWorkerDTO tolerates a nil worker; apply the same tolerance
			// here instead of panicking on the Status field access.
			continue
		}
		switch wkr.Status {
		case domain.WorkerStatusIdle:
			idle++
		case domain.WorkerStatusBusy:
			busy++
		case domain.WorkerStatusDraining:
			draining++
		case domain.WorkerStatusOffline:
			offline++
		}
	}

	api.WriteSuccess(w, r, map[string]any{
		"workers": dtos,
		"total":   len(dtos),
		"summary": map[string]int{
			"idle":     idle,
			"busy":     busy,
			"draining": draining,
			"offline":  offline,
		},
	})
}
// Get looks up a single worker by the ID in the URL path.
// GET /workers/{workerId}
//
// An empty ID is a bad request, an unknown worker is reported as not found,
// and any other lookup failure becomes an internal error.
func (h *WorkersHandler) Get(w http.ResponseWriter, r *http.Request) {
	id := chi.URLParam(r, "workerId")
	if id == "" {
		api.WriteBadRequest(w, r, "worker ID is required")
		return
	}

	found, err := h.workerService.GetWorker(r.Context(), id)
	switch {
	case err == nil:
		api.WriteSuccess(w, r, toWorkerDTO(found))
	case errors.Is(err, domain.ErrWorkerNotFound):
		api.WriteNotFound(w, r, "worker not found: "+id)
	default:
		api.WriteInternalError(w, r, "failed to get worker")
	}
}
// RegisterWorkerRequest is the request body for POST /workers/register.
// The Register handler rejects the request when ID or Hostname is empty;
// Version and Capabilities are optional and passed through unchanged.
type RegisterWorkerRequest struct {
// ID is the worker identifier; required.
ID string `json:"id"`
// Hostname is required.
Hostname string `json:"hostname"`
// Version is optional.
Version string `json:"version,omitempty"`
// Capabilities is optional.
Capabilities []string `json:"capabilities,omitempty"`
}
// Register lets a worker announce itself to the pool.
// POST /workers/register
//
// The body must decode as a RegisterWorkerRequest with a non-empty id and
// hostname; otherwise a bad-request response is written. A registration
// failure in the worker service becomes an internal error, and success is
// answered with a created response carrying the worker DTO.
func (h *WorkersHandler) Register(w http.ResponseWriter, r *http.Request) {
	var body RegisterWorkerRequest
	if decodeErr := api.DecodeJSON(r, &body); decodeErr != nil {
		api.WriteBadRequest(w, r, "invalid request body")
		return
	}

	// Both identifying fields are mandatory; the ID is checked first.
	switch {
	case body.ID == "":
		api.WriteBadRequest(w, r, "worker id is required")
		return
	case body.Hostname == "":
		api.WriteBadRequest(w, r, "hostname is required")
		return
	}

	registered := &domain.Worker{
		ID:           body.ID,
		Hostname:     body.Hostname,
		Version:      body.Version,
		Capabilities: body.Capabilities,
	}
	if regErr := h.workerService.Register(r.Context(), registered); regErr != nil {
		api.WriteInternalError(w, r, "failed to register worker")
		return
	}

	api.WriteCreated(w, r, toWorkerDTO(registered))
}
// Heartbeat records a heartbeat for the worker named in the URL path.
// POST /workers/{workerId}/heartbeat
//
// An empty ID is a bad request, an unknown worker is reported as not found,
// and any other service failure becomes an internal error.
func (h *WorkersHandler) Heartbeat(w http.ResponseWriter, r *http.Request) {
	id := chi.URLParam(r, "workerId")
	if id == "" {
		api.WriteBadRequest(w, r, "worker ID is required")
		return
	}

	err := h.workerService.Heartbeat(r.Context(), id)
	switch {
	case err == nil:
		ack := map[string]any{
			"worker_id": id,
			"status":    "ok",
		}
		api.WriteSuccess(w, r, ack)
	case errors.Is(err, domain.ErrWorkerNotFound):
		api.WriteNotFound(w, r, "worker not found: "+id)
	default:
		api.WriteInternalError(w, r, "failed to update heartbeat")
	}
}
// Drain asks the worker service to put the named worker into draining status.
// POST /workers/{workerId}/drain
//
// An empty ID is a bad request, an unknown worker is reported as not found,
// and any other service failure becomes an internal error.
func (h *WorkersHandler) Drain(w http.ResponseWriter, r *http.Request) {
	id := chi.URLParam(r, "workerId")
	if id == "" {
		api.WriteBadRequest(w, r, "worker ID is required")
		return
	}

	err := h.workerService.DrainWorker(r.Context(), id)
	switch {
	case err == nil:
		ack := map[string]any{
			"worker_id": id,
			"status":    "draining",
			"message":   "worker will finish current task then stop accepting new work",
		}
		api.WriteSuccess(w, r, ack)
	case errors.Is(err, domain.ErrWorkerNotFound):
		api.WriteNotFound(w, r, "worker not found: "+id)
	default:
		api.WriteInternalError(w, r, "failed to drain worker")
	}
}
// ClaimTask hands the next available task to the worker named in the URL path.
// POST /workers/{workerId}/claim
//
// When the service returns no task and no error the response is an empty
// 204 No Content. An unknown worker is reported as not found and any other
// service failure becomes an internal error.
func (h *WorkersHandler) ClaimTask(w http.ResponseWriter, r *http.Request) {
	id := chi.URLParam(r, "workerId")
	if id == "" {
		api.WriteBadRequest(w, r, "worker ID is required")
		return
	}

	claimed, err := h.workerService.ClaimTask(r.Context(), id)
	switch {
	case err == nil && claimed == nil:
		// Nothing queued for this worker right now.
		w.WriteHeader(http.StatusNoContent)
	case err == nil:
		api.WriteSuccess(w, r, map[string]any{
			"task":      toWorkTaskDTO(claimed),
			"worker_id": id,
		})
	case errors.Is(err, domain.ErrWorkerNotFound):
		api.WriteNotFound(w, r, "worker not found: "+id)
	default:
		api.WriteInternalError(w, r, "failed to claim task")
	}
}
// CompleteTaskRequest is the request body for POST /workers/{workerId}/complete/{taskId}.
// The CompleteTask handler copies every field into a domain.BuildResult
// without validating any of them.
type CompleteTaskRequest struct {
// Success is forwarded as the build result's Success flag.
Success bool `json:"success"`
// Output and Error are forwarded verbatim; both are optional.
Output string `json:"output,omitempty"`
Error string `json:"error,omitempty"`
// CommitSHA, FilesChanged, and Artifacts are optional and forwarded verbatim.
CommitSHA string `json:"commit_sha,omitempty"`
FilesChanged []string `json:"files_changed,omitempty"`
Artifacts map[string]string `json:"artifacts,omitempty"`
// DurationMs is optional; presumably the task duration in milliseconds
// (inferred from the name — confirm with the worker implementation).
DurationMs int64 `json:"duration_ms,omitempty"`
}
// CompleteTask reports the result of a task on behalf of a worker.
// POST /workers/{workerId}/complete/{taskId}
//
// Both path parameters must be non-empty and the body must decode as a
// CompleteTaskRequest; otherwise a bad-request response is written. An unknown
// worker is reported as not found and any other service failure becomes an
// internal error.
func (h *WorkersHandler) CompleteTask(w http.ResponseWriter, r *http.Request) {
	workerID, taskID := chi.URLParam(r, "workerId"), chi.URLParam(r, "taskId")
	switch {
	case workerID == "":
		api.WriteBadRequest(w, r, "worker ID is required")
		return
	case taskID == "":
		api.WriteBadRequest(w, r, "task ID is required")
		return
	}

	var body CompleteTaskRequest
	if decodeErr := api.DecodeJSON(r, &body); decodeErr != nil {
		api.WriteBadRequest(w, r, "invalid request body")
		return
	}

	// The request fields map one-to-one onto the domain build result.
	outcome := &domain.BuildResult{
		Success:      body.Success,
		Output:       body.Output,
		Error:        body.Error,
		CommitSHA:    body.CommitSHA,
		FilesChanged: body.FilesChanged,
		DurationMs:   body.DurationMs,
		Artifacts:    body.Artifacts,
	}

	err := h.workerService.CompleteTask(r.Context(), workerID, taskID, outcome)
	switch {
	case err == nil:
		api.WriteSuccess(w, r, map[string]any{
			"task_id":   taskID,
			"worker_id": workerID,
			"status":    "completed",
		})
	case errors.Is(err, domain.ErrWorkerNotFound):
		api.WriteNotFound(w, r, "worker not found: "+workerID)
	default:
		api.WriteInternalError(w, r, "failed to complete task")
	}
}
// FailTaskRequest is the request body for POST /workers/{workerId}/fail/{taskId}.
// The FailTask handler copies these fields into a domain.BuildResult whose
// Success flag is always false; none of the fields are validated.
type FailTaskRequest struct {
// Error is forwarded as the build result's Error text.
Error string `json:"error"`
// Output is optional and forwarded verbatim.
Output string `json:"output,omitempty"`
// DurationMs is optional; presumably the task duration in milliseconds
// (inferred from the name — confirm with the worker implementation).
DurationMs int64 `json:"duration_ms,omitempty"`
}
// FailTask reports a task failure on behalf of a worker.
// POST /workers/{workerId}/fail/{taskId}
//
// Both path parameters must be non-empty and the body must decode as a
// FailTaskRequest; otherwise a bad-request response is written. The handler
// needs a work service (see WithWorkService) and answers with an internal
// error when none is configured. An unknown worker is reported as not found
// and any other service failure becomes an internal error.
func (h *WorkersHandler) FailTask(w http.ResponseWriter, r *http.Request) {
	workerID, taskID := chi.URLParam(r, "workerId"), chi.URLParam(r, "taskId")
	switch {
	case workerID == "":
		api.WriteBadRequest(w, r, "worker ID is required")
		return
	case taskID == "":
		api.WriteBadRequest(w, r, "task ID is required")
		return
	}

	var body FailTaskRequest
	if decodeErr := api.DecodeJSON(r, &body); decodeErr != nil {
		api.WriteBadRequest(w, r, "invalid request body")
		return
	}

	// Checked only after the request itself has been validated.
	failer := h.workService
	if failer == nil {
		api.WriteInternalError(w, r, "work service not configured")
		return
	}

	outcome := &domain.BuildResult{
		Success:    false,
		Output:     body.Output,
		Error:      body.Error,
		DurationMs: body.DurationMs,
	}

	err := h.workerService.FailTask(r.Context(), workerID, taskID, outcome, failer)
	switch {
	case err == nil:
		api.WriteSuccess(w, r, map[string]any{
			"task_id":   taskID,
			"worker_id": workerID,
			"status":    "failed",
		})
	case errors.Is(err, domain.ErrWorkerNotFound):
		api.WriteNotFound(w, r, "worker not found: "+workerID)
	default:
		api.WriteInternalError(w, r, "failed to fail task")
	}
}
// PoolStatusResponse is the aggregate worker pool capacity.
// It is the payload of GET /workers/pool and is filled in by PoolStatus.
type PoolStatusResponse struct {
// Total is the number of workers returned by an unfiltered listing; it also
// counts workers whose status is none of the four tallied below.
Total int `json:"total"`
// Idle, Busy, Draining, and Offline are per-status worker counts.
Idle int `json:"idle"`
Busy int `json:"busy"`
Draining int `json:"draining"`
Offline int `json:"offline"`
// Available is currently set to the same value as Idle.
Available int `json:"available"` // Idle workers ready to accept work
// QueueDepth stays zero when no work queue is configured or when its
// statistics cannot be read.
QueueDepth int64 `json:"queue_depth"` // Pending tasks in work queue
}
// PoolStatus reports aggregate worker pool capacity for scaling decisions.
// GET /workers/pool
//
// Workers are listed without a filter and tallied by status. The queue depth
// is best effort: it is filled in only when a work queue is configured and its
// statistics can be read, and is left at zero otherwise. A failure to list
// workers produces an internal-error response.
func (h *WorkersHandler) PoolStatus(w http.ResponseWriter, r *http.Request) {
	pool, err := h.workerService.ListWorkers(r.Context(), port.WorkerFilter{})
	if err != nil {
		api.WriteInternalError(w, r, "failed to list workers")
		return
	}

	resp := PoolStatusResponse{Total: len(pool)}
	for _, member := range pool {
		switch member.Status {
		case domain.WorkerStatusIdle:
			resp.Idle++
		case domain.WorkerStatusBusy:
			resp.Busy++
		case domain.WorkerStatusDraining:
			resp.Draining++
		case domain.WorkerStatusOffline:
			resp.Offline++
		}
	}
	// Only idle workers are counted as available.
	resp.Available = resp.Idle

	if h.workQueue != nil {
		// A stats error is deliberately not surfaced; the depth stays zero.
		if stats, statsErr := h.workQueue.GetStats(r.Context()); statsErr == nil && stats != nil {
			resp.QueueDepth = stats.Pending
		}
	}

	api.WriteSuccess(w, r, resp)
}