rdev/internal/worker/resource_gc.go
jordan 9226454b85
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
feat: label-based undeploy, GC reconciliation, checkout/sessions, pool status
- Add UndeployAll() using label selectors to clean up monorepo components
  on project deletion (replaces name-based Undeploy in DeleteProject and
  the direct undeploy handler)
- Add ResourceGC background worker that periodically finds K8s resources
  whose project label has no matching DB record, deletes after 1h safety
  window
- Widen deployer client type from *kubernetes.Clientset to
  kubernetes.Interface for testability
- UndeployAll accumulates errors via errors.Join instead of failing fast
- Add checkout/checkin sidecar dev flow: temporary git tokens, branch
  checkout, review on checkin with cleanup workers
- Add interactive sessions: pod binding, command execution, SSE streaming,
  ephemeral preview URLs with session cleanup workers
- Add GET /workers/pool endpoint for aggregate capacity and queue depth
- Add sessions:read and sessions:execute auth scopes

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-09 19:11:28 -07:00

215 lines
5.4 KiB
Go

package worker
import (
"context"
"database/sql"
"sync"
"time"
"github.com/orchard9/rdev/internal/logging"
)
// resourceReconciler abstracts the K8s operations needed by the GC worker.
// The concrete deployer.Deployer satisfies this interface implicitly.
type resourceReconciler interface {
	// ListProjectLabels returns the project label values to reconcile. The GC
	// worker treats each returned value as a project ID and looks it up in the
	// database.
	ListProjectLabels(ctx context.Context) ([]string, error)
	// GetOldestResourceTime returns a timestamp for the named project's
	// resources; the GC worker measures the orphan's age from it. When the
	// boolean result is false the worker skips the project.
	// NOTE(review): presumably the creation time of the oldest labelled
	// resource — confirm against the deployer implementation.
	GetOldestResourceTime(ctx context.Context, projectName string) (time.Time, bool, error)
	// UndeployAll is called by the GC worker to delete an orphaned project's
	// resources once they are older than the configured minimum age.
	UndeployAll(ctx context.Context, projectName string) error
}
// projectChecker determines whether a project exists in the database.
// It is an interface so that newResourceGCWithChecker can be given a
// substitute implementation.
type projectChecker interface {
	// projectExists reports whether a project with the given ID exists.
	// The GC worker treats a non-nil error as a failed lookup and leaves the
	// project's resources alone for that pass.
	projectExists(ctx context.Context, projectID string) (bool, error)
}
// dbProjectChecker checks project existence via SQL.
// It is the projectChecker that NewResourceGC installs.
type dbProjectChecker struct {
	// db is the handle the existence query is executed on.
	db *sql.DB
}
// projectExists reports whether the projects table holds a row whose id
// equals projectID. If the query or the scan fails, the error is returned
// alongside the (unset, hence false) result.
func (c *dbProjectChecker) projectExists(ctx context.Context, projectID string) (bool, error) {
	row := c.db.QueryRowContext(ctx, "SELECT EXISTS(SELECT 1 FROM projects WHERE id = $1)", projectID)

	var found bool
	scanErr := row.Scan(&found)
	return found, scanErr
}
// ResourceGCConfig holds configuration for the GC worker.
// DefaultResourceGCConfig returns a value populated with the defaults noted
// on each field.
type ResourceGCConfig struct {
	// MinAge is the minimum resource age before deletion. Default: 1 hour.
	// Orphans younger than this are logged and skipped for the current pass.
	MinAge time.Duration
	// ReconcileInterval is how often to run reconciliation. Default: 15 minutes.
	// It is the period of the ticker that drives the reconcile loop.
	ReconcileInterval time.Duration
}
// DefaultResourceGCConfig returns a freshly allocated configuration with a
// one-hour minimum age and a fifteen-minute reconcile interval.
func DefaultResourceGCConfig() *ResourceGCConfig {
	defaults := new(ResourceGCConfig)
	defaults.MinAge = time.Hour
	defaults.ReconcileInterval = 15 * time.Minute
	return defaults
}
// ResourceGC periodically finds K8s resources whose project label doesn't
// match any project in the database, and deletes them after a safety window.
//
// Create one with NewResourceGC, call Start to launch the background loop,
// and call Stop to cancel the loop and wait for it to exit.
type ResourceGC struct {
	// reconciler lists project labels, reports resource age, and undeploys.
	reconciler resourceReconciler
	// checker answers whether a labelled project still exists.
	checker projectChecker
	// config supplies MinAge and ReconcileInterval.
	config *ResourceGCConfig
	// ctx is created by the constructor and cancelled by Stop. It ends the
	// reconcile loop and is the parent of each pass's timeout context.
	ctx context.Context
	// cancel cancels ctx.
	cancel context.CancelFunc
	// wg tracks the reconcile loop goroutine so that Stop can wait for it.
	wg sync.WaitGroup
}
// NewResourceGC builds a resource GC worker whose project-existence checks
// are answered by SQL queries against db.
//
// reconciler must provide ListProjectLabels, GetOldestResourceTime, and
// UndeployAll (deployer.Deployer satisfies this). A nil cfg selects
// DefaultResourceGCConfig. The worker owns a cancellable context derived from
// context.Background; nothing runs until Start is called.
func NewResourceGC(reconciler resourceReconciler, db *sql.DB, cfg *ResourceGCConfig) *ResourceGC {
	gc := &ResourceGC{
		reconciler: reconciler,
		checker:    &dbProjectChecker{db: db},
		config:     cfg,
	}
	if gc.config == nil {
		gc.config = DefaultResourceGCConfig()
	}
	gc.ctx, gc.cancel = context.WithCancel(context.Background())
	return gc
}
// newResourceGCWithChecker builds a ResourceGC around a caller-supplied
// project checker instead of a database handle (for testing). A nil cfg
// selects DefaultResourceGCConfig.
func newResourceGCWithChecker(reconciler resourceReconciler, checker projectChecker, cfg *ResourceGCConfig) *ResourceGC {
	effective := cfg
	if effective == nil {
		effective = DefaultResourceGCConfig()
	}

	runCtx, stop := context.WithCancel(context.Background())

	gc := new(ResourceGC)
	gc.reconciler = reconciler
	gc.checker = checker
	gc.config = effective
	gc.ctx = runCtx
	gc.cancel = stop
	return gc
}
// Start logs the effective settings and launches the reconciliation loop in
// a background goroutine. The goroutine is registered with the wait group
// before it is spawned so that Stop can wait for it.
func (g *ResourceGC) Start() {
	cfg := g.config
	logging.FromContext(g.ctx).WithWorker("resource-gc").Info("resource GC started",
		"min_age", cfg.MinAge,
		"reconcile_interval", cfg.ReconcileInterval,
	)

	g.wg.Add(1)
	go g.reconcileLoop()
}
// Stop cancels the worker's context and blocks until the reconciliation
// goroutine has returned, logging before and after the shutdown.
func (g *ResourceGC) Stop() {
	logger := logging.FromContext(g.ctx).WithWorker("resource-gc")

	logger.Info("resource GC stopping")
	// Runs after the wait below has completed.
	defer logger.Info("resource GC stopped")

	g.cancel()
	g.wg.Wait()
}
// reconcileLoop runs one reconciliation pass immediately and then one per
// tick of the configured interval until g.ctx is cancelled. It marks g.wg
// done on exit so that Stop can wait for it.
func (g *ResourceGC) reconcileLoop() {
	defer g.wg.Done()

	// Run immediately on start
	g.runReconciliation()

	// time.NewTicker panics on a non-positive duration, which would take the
	// whole process down from this goroutine if a caller supplied a config
	// with ReconcileInterval left at its zero value. Fall back to the default
	// documented on ResourceGCConfig.ReconcileInterval instead.
	interval := g.config.ReconcileInterval
	if interval <= 0 {
		interval = 15 * time.Minute
	}

	ticker := time.NewTicker(interval)
	defer ticker.Stop()

	for {
		select {
		case <-g.ctx.Done():
			return
		case <-ticker.C:
			// select picks at random when both channels are ready; do not
			// start a new pass once shutdown has been requested.
			if g.ctx.Err() != nil {
				return
			}
			g.runReconciliation()
		}
	}
}
// runReconciliation performs a single garbage-collection pass.
//
// It lists the project labels known to the reconciler and, for every label
// that has no matching project according to the checker, calls UndeployAll
// once the time reported by GetOldestResourceTime is at least config.MinAge
// in the past. Younger orphans are logged and skipped.
//
// The pass runs under a context derived from g.ctx with a
// TimeoutWorkExecution deadline. Failures are logged per project and never
// abort the pass; the loop only stops early when that context is done.
// A summary line is logged when anything was deleted, skipped, or failed.
func (g *ResourceGC) runReconciliation() {
	ctx, cancel := context.WithTimeout(g.ctx, TimeoutWorkExecution)
	defer cancel()

	log := logging.FromContext(ctx).WithWorker("resource-gc")

	labels, err := g.reconciler.ListProjectLabels(ctx)
	if err != nil {
		log.Error("failed to list project labels", logging.FieldError, err)
		return
	}

	var deleted, skipped, errCount int
	for _, project := range labels {
		// After Stop or a timeout the remaining lookups are expected to fail
		// with the same context error; stop instead of logging one failure
		// per remaining label.
		if ctxErr := ctx.Err(); ctxErr != nil {
			log.Info("reconciliation interrupted", logging.FieldError, ctxErr)
			break
		}

		exists, err := g.checker.projectExists(ctx, project)
		if err != nil {
			log.Error("failed to check project existence",
				logging.FieldProjectID, project,
				logging.FieldError, err,
			)
			// Counted so the summary reflects passes where lookups failed.
			errCount++
			continue
		}
		if exists {
			continue
		}

		// Project not in DB — check resource age before deleting
		oldest, found, err := g.reconciler.GetOldestResourceTime(ctx, project)
		if err != nil {
			log.Error("failed to get resource age",
				logging.FieldProjectID, project,
				logging.FieldError, err,
			)
			errCount++
			continue
		}
		if !found {
			continue
		}

		age := time.Since(oldest)
		if age < g.config.MinAge {
			log.Info("skipping young orphan",
				logging.FieldProjectID, project,
				"age", age,
				"min_age", g.config.MinAge,
			)
			skipped++
			continue
		}

		log.Info("deleting orphaned resources",
			logging.FieldProjectID, project,
			"age", age,
		)
		if err := g.reconciler.UndeployAll(ctx, project); err != nil {
			log.Error("failed to delete orphaned resources",
				logging.FieldProjectID, project,
				logging.FieldError, err,
			)
			errCount++
			continue
		}
		deleted++
	}

	// Stay quiet on passes where nothing happened.
	if deleted > 0 || skipped > 0 || errCount > 0 {
		log.Info("reconciliation complete",
			"deleted", deleted,
			"skipped", skipped,
			"errors", errCount,
			"total_labels", len(labels),
		)
	}
}