Implements a horizontally-scalable worker pool architecture:
- claudebox-sidecar: HTTP server for Claude Code, git, and SDLC ops
- rdev-worker: standalone worker binary polling rdev-api for tasks
- HTTP client adapter for sidecar communication
- HPA with custom Prometheus metrics for autoscaling
- ServiceMonitor for metrics scraping

Code review fixes applied:
- URL-encode query parameters in GitStatus (Critical #1)
- Remove unused shellQuote function (Critical #2)
- Use stdlib strings.Split/TrimSpace (Critical #3)
- Add version injection via ldflags (Warning #4)
- Add debug logging for swallowed git/sdlc errors (Warning #5, #6)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
52 lines
1.4 KiB
YAML
52 lines
1.4 KiB
YAML
# HorizontalPodAutoscaler for rdev-worker based on queue depth.
# Scales workers up when pending tasks accumulate, scales down when queue drains.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: rdev-worker
  namespace: rdev
  labels:
    app.kubernetes.io/name: rdev-worker
    app.kubernetes.io/part-of: rdev
spec:
  # The workload whose replica count this autoscaler manages.
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: rdev-worker
  minReplicas: 1
  maxReplicas: 10

  metrics:
    # Scale based on pending tasks in the work queue
    - type: External
      external:
        metric:
          name: rdev_pending_tasks
        target:
          type: AverageValue
          # Target an average of 2 pending tasks per worker: the metric value
          # is divided by the current replica count before being compared to
          # this target. With 2 workers, 4 pending tasks is exactly on target
          # (no change); 5 or more pending tasks would add a worker.
          averageValue: "2"

  behavior:
    # Scale up quickly when work accumulates
    scaleUp:
      # Require the higher demand to persist for 1 minute before scaling up,
      # so short-lived spikes in the queue are ignored.
      stabilizationWindowSeconds: 60
      policies:
        - type: Pods
          value: 2  # Add up to 2 pods per period
          periodSeconds: 60
        - type: Percent
          value: 100  # Or up to double the current count per period
          periodSeconds: 60
      # Use whichever policy above allows the larger increase.
      selectPolicy: Max

    # Scale down slowly to avoid thrashing
    scaleDown:
      # Require the lower demand to persist for 5 minutes before scaling down.
      stabilizationWindowSeconds: 300
      policies:
        - type: Pods
          value: 1  # Remove at most 1 pod per period
          periodSeconds: 120
      # Use the policy allowing the smallest decrease (only one is defined).
      selectPolicy: Min