rdev/internal/cmdlimit/cmdlimit.go
jordan 538ea57ed4 feat: Add claude-config API, security hardening, and testing infrastructure
Claude Config API (v0.6):
- Add CRUD endpoints for commands, skills, and agents
- Commands/skills/agents stored in /workspace/.claude/ (per-project, in git)
- Credentials shared via PVC at /root/.claude/ (shared across pods)
- Use base64 encoding for file writes (prevents shell injection)
- Add content size limits (1MB max)

Security Hardening:
- Add sanitize package for command/prompt validation
- Add rate limiting middleware (token bucket algorithm)
- Add concurrent command limiting
- Add input sanitization to all command handlers
- Gitignore secrets.yaml and credentials.yaml
- Add *.example templates for secrets

Testing Infrastructure:
- Add testutil package with mocks and fixtures
- Add unit tests for auth package (63% coverage)
- Add unit tests for executor (47% coverage)
- Add handler integration tests (40% coverage)
- Add 100% coverage for sanitize, cmdlimit packages
- Add 96% coverage for ratelimit package

Infrastructure:
- Shared Claude credentials PVC (ReadWriteMany)
- Reduced workspace PVC size from 20Gi to 5Gi
- Add init container cleanup before git clone
- Document Longhorn RWX requirements

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-25 01:29:13 -07:00

198 lines
4.7 KiB
Go

// Package cmdlimit provides concurrent command limiting to prevent resource exhaustion.
package cmdlimit
import (
	"context"
	"errors"
	"sort"
	"sync"
	"time"
)
// ErrLimitExceeded is returned by Acquire when either the total limit or the
// per-project limit on concurrent commands has already been reached.
var ErrLimitExceeded = errors.New("concurrent command limit exceeded")
// Config defines the limiter configuration.
// New replaces any field that is zero or negative with its default value.
type Config struct {
// MaxConcurrentPerProject is the maximum concurrent commands per project.
// Defaults to 5.
MaxConcurrentPerProject int
// MaxConcurrentTotal is the maximum concurrent commands across all projects.
// Defaults to 20.
MaxConcurrentTotal int
// CommandTimeout is the maximum duration a command can hold a slot.
// After this duration, the slot is automatically released.
// Defaults to 30 minutes.
CommandTimeout time.Duration
}
// DefaultConfig returns the stock limiter settings: 5 concurrent commands per
// project, 20 across all projects, and a 30 minute slot timeout.
func DefaultConfig() Config {
	var cfg Config
	cfg.MaxConcurrentPerProject = 5
	cfg.MaxConcurrentTotal = 20
	cfg.CommandTimeout = 30 * time.Minute
	return cfg
}
// Limiter tracks and enforces concurrent command limits.
// Create one with New so that the maps are initialized and defaults applied.
type Limiter struct {
// cfg holds the limits and the slot timeout.
cfg Config
// mu guards projectCounts, totalCount and activeCommands.
mu sync.Mutex
// projectCounts maps a project ID to its number of held slots; an entry is
// removed once its count drops to zero.
projectCounts map[string]int
// totalCount is the number of held slots across all projects.
totalCount int
// activeCommands maps a command ID to the bookkeeping for its held slot.
activeCommands map[string]*activeCommand
}
// activeCommand records one held slot in Limiter.activeCommands.
type activeCommand struct {
// projectID is the project whose per-project counter this slot counts against.
projectID string
// startedAt is the time at which the slot was acquired.
startedAt time.Time
// cancel cancels the timeout context that Acquire creates for this slot.
cancel context.CancelFunc
}
// New creates a new concurrent command limiter.
//
// Any field of cfg that is zero or negative is replaced by the corresponding
// value from DefaultConfig, so the defaults are defined in exactly one place.
// The returned Limiter starts with no active commands.
func New(cfg Config) *Limiter {
	defaults := DefaultConfig()
	if cfg.MaxConcurrentPerProject <= 0 {
		cfg.MaxConcurrentPerProject = defaults.MaxConcurrentPerProject
	}
	if cfg.MaxConcurrentTotal <= 0 {
		cfg.MaxConcurrentTotal = defaults.MaxConcurrentTotal
	}
	if cfg.CommandTimeout <= 0 {
		cfg.CommandTimeout = defaults.CommandTimeout
	}

	return &Limiter{
		cfg:            cfg,
		projectCounts:  make(map[string]int),
		activeCommands: make(map[string]*activeCommand),
	}
}
// ErrDuplicateCommand is returned by Acquire when the given command ID already
// holds a slot. Registering it a second time would overwrite the bookkeeping
// entry and leak one unit from the counters.
var ErrDuplicateCommand = errors.New("command already holds a slot")

// Acquire attempts to acquire a command slot for the given project.
//
// On success it returns a release function that MUST be called when the
// command completes. Calling it more than once is harmless: each acquisition
// frees its slot at most once, so a late call can never free a slot taken by a
// newer acquisition that reuses the same commandID.
//
// The slot is also freed automatically when ctx is cancelled or when the
// configured CommandTimeout elapses, whichever happens first. The derived
// timeout context is not handed to the caller, so expiry only frees the slot;
// it does not interrupt the command itself.
//
// Errors:
//   - ErrDuplicateCommand if commandID currently holds a slot.
//   - ErrLimitExceeded if the total or the per-project limit is reached.
func (l *Limiter) Acquire(ctx context.Context, projectID, commandID string) (release func(), err error) {
	l.mu.Lock()
	defer l.mu.Unlock()

	// activeCommands is keyed by command ID, so a second registration would
	// replace the first entry while both bump the counters.
	if _, active := l.activeCommands[commandID]; active {
		return nil, ErrDuplicateCommand
	}

	// Check total limit.
	if l.totalCount >= l.cfg.MaxConcurrentTotal {
		return nil, ErrLimitExceeded
	}

	// Check per-project limit.
	if l.projectCounts[projectID] >= l.cfg.MaxConcurrentPerProject {
		return nil, ErrLimitExceeded
	}

	// Acquire the slot.
	l.totalCount++
	l.projectCounts[projectID]++

	// The derived context fires on parent cancellation or on timeout.
	cmdCtx, cancel := context.WithTimeout(ctx, l.cfg.CommandTimeout)
	l.activeCommands[commandID] = &activeCommand{
		projectID: projectID,
		startedAt: time.Now(),
		cancel:    cancel,
	}

	// Both the caller's release function and the watcher goroutine funnel
	// through the same Once, so this acquisition is released exactly once.
	var once sync.Once
	finish := func() {
		once.Do(func() {
			cancel() // stops the timer and wakes the watcher goroutine
			l.release(commandID)
		})
	}

	// Auto-release on timeout or parent cancellation. The goroutine always
	// terminates: finish cancels cmdCtx, and the timeout bounds its lifetime.
	go func() {
		<-cmdCtx.Done()
		finish()
	}()

	return finish, nil
}
// release frees the slot registered under commandID, if there is one.
// It removes the bookkeeping entry, decrements the total counter, and
// decrements the owning project's counter, dropping that project's map entry
// once no slots remain. An unknown commandID is a no-op, which makes repeated
// releases harmless.
func (l *Limiter) release(commandID string) {
	l.mu.Lock()
	defer l.mu.Unlock()

	if entry, held := l.activeCommands[commandID]; held {
		delete(l.activeCommands, commandID)
		l.totalCount--

		remaining := l.projectCounts[entry.projectID] - 1
		if remaining > 0 {
			l.projectCounts[entry.projectID] = remaining
		} else {
			delete(l.projectCounts, entry.projectID)
		}
	}
}
// Stats returns a snapshot of the limiter's current usage together with the
// configured limits. The per-project map in the result is a copy, so callers
// may modify it without affecting the limiter.
func (l *Limiter) Stats() Stats {
	l.mu.Lock()
	defer l.mu.Unlock()

	snapshot := Stats{
		TotalActive:      l.totalCount,
		MaxTotal:         l.cfg.MaxConcurrentTotal,
		ProjectCounts:    make(map[string]int, len(l.projectCounts)),
		MaxPerProject:    l.cfg.MaxConcurrentPerProject,
		ActiveCommandIDs: l.getActiveCommandIDs(),
	}
	for project, active := range l.projectCounts {
		snapshot.ProjectCounts[project] = active
	}
	return snapshot
}
// getActiveCommandIDs returns the IDs of all commands currently holding a
// slot, sorted ascending. Map iteration order is randomized in Go, so sorting
// keeps the output stable between calls. The result is always non-nil.
//
// It does not lock l.mu itself; the caller must already hold it.
func (l *Limiter) getActiveCommandIDs() []string {
	ids := make([]string, 0, len(l.activeCommands))
	for id := range l.activeCommands {
		ids = append(ids, id)
	}
	sort.Strings(ids)
	return ids
}
// Stats contains current limiter statistics.
// It is the point-in-time snapshot returned by Limiter.Stats.
type Stats struct {
// TotalActive is the number of commands holding a slot across all projects.
TotalActive int
// MaxTotal is the configured limit across all projects.
MaxTotal int
// ProjectCounts maps a project ID to its number of active commands.
// It is a copy of the limiter's internal map.
ProjectCounts map[string]int
// MaxPerProject is the configured per-project limit.
MaxPerProject int
// ActiveCommandIDs lists the IDs of the commands holding a slot.
ActiveCommandIDs []string
}
// IsProjectAtLimit reports whether projectID already holds as many slots as
// the per-project limit allows. A project with no active commands counts as
// zero.
func (l *Limiter) IsProjectAtLimit(projectID string) bool {
	l.mu.Lock()
	atLimit := l.projectCounts[projectID] >= l.cfg.MaxConcurrentPerProject
	l.mu.Unlock()
	return atLimit
}
// IsTotalAtLimit reports whether the number of active commands across all
// projects has reached the configured total limit.
func (l *Limiter) IsTotalAtLimit() bool {
	l.mu.Lock()
	full := l.totalCount >= l.cfg.MaxConcurrentTotal
	l.mu.Unlock()
	return full
}
// ActiveCount returns how many commands currently hold a slot for projectID.
// A project with no active commands yields zero.
func (l *Limiter) ActiveCount(projectID string) int {
	l.mu.Lock()
	active := l.projectCounts[projectID]
	l.mu.Unlock()
	return active
}
// TotalActiveCount returns how many commands currently hold a slot across all
// projects.
func (l *Limiter) TotalActiveCount() int {
	l.mu.Lock()
	active := l.totalCount
	l.mu.Unlock()
	return active
}