persona-community-3/pkg/routing/circuit_breaker.go
jordan f53b908499
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
ci/woodpecker/manual/woodpecker Pipeline was successful
Initialize project from skeleton template
2026-02-23 11:10:35 +00:00

134 lines
3.9 KiB
Go

package routing
import (
"sync"
"time"
)
// CircuitBreaker tracks provider failures with tiered cooldowns.
// It is safe for concurrent use (every access to the cooldown map is guarded
// by mu) and implements CooldownTracker.
//
// The circuit breaker applies different cooldown durations based on error type:
//   - Rate limits (429) and quota errors: the long cooldown, which defaults
//     to DefaultCooldownPeriod (1 hour)
//   - Transient server errors (5xx): the short cooldown, which defaults to
//     TransientCooldownPeriod (30 seconds)
//
// This allows quick fallback to backup providers while still giving
// the primary provider time to recover from transient issues.
//
// Note: Exempt providers (ExemptProviders map) never enter cooldown.
// This is enforced by the executor, not the circuit breaker itself.
type CircuitBreaker struct {
mu sync.RWMutex // guards failedUntil
failedUntil map[string]time.Time // provider name -> time when cooldown ends
cooldownPeriod time.Duration // cooldown applied for rate limit/quota failures
transientCooldownPeriod time.Duration // cooldown applied for transient server errors
}
// NewCircuitBreaker returns a ready-to-use circuit breaker.
//
// cooldown is the duration applied to rate-limit/quota failures; passing 0
// selects DefaultCooldownPeriod. Transient failures always use
// TransientCooldownPeriod with this constructor.
func NewCircuitBreaker(cooldown time.Duration) *CircuitBreaker {
	// Start from the package defaults, then apply the caller's override.
	breaker := &CircuitBreaker{
		failedUntil:             map[string]time.Time{},
		cooldownPeriod:          DefaultCooldownPeriod,
		transientCooldownPeriod: TransientCooldownPeriod,
	}
	if cooldown != 0 {
		breaker.cooldownPeriod = cooldown
	}
	return breaker
}
// NewCircuitBreakerWithTransientCooldown returns a circuit breaker whose two
// cooldown tiers are both caller-supplied.
//
// rateLimitCooldown applies to rate-limit/quota failures and transientCooldown
// to transient server errors. A zero value for either argument falls back to
// DefaultCooldownPeriod or TransientCooldownPeriod respectively.
func NewCircuitBreakerWithTransientCooldown(rateLimitCooldown, transientCooldown time.Duration) *CircuitBreaker {
	breaker := &CircuitBreaker{
		failedUntil:             map[string]time.Time{},
		cooldownPeriod:          rateLimitCooldown,
		transientCooldownPeriod: transientCooldown,
	}
	// Backfill any tier the caller left at zero with the package default.
	if breaker.cooldownPeriod == 0 {
		breaker.cooldownPeriod = DefaultCooldownPeriod
	}
	if breaker.transientCooldownPeriod == 0 {
		breaker.transientCooldownPeriod = TransientCooldownPeriod
	}
	return breaker
}
// RecordFailure applies tiered cooldowns based on error classification.
// It returns true if the failure type carries a cooldown (so the provider is
// in cooldown when the call returns), and false if the error does not
// warrant one.
//
// The cooldown duration depends on the result of ClassifyError(err):
//   - Rate limit (429) or quota errors: long cooldown
//   - Transient server errors (5xx): short cooldown
//   - Other errors: no cooldown
//
// A new failure never shortens a cooldown that is already in effect: if the
// provider is serving a long rate-limit cooldown and a late transient error
// arrives (for example from a request that was already in flight), the
// later of the two deadlines is kept.
func (cb *CircuitBreaker) RecordFailure(providerName string, err error) bool {
	failureType := ClassifyError(err)
	if failureType == FailureTypeNone {
		// Fast path: nothing to record, so avoid taking the write lock.
		return false
	}

	cb.mu.Lock()
	defer cb.mu.Unlock()

	var cooldown time.Duration
	switch failureType {
	case FailureTypeRateLimit:
		cooldown = cb.cooldownPeriod
	case FailureTypeTransient:
		cooldown = cb.transientCooldownPeriod
	default:
		// Unknown classification: treat as not cooldown-worthy.
		return false
	}

	// Allow a zero-value CircuitBreaker to be used without a constructor.
	if cb.failedUntil == nil {
		cb.failedUntil = make(map[string]time.Time)
	}

	// Only ever extend the deadline. A missing entry reads as the zero
	// time.Time, which any real deadline is after.
	if until := time.Now().Add(cooldown); until.After(cb.failedUntil[providerName]) {
		cb.failedUntil[providerName] = until
	}
	return true
}
// IsAvailable reports whether the provider may be used right now.
// A provider with no recorded cooldown is always available; otherwise it is
// available only once the current time is strictly after its cooldown deadline.
func (cb *CircuitBreaker) IsAvailable(providerName string) bool {
	cb.mu.RLock()
	deadline, tracked := cb.failedUntil[providerName]
	cb.mu.RUnlock()

	return !tracked || time.Now().After(deadline)
}
// CooldownRemaining reports how much longer the provider will stay in
// cooldown. It returns 0 when the provider has no recorded cooldown or when
// the recorded cooldown has already expired.
func (cb *CircuitBreaker) CooldownRemaining(providerName string) time.Duration {
	cb.mu.RLock()
	deadline, tracked := cb.failedUntil[providerName]
	cb.mu.RUnlock()

	if tracked {
		// Clamp at zero: an expired deadline yields a negative duration.
		if left := time.Until(deadline); left > 0 {
			return left
		}
	}
	return 0
}
// Reset clears any cooldown recorded for the given provider so that it is
// available again immediately. Resetting a provider that is not in cooldown
// is a no-op.
func (cb *CircuitBreaker) Reset(providerName string) {
	cb.mu.Lock()
	delete(cb.failedUntil, providerName)
	cb.mu.Unlock()
}
// ResetAll drops every recorded cooldown by swapping in a fresh, empty map,
// making all providers available again.
func (cb *CircuitBreaker) ResetAll() {
	empty := map[string]time.Time{}

	cb.mu.Lock()
	cb.failedUntil = empty
	cb.mu.Unlock()
}
// Compile-time interface check: the build fails on this line if
// *CircuitBreaker ever stops satisfying CooldownTracker.
var _ CooldownTracker = (*CircuitBreaker)(nil)