package routing import ( "sync" "time" ) // CircuitBreaker tracks provider failures with tiered cooldowns. // Thread-safe for concurrent use. Implements CooldownTracker. // // The circuit breaker applies different cooldown durations based on error type: // - Rate limits (429) and quota errors: DefaultCooldownPeriod (1 hour) // - Transient server errors (5xx): TransientCooldownPeriod (30 seconds) // // This allows quick fallback to backup providers while still giving // the primary provider time to recover from transient issues. // // Note: Exempt providers (ExemptProviders map) never enter cooldown. // This is enforced by the executor, not the circuit breaker itself. type CircuitBreaker struct { mu sync.RWMutex failedUntil map[string]time.Time // provider name -> time when cooldown ends cooldownPeriod time.Duration // for rate limits/quota transientCooldownPeriod time.Duration // for server errors } // NewCircuitBreaker creates a circuit breaker with default tiered cooldowns. // If cooldown is 0, DefaultCooldownPeriod (1 hour) is used for rate limits. func NewCircuitBreaker(cooldown time.Duration) *CircuitBreaker { if cooldown == 0 { cooldown = DefaultCooldownPeriod } return &CircuitBreaker{ failedUntil: make(map[string]time.Time), cooldownPeriod: cooldown, transientCooldownPeriod: TransientCooldownPeriod, } } // NewCircuitBreakerWithTransientCooldown creates a circuit breaker with // custom cooldown periods for rate limits and transient errors. func NewCircuitBreakerWithTransientCooldown(rateLimitCooldown, transientCooldown time.Duration) *CircuitBreaker { if rateLimitCooldown == 0 { rateLimitCooldown = DefaultCooldownPeriod } if transientCooldown == 0 { transientCooldown = TransientCooldownPeriod } return &CircuitBreaker{ failedUntil: make(map[string]time.Time), cooldownPeriod: rateLimitCooldown, transientCooldownPeriod: transientCooldown, } } // RecordFailure applies tiered cooldowns based on error classification. // Returns true if the provider entered cooldown. // // The cooldown duration depends on the error type: // - Rate limit (429) or quota errors: long cooldown // - Transient server errors (5xx): short cooldown // - Other errors: no cooldown func (cb *CircuitBreaker) RecordFailure(providerName string, err error) bool { failureType := ClassifyError(err) if failureType == FailureTypeNone { return false } cb.mu.Lock() defer cb.mu.Unlock() var cooldown time.Duration switch failureType { case FailureTypeRateLimit: cooldown = cb.cooldownPeriod case FailureTypeTransient: cooldown = cb.transientCooldownPeriod default: return false } cb.failedUntil[providerName] = time.Now().Add(cooldown) return true } // IsAvailable returns true if the provider is not in cooldown. func (cb *CircuitBreaker) IsAvailable(providerName string) bool { cb.mu.RLock() defer cb.mu.RUnlock() until, exists := cb.failedUntil[providerName] if !exists { return true } return time.Now().After(until) } // CooldownRemaining returns how long until a provider's cooldown expires. // Returns 0 if the provider is not in cooldown. func (cb *CircuitBreaker) CooldownRemaining(providerName string) time.Duration { cb.mu.RLock() defer cb.mu.RUnlock() until, exists := cb.failedUntil[providerName] if !exists { return 0 } remaining := time.Until(until) if remaining < 0 { return 0 } return remaining } // Reset removes a provider from cooldown, making it available immediately. func (cb *CircuitBreaker) Reset(providerName string) { cb.mu.Lock() defer cb.mu.Unlock() delete(cb.failedUntil, providerName) } // ResetAll clears all cooldowns. func (cb *CircuitBreaker) ResetAll() { cb.mu.Lock() defer cb.mu.Unlock() cb.failedUntil = make(map[string]time.Time) } // Compile-time interface check var _ CooldownTracker = (*CircuitBreaker)(nil)