Major changes:
- Add internal/logging package with field constants, context propagation, sensitive data auto-redaction, and per-component log levels
- Add worker timeout constants (TimeoutQuickOp, TimeoutHealthCheck, etc.)
- Extend SDLC with callback handlers, generate endpoints, and executor
- Add new cookbook trees for aeries and slackpath progression
- Add skeleton templates for queue, realtime, and microservices
- Add worker component template with async job processing
- Refactor services and handlers to use new logging infrastructure
- Split component.go into component_infra.go and component_listing.go

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
328 lines
8.9 KiB
Go
328 lines
8.9 KiB
Go
package domain
|
|
|
|
import "time"
|
|
|
|
// WorkTaskStatus is the lifecycle state of a work task, represented by its
// lowercase string name.
type WorkTaskStatus string

// The known WorkTaskStatus values.
const (
	WorkTaskStatusPending   WorkTaskStatus = "pending"
	WorkTaskStatusRunning   WorkTaskStatus = "running"
	WorkTaskStatusCompleted WorkTaskStatus = "completed"
	WorkTaskStatusFailed    WorkTaskStatus = "failed"
	WorkTaskStatusCancelled WorkTaskStatus = "cancelled"
)
|
|
|
|
// WorkErrorCode is a coarse category for why a task failed. It lets clients
// tell failure modes apart and react accordingly (e.g., retry vs wait vs
// report).
type WorkErrorCode string

const (
	// WorkErrorCodeNone indicates no error (task succeeded or still running).
	WorkErrorCodeNone WorkErrorCode = ""

	// WorkErrorCodeRateLimited indicates the agent hit its rate limit.
	// Client should wait for the limit to reset before retrying.
	WorkErrorCodeRateLimited WorkErrorCode = "RATE_LIMITED"

	// WorkErrorCodeAuthFailed indicates authentication/authorization failure.
	// Requires manual intervention to re-authenticate the agent.
	WorkErrorCodeAuthFailed WorkErrorCode = "AUTH_FAILED"

	// WorkErrorCodeTimeout indicates the task exceeded its time limit.
	// May be retried, possibly with a longer timeout or simpler prompt.
	WorkErrorCodeTimeout WorkErrorCode = "TIMEOUT"

	// WorkErrorCodeStaleWorker indicates the worker stopped responding.
	// The task was recovered by maintenance and can be retried.
	WorkErrorCodeStaleWorker WorkErrorCode = "STALE_WORKER"

	// WorkErrorCodeAgentError indicates a generic agent execution error.
	// The error message contains details.
	WorkErrorCodeAgentError WorkErrorCode = "AGENT_ERROR"

	// WorkErrorCodeInvalidSpec indicates the task specification was invalid.
	// Should not be retried without fixing the spec.
	WorkErrorCodeInvalidSpec WorkErrorCode = "INVALID_SPEC"
)
|
|
|
|
// ClassifyAgentError examines an error message and stderr output to determine
|
|
// the appropriate error code. This enables automated handling of known failure modes.
|
|
func ClassifyAgentError(errMsg, stderr string) WorkErrorCode {
|
|
combined := errMsg + "\n" + stderr
|
|
|
|
// Rate limit detection - Claude Code specific messages
|
|
rateLimitPatterns := []string{
|
|
"You've hit your limit",
|
|
"rate limit",
|
|
"Rate limit",
|
|
"too many requests",
|
|
"Too many requests",
|
|
"quota exceeded",
|
|
"Quota exceeded",
|
|
}
|
|
for _, pattern := range rateLimitPatterns {
|
|
if containsIgnoreCase(combined, pattern) {
|
|
return WorkErrorCodeRateLimited
|
|
}
|
|
}
|
|
|
|
// Authentication failure detection
|
|
authPatterns := []string{
|
|
"not authenticated",
|
|
"authentication failed",
|
|
"unauthorized",
|
|
"Unauthorized",
|
|
"invalid api key",
|
|
"Invalid API key",
|
|
"please log in",
|
|
"Please log in",
|
|
"claude login",
|
|
}
|
|
for _, pattern := range authPatterns {
|
|
if containsIgnoreCase(combined, pattern) {
|
|
return WorkErrorCodeAuthFailed
|
|
}
|
|
}
|
|
|
|
// Timeout detection
|
|
timeoutPatterns := []string{
|
|
"context deadline exceeded",
|
|
"context canceled",
|
|
"timeout",
|
|
"Timeout",
|
|
"timed out",
|
|
}
|
|
for _, pattern := range timeoutPatterns {
|
|
if containsIgnoreCase(combined, pattern) {
|
|
return WorkErrorCodeTimeout
|
|
}
|
|
}
|
|
|
|
// Default to generic agent error
|
|
return WorkErrorCodeAgentError
|
|
}
|
|
|
|
// containsIgnoreCase checks if s contains substr (case-insensitive).
|
|
func containsIgnoreCase(s, substr string) bool {
|
|
return len(s) >= len(substr) &&
|
|
(s == substr ||
|
|
len(substr) == 0 ||
|
|
findIgnoreCase(s, substr) >= 0)
|
|
}
|
|
|
|
// findIgnoreCase returns the byte index of the first occurrence of substr in
// s, or -1 if there is none. Only the ASCII letters A-Z are case-folded; every
// other byte, including the bytes of multi-byte UTF-8 sequences, must match
// exactly. An empty substr matches at index 0.
func findIgnoreCase(s, substr string) int {
	n := len(substr)
	if n == 0 {
		return 0
	}
	if len(s) < n {
		return -1
	}

	// lower folds one ASCII upper-case letter and leaves other bytes alone.
	lower := func(b byte) byte {
		if 'A' <= b && b <= 'Z' {
			return b + ('a' - 'A')
		}
		return b
	}

	// Simple linear search: try every start offset that leaves room for substr.
scan:
	for start := 0; start+n <= len(s); start++ {
		for k := 0; k < n; k++ {
			if lower(s[start+k]) != lower(substr[k]) {
				continue scan
			}
		}
		return start
	}
	return -1
}
|
|
|
|
// IsValid returns true if the status is a known valid status.
|
|
func (s WorkTaskStatus) IsValid() bool {
|
|
switch s {
|
|
case WorkTaskStatusPending, WorkTaskStatusRunning, WorkTaskStatusCompleted,
|
|
WorkTaskStatusFailed, WorkTaskStatusCancelled:
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
// WorkTaskType identifies the kind of work a task performs, represented by
// its lowercase string name.
type WorkTaskType string

// The known WorkTaskType values.
const (
	WorkTaskTypeBuild  WorkTaskType = "build"
	WorkTaskTypeTest   WorkTaskType = "test"
	WorkTaskTypeDeploy WorkTaskType = "deploy"
	WorkTaskTypeCustom WorkTaskType = "custom"
	WorkTaskTypeVerify WorkTaskType = "verify"
	WorkTaskTypeSDLC   WorkTaskType = "sdlc"
)
|
|
|
|
// IsValid returns true if the task type is a known valid type.
|
|
func (t WorkTaskType) IsValid() bool {
|
|
switch t {
|
|
case WorkTaskTypeBuild, WorkTaskTypeTest, WorkTaskTypeDeploy, WorkTaskTypeCustom, WorkTaskTypeVerify, WorkTaskTypeSDLC:
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
// SDLCTaskSpec defines parameters for SDLC CLI commands run via the worker
// pool. It is used for skeleton/monorepo projects that don't have a dedicated
// pod.
type SDLCTaskSpec struct {
	// Command is the SDLC CLI command to execute
	// (e.g., "feature-create", "artifact-approve").
	Command string `json:"command"`

	// Args contains CLI arguments for the command.
	Args []string `json:"args"`

	// GitCloneURL is the repository URL for cloning (required for worker
	// execution).
	GitCloneURL string `json:"git_clone_url"`

	// AutoCommit indicates whether to commit .sdlc/ changes after execution.
	AutoCommit bool `json:"auto_commit"`

	// AutoPush indicates whether to push commits after commit.
	AutoPush bool `json:"auto_push"`
}
|
|
|
|
// WorkTask represents a task in the work queue.
|
|
type WorkTask struct {
|
|
// ID is the unique task identifier.
|
|
ID string
|
|
|
|
// ProjectID is the project this task belongs to.
|
|
ProjectID string
|
|
|
|
// Type is the task type (build, test, deploy, custom).
|
|
Type WorkTaskType
|
|
|
|
// Spec contains task-specific parameters.
|
|
// For build tasks: template, prompt, variables, auto_deploy, git_url
|
|
// For test tasks: test_command, git_url
|
|
// For deploy tasks: image, replicas, env
|
|
Spec map[string]any
|
|
|
|
// Status is the current task status.
|
|
Status WorkTaskStatus
|
|
|
|
// Priority determines execution order (higher = more urgent).
|
|
Priority int
|
|
|
|
// WorkerID is the ID of the worker that claimed this task.
|
|
WorkerID string
|
|
|
|
// CallbackURL is the webhook URL for completion notification.
|
|
CallbackURL string
|
|
|
|
// CreatedAt is when the task was created.
|
|
CreatedAt time.Time
|
|
|
|
// StartedAt is when a worker started executing the task.
|
|
StartedAt *time.Time
|
|
|
|
// CompletedAt is when the task finished (success or failure).
|
|
CompletedAt *time.Time
|
|
|
|
// Result contains the task output (if completed).
|
|
Result *WorkResult
|
|
|
|
// Error contains the error message (if failed).
|
|
Error string
|
|
|
|
// ErrorCode categorizes the failure type for programmatic handling.
|
|
// Only set when Status is WorkTaskStatusFailed.
|
|
ErrorCode WorkErrorCode
|
|
|
|
// RetryCount is the number of retry attempts.
|
|
RetryCount int
|
|
|
|
// MaxRetries is the maximum allowed retry attempts.
|
|
MaxRetries int
|
|
}
|
|
|
|
// WorkResult contains the result of a completed task. Both fields are omitted
// from JSON when empty.
type WorkResult struct {
	// Output is the main output from task execution.
	Output string `json:"output,omitempty"`

	// Artifacts contains named artifacts from the task.
	// For build tasks: commit_sha, deploy_url, etc.
	Artifacts map[string]string `json:"artifacts,omitempty"`
}
|
|
|
|
// WorkQueueStats contains queue statistics.
type WorkQueueStats struct {
	// Pending is the count of pending tasks.
	Pending int64 `json:"pending"`

	// Running is the count of running tasks.
	Running int64 `json:"running"`

	// Completed is the count of completed tasks (last 24h).
	Completed int64 `json:"completed"`

	// Failed is the count of failed tasks (last 24h).
	Failed int64 `json:"failed"`

	// Cancelled is the count of cancelled tasks (last 24h).
	Cancelled int64 `json:"cancelled"`

	// OldestPending is the age of the oldest pending task. It is omitted from
	// JSON when nil; otherwise encoding/json writes the time.Duration as an
	// integer number of nanoseconds.
	OldestPending *time.Duration `json:"oldest_pending,omitempty"`
}
|
|
|
|
// Pagination bounds shared by DefaultWorkListOptions and
// WorkListOptions.Normalize so the two cannot drift apart.
const (
	defaultWorkListLimit = 50
	maxWorkListLimit     = 100
)

// WorkListOptions contains pagination options for listing tasks.
type WorkListOptions struct {
	// Limit is the maximum number of tasks to return (default: 50, max: 100).
	Limit int

	// Offset is the number of tasks to skip (for pagination).
	Offset int
}

// DefaultWorkListOptions returns options with the default limit and a zero
// offset.
func DefaultWorkListOptions() WorkListOptions {
	return WorkListOptions{Limit: defaultWorkListLimit}
}

// Normalize applies defaults and limits to the options in place: a
// non-positive Limit becomes the default, a Limit above the maximum is capped,
// and a negative Offset is reset to zero.
func (o *WorkListOptions) Normalize() {
	switch {
	case o.Limit <= 0:
		o.Limit = defaultWorkListLimit
	case o.Limit > maxWorkListLimit:
		o.Limit = maxWorkListLimit
	}
	if o.Offset < 0 {
		o.Offset = 0
	}
}
|
|
|
|
// WorkListResult contains paginated task results.
|
|
type WorkListResult struct {
|
|
// Tasks is the list of tasks.
|
|
Tasks []*WorkTask
|
|
|
|
// Total is the total count of matching tasks (for pagination metadata).
|
|
Total int64
|
|
|
|
// Limit is the limit that was applied.
|
|
Limit int
|
|
|
|
// Offset is the offset that was applied.
|
|
Offset int
|
|
}
|