rdev/internal/domain/work.go
jordan d69da6d627 feat: add structured logging infrastructure and SDLC extensions
Major changes:
- Add internal/logging package with field constants, context propagation,
  sensitive data auto-redaction, and per-component log levels
- Add worker timeout constants (TimeoutQuickOp, TimeoutHealthCheck, etc.)
- Extend SDLC with callback handlers, generate endpoints, and executor
- Add new cookbook trees for aeries and slackpath progression
- Add skeleton templates for queue, realtime, and microservices
- Add worker component template with async job processing
- Refactor services and handlers to use new logging infrastructure
- Split component.go into component_infra.go and component_listing.go

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-04 22:56:04 -07:00

328 lines
8.9 KiB
Go

package domain
import "time"
// WorkTaskStatus represents the status of a work task.
// It is a plain string type; use IsValid to check a value against the
// known set declared below.
type WorkTaskStatus string
// Known WorkTaskStatus values. These five are exactly the values that
// WorkTaskStatus.IsValid accepts; each constant's string is its lowercase name.
// NOTE(review): the allowed transitions between statuses are not defined in
// this file — confirm against the queue implementation.
const (
WorkTaskStatusPending WorkTaskStatus = "pending"
WorkTaskStatusRunning WorkTaskStatus = "running"
WorkTaskStatusCompleted WorkTaskStatus = "completed"
WorkTaskStatusFailed WorkTaskStatus = "failed"
WorkTaskStatusCancelled WorkTaskStatus = "cancelled"
)
// WorkErrorCode represents a categorized error type for failed tasks.
// This enables clients to distinguish between different failure modes
// and take appropriate action (e.g., retry vs wait vs report).
//
// ClassifyAgentError derives a code from error text, but it only ever
// returns RATE_LIMITED, AUTH_FAILED, TIMEOUT, or AGENT_ERROR; the other
// codes declared here are not produced by that function.
type WorkErrorCode string
// Known WorkErrorCode values. Apart from the empty "no error" value, each
// code's string is an upper-case, underscore-separated identifier.
const (
// WorkErrorCodeNone indicates no error (task succeeded or still running).
// It is the zero value of WorkErrorCode.
WorkErrorCodeNone WorkErrorCode = ""
// WorkErrorCodeRateLimited indicates the agent hit its rate limit.
// Client should wait for the limit to reset before retrying.
WorkErrorCodeRateLimited WorkErrorCode = "RATE_LIMITED"
// WorkErrorCodeAuthFailed indicates authentication/authorization failure.
// Requires manual intervention to re-authenticate the agent.
WorkErrorCodeAuthFailed WorkErrorCode = "AUTH_FAILED"
// WorkErrorCodeTimeout indicates the task exceeded its time limit.
// May be retried, possibly with a longer timeout or simpler prompt.
WorkErrorCodeTimeout WorkErrorCode = "TIMEOUT"
// WorkErrorCodeStaleWorker indicates the worker stopped responding.
// The task was recovered by maintenance and can be retried.
WorkErrorCodeStaleWorker WorkErrorCode = "STALE_WORKER"
// WorkErrorCodeAgentError indicates a generic agent execution error.
// The error message contains details.
WorkErrorCodeAgentError WorkErrorCode = "AGENT_ERROR"
// WorkErrorCodeInvalidSpec indicates the task specification was invalid.
// Should not be retried without fixing the spec.
WorkErrorCodeInvalidSpec WorkErrorCode = "INVALID_SPEC"
)
// ClassifyAgentError examines an error message and stderr output to determine
// the appropriate error code. This enables automated handling of known
// failure modes.
//
// errMsg and stderr are joined with a newline and scanned for known phrases.
// Matching is case-insensitive (ASCII folding, via containsIgnoreCase), so
// each phrase is listed only once. Categories are tried in a fixed order and
// the first one with a matching phrase wins:
//
//  1. rate limiting  -> WorkErrorCodeRateLimited
//  2. authentication -> WorkErrorCodeAuthFailed
//  3. timeouts       -> WorkErrorCodeTimeout
//
// Any other input, including two empty strings, yields
// WorkErrorCodeAgentError; this function never returns WorkErrorCodeNone.
func ClassifyAgentError(errMsg, stderr string) WorkErrorCode {
	combined := errMsg + "\n" + stderr

	// The order of this table is significant: a message that mentions both a
	// rate limit and a timeout is reported as rate limited.
	rules := []struct {
		code     WorkErrorCode
		patterns []string
	}{
		{
			// Rate limit detection - Claude Code specific messages.
			code: WorkErrorCodeRateLimited,
			patterns: []string{
				"You've hit your limit",
				"rate limit",
				"too many requests",
				"quota exceeded",
			},
		},
		{
			// Authentication failure detection.
			code: WorkErrorCodeAuthFailed,
			patterns: []string{
				"not authenticated",
				"authentication failed",
				"unauthorized",
				"invalid api key",
				"please log in",
				"claude login",
			},
		},
		{
			// Timeout detection. Note that "context canceled" is also
			// reported as a timeout, and that the bare "timeout" phrase
			// matches anywhere in the text.
			code: WorkErrorCodeTimeout,
			patterns: []string{
				"context deadline exceeded",
				"context canceled",
				"timeout",
				"timed out",
			},
		},
	}

	for _, rule := range rules {
		for _, pattern := range rule.patterns {
			if containsIgnoreCase(combined, pattern) {
				return rule.code
			}
		}
	}

	// Default to generic agent error.
	return WorkErrorCodeAgentError
}
// containsIgnoreCase reports whether substr occurs anywhere in s, ignoring
// ASCII letter case. An empty substr is contained in every string, and a
// substr longer than s is never contained; both cases are handled by
// findIgnoreCase.
func containsIgnoreCase(s, substr string) bool {
	return findIgnoreCase(s, substr) >= 0
}
// findIgnoreCase returns the byte index of the first occurrence of substr in
// s, comparing ASCII letters without regard to case, or -1 if there is none.
// An empty substr matches at index 0. Only the bytes 'A'-'Z' are folded;
// all other bytes, including non-ASCII ones, must match exactly.
func findIgnoreCase(s, substr string) int {
	n := len(substr)
	if n == 0 {
		return 0
	}

	// lower maps an ASCII upper-case letter to lower case and leaves every
	// other byte unchanged.
	lower := func(c byte) byte {
		if 'A' <= c && c <= 'Z' {
			return c + ('a' - 'A')
		}
		return c
	}

	// Try every start position that leaves room for the whole of substr.
	// When s is shorter than substr the loop body never runs.
	for start := 0; start+n <= len(s); start++ {
		matched := 0
		for matched < n && lower(s[start+matched]) == lower(substr[matched]) {
			matched++
		}
		if matched == n {
			return start
		}
	}
	return -1
}
// IsValid reports whether s is one of the declared WorkTaskStatus constants.
// The comparison is exact, so the empty string and differently-cased
// spellings are rejected.
func (s WorkTaskStatus) IsValid() bool {
	switch s {
	case WorkTaskStatusPending,
		WorkTaskStatusRunning,
		WorkTaskStatusCompleted,
		WorkTaskStatusFailed,
		WorkTaskStatusCancelled:
		return true
	default:
		return false
	}
}
// WorkTaskType identifies the kind of work a task performs.
type WorkTaskType string

// Known WorkTaskType values. These are exactly the values that IsValid
// accepts; each constant's string is its lowercase name.
const (
	WorkTaskTypeBuild  WorkTaskType = "build"
	WorkTaskTypeTest   WorkTaskType = "test"
	WorkTaskTypeDeploy WorkTaskType = "deploy"
	WorkTaskTypeCustom WorkTaskType = "custom"
	WorkTaskTypeVerify WorkTaskType = "verify"
	WorkTaskTypeSDLC   WorkTaskType = "sdlc"
)

// IsValid reports whether t is one of the declared WorkTaskType constants.
// The comparison is exact, so the empty string and differently-cased
// spellings are rejected.
func (t WorkTaskType) IsValid() bool {
	switch t {
	case WorkTaskTypeBuild,
		WorkTaskTypeTest,
		WorkTaskTypeDeploy,
		WorkTaskTypeCustom,
		WorkTaskTypeVerify,
		WorkTaskTypeSDLC:
		return true
	default:
		return false
	}
}
// SDLCTaskSpec defines parameters for SDLC CLI commands via worker pool.
// Used for skeleton/monorepo projects that don't have a dedicated pod.
//
// None of the JSON tags use omitempty, so every field is always present in
// the encoded form (a nil Args encodes as null).
// NOTE(review): presumably this is the payload carried in WorkTask.Spec for
// WorkTaskTypeSDLC tasks — confirm against the code that enqueues them.
type SDLCTaskSpec struct {
// Command is the SDLC CLI command to execute (e.g., "feature-create", "artifact-approve").
Command string `json:"command"`
// Args contains CLI arguments for the command.
Args []string `json:"args"`
// GitCloneURL is the repository URL for cloning (required for worker execution).
// NOTE(review): the "required" constraint is not validated by this type.
GitCloneURL string `json:"git_clone_url"`
// AutoCommit indicates whether to commit .sdlc/ changes after execution.
AutoCommit bool `json:"auto_commit"`
// AutoPush indicates whether to push commits after commit.
AutoPush bool `json:"auto_push"`
}
// WorkTask represents a task in the work queue.
// The struct declares no JSON tags; optional timestamps and the result are
// pointers so that "not set" can be represented as nil.
type WorkTask struct {
// ID is the unique task identifier.
ID string
// ProjectID is the project this task belongs to.
ProjectID string
// Type is the task type; see the WorkTaskType constants
// (build, test, deploy, custom, verify, sdlc).
Type WorkTaskType
// Spec contains task-specific parameters as a free-form map.
// For build tasks: template, prompt, variables, auto_deploy, git_url
// For test tasks: test_command, git_url
// For deploy tasks: image, replicas, env
// NOTE(review): the keys listed above are not validated by this type.
Spec map[string]any
// Status is the current task status.
Status WorkTaskStatus
// Priority determines execution order (higher = more urgent).
Priority int
// WorkerID is the ID of the worker that claimed this task.
WorkerID string
// CallbackURL is the webhook URL for completion notification.
CallbackURL string
// CreatedAt is when the task was created.
CreatedAt time.Time
// StartedAt is when a worker started executing the task.
// NOTE(review): nil presumably means the task has not started — confirm.
StartedAt *time.Time
// CompletedAt is when the task finished (success or failure).
// NOTE(review): nil presumably means the task has not finished — confirm.
CompletedAt *time.Time
// Result contains the task output (if completed).
Result *WorkResult
// Error contains the error message (if failed).
Error string
// ErrorCode categorizes the failure type for programmatic handling.
// Only set when Status is WorkTaskStatusFailed.
// Its zero value is WorkErrorCodeNone (the empty string).
ErrorCode WorkErrorCode
// RetryCount is the number of retry attempts.
RetryCount int
// MaxRetries is the maximum allowed retry attempts.
MaxRetries int
}
// WorkResult contains the result of a completed task.
// Both fields are tagged omitempty, so an empty Output or an empty/nil
// Artifacts map is left out of the encoded JSON.
type WorkResult struct {
// Output is the main output from task execution.
Output string `json:"output,omitempty"`
// Artifacts contains named artifacts from the task.
// For build tasks: commit_sha, deploy_url, etc.
Artifacts map[string]string `json:"artifacts,omitempty"`
}
// WorkQueueStats contains queue statistics.
// The counters are always present in the encoded JSON; only OldestPending
// is optional.
// NOTE(review): the "last 24h" windows mentioned below are applied by
// whatever populates this struct; nothing in this type enforces them.
type WorkQueueStats struct {
// Pending is the count of pending tasks.
Pending int64 `json:"pending"`
// Running is the count of running tasks.
Running int64 `json:"running"`
// Completed is the count of completed tasks (last 24h).
Completed int64 `json:"completed"`
// Failed is the count of failed tasks (last 24h).
Failed int64 `json:"failed"`
// Cancelled is the count of cancelled tasks (last 24h).
Cancelled int64 `json:"cancelled"`
// OldestPending is the age of the oldest pending task.
// It is omitted from JSON when nil. time.Duration has no custom JSON
// encoding, so a non-nil value is emitted as an integer nanosecond count.
OldestPending *time.Duration `json:"oldest_pending,omitempty"`
}
// WorkListOptions describes the pagination window for a task listing.
type WorkListOptions struct {
	// Limit is the maximum number of tasks to return (default: 50, max: 100).
	Limit int
	// Offset is the number of tasks to skip (for pagination).
	Offset int
}

// DefaultWorkListOptions returns the options for the first page at the
// default page size: Limit 50, Offset 0.
func DefaultWorkListOptions() WorkListOptions {
	// Normalizing the zero value fills in the default limit and leaves the
	// offset at 0.
	var opts WorkListOptions
	opts.Normalize()
	return opts
}

// Normalize rewrites the options in place so they fall within the supported
// range: a non-positive Limit becomes the default of 50, a Limit above 100 is
// capped at 100, and a negative Offset becomes 0. Values already in range are
// left untouched.
func (o *WorkListOptions) Normalize() {
	switch {
	case o.Limit <= 0:
		o.Limit = 50
	case o.Limit > 100:
		o.Limit = 100
	}

	if o.Offset < 0 {
		o.Offset = 0
	}
}
// WorkListResult contains paginated task results.
// It pairs one page of tasks with the Limit and Offset that produced it and
// the overall match count. The struct declares no JSON tags.
type WorkListResult struct {
// Tasks is the list of tasks.
Tasks []*WorkTask
// Total is the total count of matching tasks (for pagination metadata).
// NOTE(review): presumably this counts all matches, not just len(Tasks) — confirm.
Total int64
// Limit is the limit that was applied.
Limit int
// Offset is the offset that was applied.
Offset int
}