sp4-verify-1770325799/pkg/queue/redis.go
rdev-worker 36d73dd23d
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
build: /implement-feature mesh-interop --requirements 'Chat Service must cal...
2026-02-05 21:40:58 +00:00

341 lines
8.6 KiB
Go

package queue
import (
"context"
"encoding/json"
"errors"
"fmt"
"time"
"github.com/google/uuid"
"github.com/redis/go-redis/v9"
"git.threesix.ai/jordan/sp4-verify-1770325799/pkg/logging"
)
const (
// RedisQueueKey is the sorted set key for the job queue
RedisQueueKey = "jobs:queue"
// RedisJobPrefix is the prefix for job data hash keys
RedisJobPrefix = "jobs:data:"
// RedisRunningKey is the set of currently running job IDs
RedisRunningKey = "jobs:running"
)
// RedisQueue implements Producer and Consumer using Redis.
// Uses sorted sets for priority ordering and lists for atomic dequeue.
type RedisQueue struct {
client *redis.Client
logger *logging.Logger
}
// Ensure RedisQueue implements Queue at compile time.
var _ Queue = (*RedisQueue)(nil)
// NewRedisQueue creates a queue backed by Redis.
func NewRedisQueue(client *redis.Client, logger *logging.Logger) *RedisQueue {
return &RedisQueue{
client: client,
logger: logger.WithComponent("redis-queue"),
}
}
// Enqueue adds a job to the queue with default options.
func (q *RedisQueue) Enqueue(ctx context.Context, jobType string, payload map[string]any) (string, error) {
return q.EnqueueWithOptions(ctx, Job{
Type: jobType,
Payload: payload,
Priority: 0,
MaxRetries: 3,
})
}
// EnqueueWithOptions adds a job with custom configuration.
func (q *RedisQueue) EnqueueWithOptions(ctx context.Context, job Job) (string, error) {
if job.Type == "" {
return "", fmt.Errorf("job type is required")
}
job.ID = uuid.New().String()
job.Status = StatusPending
job.CreatedAt = time.Now().UTC()
if job.MaxRetries == 0 {
job.MaxRetries = 3
}
if job.MaxRetries > 100 {
job.MaxRetries = 100
}
if job.Payload == nil {
job.Payload = make(map[string]any)
}
// Serialize job to JSON
data, err := json.Marshal(job)
if err != nil {
return "", fmt.Errorf("marshal job: %w", err)
}
// Use a pipeline for atomic operations
pipe := q.client.Pipeline()
// Store job data
jobKey := RedisJobPrefix + job.ID
pipe.Set(ctx, jobKey, data, 0)
// Add to sorted set with score = -priority (higher priority = lower score = first out)
// Secondary sort by timestamp for FIFO within same priority
score := float64(-job.Priority) + float64(job.CreatedAt.UnixNano())/1e18
pipe.ZAdd(ctx, RedisQueueKey, redis.Z{
Score: score,
Member: job.ID,
})
_, err = pipe.Exec(ctx)
if err != nil {
return "", fmt.Errorf("enqueue job: %w", err)
}
q.logger.Debug("job enqueued", "job_id", job.ID, "type", job.Type, "priority", job.Priority)
return job.ID, nil
}
// Dequeue atomically claims the next pending job.
func (q *RedisQueue) Dequeue(ctx context.Context, workerID string) (*Job, error) {
// Pop the highest priority job (lowest score) atomically
result, err := q.client.ZPopMin(ctx, RedisQueueKey, 1).Result()
if err != nil {
return nil, fmt.Errorf("dequeue job: %w", err)
}
if len(result) == 0 {
return nil, ErrNoJob
}
jobID := result[0].Member.(string)
jobKey := RedisJobPrefix + jobID
// Get job data
data, err := q.client.Get(ctx, jobKey).Bytes()
if err != nil {
if errors.Is(err, redis.Nil) {
return nil, ErrJobNotFound
}
return nil, fmt.Errorf("get job data: %w", err)
}
var job Job
if err := json.Unmarshal(data, &job); err != nil {
return nil, fmt.Errorf("unmarshal job: %w", err)
}
// Update job status
now := time.Now().UTC()
job.Status = StatusRunning
job.StartedAt = &now
job.WorkerID = workerID
// Save updated job and add to running set
updatedData, err := json.Marshal(job)
if err != nil {
return nil, fmt.Errorf("marshal updated job: %w", err)
}
pipe := q.client.Pipeline()
pipe.Set(ctx, jobKey, updatedData, 0)
pipe.SAdd(ctx, RedisRunningKey, jobID)
_, err = pipe.Exec(ctx)
if err != nil {
return nil, fmt.Errorf("update job status: %w", err)
}
q.logger.Debug("job dequeued", "job_id", job.ID, "type", job.Type, "worker_id", workerID)
return &job, nil
}
// Ack marks a job as successfully completed.
func (q *RedisQueue) Ack(ctx context.Context, jobID string) error {
jobKey := RedisJobPrefix + jobID
data, err := q.client.Get(ctx, jobKey).Bytes()
if err != nil {
if errors.Is(err, redis.Nil) {
return ErrJobNotFound
}
return fmt.Errorf("get job: %w", err)
}
var job Job
if err := json.Unmarshal(data, &job); err != nil {
return fmt.Errorf("unmarshal job: %w", err)
}
now := time.Now().UTC()
job.Status = StatusCompleted
job.CompletedAt = &now
updatedData, err := json.Marshal(job)
if err != nil {
return fmt.Errorf("marshal job: %w", err)
}
pipe := q.client.Pipeline()
pipe.Set(ctx, jobKey, updatedData, 24*time.Hour) // Keep completed jobs for 24h
pipe.SRem(ctx, RedisRunningKey, jobID)
_, err = pipe.Exec(ctx)
if err != nil {
return fmt.Errorf("ack job: %w", err)
}
q.logger.Debug("job completed", "job_id", jobID)
return nil
}
// Fail marks a job as failed, requeuing if retries remain.
func (q *RedisQueue) Fail(ctx context.Context, jobID string, errMsg string) error {
jobKey := RedisJobPrefix + jobID
data, err := q.client.Get(ctx, jobKey).Bytes()
if err != nil {
if errors.Is(err, redis.Nil) {
return ErrJobNotFound
}
return fmt.Errorf("get job: %w", err)
}
var job Job
if err := json.Unmarshal(data, &job); err != nil {
return fmt.Errorf("unmarshal job: %w", err)
}
job.RetryCount++
job.Error = errMsg
pipe := q.client.Pipeline()
pipe.SRem(ctx, RedisRunningKey, jobID)
if job.RetryCount >= job.MaxRetries {
// Exhausted retries - mark as failed
now := time.Now().UTC()
job.Status = StatusFailed
job.CompletedAt = &now
updatedData, err := json.Marshal(job)
if err != nil {
return fmt.Errorf("marshal job: %w", err)
}
pipe.Set(ctx, jobKey, updatedData, 24*time.Hour) // Keep failed jobs for 24h
} else {
// Requeue for retry
job.Status = StatusPending
job.StartedAt = nil
job.WorkerID = ""
updatedData, err := json.Marshal(job)
if err != nil {
return fmt.Errorf("marshal job: %w", err)
}
pipe.Set(ctx, jobKey, updatedData, 0)
// Re-add to queue with original priority
score := float64(-job.Priority) + float64(job.CreatedAt.UnixNano())/1e18
pipe.ZAdd(ctx, RedisQueueKey, redis.Z{
Score: score,
Member: jobID,
})
}
_, err = pipe.Exec(ctx)
if err != nil {
return fmt.Errorf("fail job: %w", err)
}
logErrMsg := errMsg
if len(logErrMsg) > 500 {
logErrMsg = logErrMsg[:497] + "..."
}
q.logger.Debug("job failed", "job_id", jobID, "retry_count", job.RetryCount, "max_retries", job.MaxRetries, "error", logErrMsg)
return nil
}
// Heartbeat extends the job's visibility timeout (no-op for Redis implementation).
func (q *RedisQueue) Heartbeat(ctx context.Context, jobID string) error {
// For Redis, we track running jobs in a set but don't have visibility timeout.
// This could be extended to use Redis EXPIRE on job keys if needed.
return nil
}
// GetJob retrieves a job by ID (for inspection/debugging).
func (q *RedisQueue) GetJob(ctx context.Context, jobID string) (*Job, error) {
jobKey := RedisJobPrefix + jobID
data, err := q.client.Get(ctx, jobKey).Bytes()
if err != nil {
if errors.Is(err, redis.Nil) {
return nil, ErrJobNotFound
}
return nil, fmt.Errorf("get job: %w", err)
}
var job Job
if err := json.Unmarshal(data, &job); err != nil {
return nil, fmt.Errorf("unmarshal job: %w", err)
}
return &job, nil
}
// QueueLength returns the number of pending jobs.
func (q *RedisQueue) QueueLength(ctx context.Context) (int64, error) {
return q.client.ZCard(ctx, RedisQueueKey).Result()
}
// RequeueStale requeues jobs that have been running too long.
func (q *RedisQueue) RequeueStale(ctx context.Context, timeout time.Duration) (int64, error) {
// Get all running job IDs
runningIDs, err := q.client.SMembers(ctx, RedisRunningKey).Result()
if err != nil {
return 0, fmt.Errorf("get running jobs: %w", err)
}
cutoff := time.Now().UTC().Add(-timeout)
var requeued int64
for _, jobID := range runningIDs {
job, err := q.GetJob(ctx, jobID)
if err != nil {
continue // Job may have been deleted
}
if job.StartedAt != nil && job.StartedAt.Before(cutoff) {
// Requeue stale job
job.Status = StatusPending
job.StartedAt = nil
job.WorkerID = ""
data, err := json.Marshal(job)
if err != nil {
continue
}
pipe := q.client.Pipeline()
pipe.Set(ctx, RedisJobPrefix+jobID, data, 0)
pipe.SRem(ctx, RedisRunningKey, jobID)
score := float64(-job.Priority) + float64(job.CreatedAt.UnixNano())/1e18
pipe.ZAdd(ctx, RedisQueueKey, redis.Z{
Score: score,
Member: jobID,
})
_, err = pipe.Exec(ctx)
if err == nil {
requeued++
}
}
}
if requeued > 0 {
q.logger.Info("requeued stale jobs", "count", requeued, "timeout", timeout)
}
return requeued, nil
}