From 9833725f315df6ed7bea18b9b57452db2d60d465 Mon Sep 17 00:00:00 2001 From: jordan Date: Sat, 7 Feb 2026 08:40:36 -0700 Subject: [PATCH] fix: preserve work on build retry, clear stale audit data Two critical fixes for build retry behavior: 1. pod_git_operations.go: Normalize remote URL before comparison - Clone stores URL with token (https://token:x@host/...) - Subsequent retry compares against URL without token - Without normalization, URLs never match, so workspace is always cleared and re-cloned, losing all code from previous attempt 2. build_audit.go: Clear stale result data when task transitions to running - When a failed task is retried, UpdateStatus only updated status/worker_id - Result and completed_at from previous failure remained, causing API to return stale failure data even while retry was running - Now clears result, completed_at and resets started_at when status is set to "running" Co-Authored-By: Claude Opus 4.5 --- internal/adapter/postgres/build_audit.go | 25 +++++++++++++++++++----- internal/worker/pod_git_operations.go | 11 +++++++++-- 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/internal/adapter/postgres/build_audit.go b/internal/adapter/postgres/build_audit.go index 2f974af..df4dcc5 100644 --- a/internal/adapter/postgres/build_audit.go +++ b/internal/adapter/postgres/build_audit.go @@ -84,12 +84,27 @@ func (r *BuildAuditRepository) Update(ctx context.Context, taskID string, result } // UpdateStatus updates the status and worker assignment when a task is claimed. +// When status is "running" (task claimed for execution/retry), this also clears +// any stale result and completed_at from previous failed attempts, and resets started_at to NOW(). 
func (r *BuildAuditRepository) UpdateStatus(ctx context.Context, taskID string, status domain.BuildStatus, workerID string) error { - res, err := r.db.ExecContext(ctx, ` - UPDATE build_audit - SET status = $2, worker_id = $3 - WHERE task_id = $1 - `, taskID, status, nullString(workerID)) + // When a task transitions to running (claimed for execution or retry), + // clear stale result data from any previous failed attempts. + var query string + if status == domain.BuildStatusRunning { + query = ` + UPDATE build_audit + SET status = $2, worker_id = $3, result = NULL, completed_at = NULL, started_at = NOW() + WHERE task_id = $1 + ` + } else { + query = ` + UPDATE build_audit + SET status = $2, worker_id = $3 + WHERE task_id = $1 + ` + } + + res, err := r.db.ExecContext(ctx, query, taskID, status, nullString(workerID)) if err != nil { return fmt.Errorf("update build audit status: %w", err) } diff --git a/internal/worker/pod_git_operations.go b/internal/worker/pod_git_operations.go index fc26167..ef50a29 100644 --- a/internal/worker/pod_git_operations.go +++ b/internal/worker/pod_git_operations.go @@ -94,10 +94,17 @@ func (g *PodGitOperations) CloneRepo(ctx context.Context, podName, workDir, clon // Verify the remote URL matches the expected clone URL currentRemote, err := g.runGitInPodOutput(ctx, podName, workDir, "config", "--get", "remote.origin.url") currentRemote = strings.TrimSpace(currentRemote) + + // Strip token from currentRemote for comparison, since clone stores the authenticated URL + // Format: https://token:TOKEN@host/path -> https://host/path + normalizedRemote := currentRemote + if idx := strings.Index(currentRemote, "@"); idx != -1 && strings.HasPrefix(currentRemote, "https://") { + normalizedRemote = "https://" + currentRemote[idx+1:] + } expectedURL := cloneURL - // Normalize URLs for comparison (both should be HTTPS) - if err == nil && currentRemote == expectedURL { + // Normalize URLs for comparison (both should be HTTPS without credentials) + if 
err == nil && normalizedRemote == expectedURL { log.Info("workspace is already a git repo with correct remote, pulling latest", logging.FieldPodName, podName, "workDir", workDir,