rdev/internal/worker/pod_git_operations.go
jordan 9833725f31 fix: preserve work on build retry, clear stale audit data
Two critical fixes for build retry behavior:

1. pod_git_operations.go: Normalize remote URL before comparison
   - Clone stores URL with token (https://token:x@host/...)
   - Subsequent retry compares against URL without token
   - Without normalization, URLs never match, so workspace is always
     cleared and re-cloned, losing all code from previous attempt

2. build_audit.go: Clear stale result data when task transitions to running
   - When a failed task is retried, UpdateStatus only updated status/worker_id
   - Result and completed_at from previous failure remained, causing
     API to return stale failure data even while retry was running
   - Now clears result, completed_at and resets started_at when
     status is set to "running"

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-07 08:40:36 -07:00

346 lines
11 KiB
Go

package worker
import (
"bytes"
"context"
"fmt"
"os/exec"
"strings"
"github.com/orchard9/rdev/internal/logging"
)
// PodGitOperations provides git operations that run inside a Kubernetes pod
// via kubectl exec. This ensures git commands execute in the same environment
// where the code agent runs.
//
// Values are created with NewPodGitOperations, which fills in a default commit
// author when none is configured.
type PodGitOperations struct {
// namespace is passed to `kubectl exec -n` for every command.
namespace string
// giteaToken, when non-empty, is injected into HTTPS clone URLs and into the
// push credential helper, and is scrubbed from captured command stderr.
giteaToken string
// gitUser is written to git's user.name before committing.
gitUser string
// gitEmail is written to git's user.email before committing.
gitEmail string
}
// PodGitOperationsConfig configures pod git operations.
type PodGitOperationsConfig struct {
// Namespace is the Kubernetes namespace for kubectl exec.
Namespace string
// GiteaToken is the token used for HTTPS authentication when cloning and
// pushing. When empty, no credentials are injected into git commands.
GiteaToken string
// GitUser is the git commit author name. NewPodGitOperations substitutes
// "rdev-worker" when this is empty.
GitUser string
// GitEmail is the git commit author email. NewPodGitOperations substitutes
// "worker@threesix.ai" when this is empty.
GitEmail string
}
// NewPodGitOperations builds a PodGitOperations from cfg.
//
// Namespace and GiteaToken are copied as given. An empty GitUser falls back to
// "rdev-worker" and an empty GitEmail to "worker@threesix.ai", so commits made
// by the helper always carry an author identity.
func NewPodGitOperations(cfg PodGitOperationsConfig) *PodGitOperations {
	ops := &PodGitOperations{
		namespace:  cfg.Namespace,
		giteaToken: cfg.GiteaToken,
		gitUser:    cfg.GitUser,
		gitEmail:   cfg.GitEmail,
	}

	// Apply author defaults after copying so the caller's values win whenever
	// they are set.
	if ops.gitUser == "" {
		ops.gitUser = "rdev-worker"
	}
	if ops.gitEmail == "" {
		ops.gitEmail = "worker@threesix.ai"
	}
	return ops
}
// PostBuildResult contains the result of post-build git operations.
// CommitAndPush fills the fields in as it progresses, so when Error is non-nil
// the remaining fields show how far the sequence got before failing.
type PostBuildResult struct {
// HasChanges is true when `git status --porcelain` produced any output.
HasChanges bool
// CommitSHA is the HEAD revision read back after a successful commit.
CommitSHA string
// FilesChanged lists the staged paths reported by
// `git diff --cached --name-only`.
FilesChanged []string
// Pushed is true only after `git push origin HEAD` succeeded.
Pushed bool
// Error is the first failure encountered, or nil when every step succeeded.
Error error
}
// CloneResult contains the result of a git clone operation.
type CloneResult struct {
// Cloned is true if the repo was freshly cloned; it stays false when an
// existing checkout was reused or when the clone did not succeed.
Cloned bool
// Error is non-nil when the clone URL is missing or `git clone` fails.
Error error
}
// IsGitRepo reports whether workDir inside the pod contains a `.git`
// directory. It runs `test -d <workDir>/.git` through kubectl exec and treats
// any failure to run the probe (including kubectl or the pod being
// unreachable) the same as "not a repository".
func (g *PodGitOperations) IsGitRepo(ctx context.Context, podName, workDir string) bool {
	gitDir := workDir + "/.git"
	probe := exec.CommandContext(ctx, "kubectl",
		"exec", "-n", g.namespace, podName, "--",
		"test", "-d", gitDir,
	)
	if err := probe.Run(); err != nil {
		return false
	}
	return true
}
// CloneRepo makes workDir inside the pod a checkout of cloneURL.
//
// Steps, in order:
//  1. An empty cloneURL is rejected with an error.
//  2. If workDir is already a git repository whose origin equals cloneURL
//     (compared with embedded credentials removed from both sides), the
//     checkout is kept and `git pull --ff-only` is attempted. A failed pull is
//     logged and tolerated so that local work from a previous attempt survives.
//  3. Otherwise any existing contents of workDir are removed (the directory
//     itself is kept) and the repository is cloned. When a Gitea token is
//     configured it is injected into the HTTPS clone URL.
//
// The result has Cloned set only after a successful fresh clone. Error is set
// for a missing URL or a failed clone; the clone error wraps the underlying
// exec error and includes git's stderr with the token redacted.
//
// Remote URLs are only ever logged in their credential-free form.
func (g *PodGitOperations) CloneRepo(ctx context.Context, podName, workDir, cloneURL string) *CloneResult {
	log := logging.FromContext(ctx).WithWorker("pod-git-ops")
	result := &CloneResult{}

	if cloneURL == "" {
		result.Error = fmt.Errorf("git clone URL is required")
		return result
	}

	// Credential-free form of the requested URL. Used for comparison, logging
	// and as the base for token injection, so caller-supplied credentials
	// neither break the comparison nor leak into logs.
	expectedURL := stripRemoteCredentials(cloneURL)

	// Check if already a git repo with the correct remote.
	if g.IsGitRepo(ctx, podName, workDir) {
		currentRemote, err := g.runGitInPodOutput(ctx, podName, workDir, "config", "--get", "remote.origin.url")
		// A previous clone stored the authenticated URL
		// (https://token:TOKEN@host/path), so strip credentials before
		// comparing against the requested URL.
		normalizedRemote := stripRemoteCredentials(strings.TrimSpace(currentRemote))

		if err == nil && normalizedRemote == expectedURL {
			log.Info("workspace is already a git repo with correct remote, pulling latest",
				logging.FieldPodName, podName,
				"workDir", workDir,
			)
			if err := g.runGitInPod(ctx, podName, workDir, "pull", "--ff-only"); err != nil {
				// Pull failed, but repo exists - not fatal, might have local changes.
				log.Warn("git pull failed, continuing with existing state",
					logging.FieldPodName, podName,
					logging.FieldError, err,
				)
			}
			return result
		}

		// Remote doesn't match - this is a different project's repo. Log the
		// normalized remote: the raw value may carry the clone token.
		log.Info("workspace has different git remote, will re-clone",
			logging.FieldPodName, podName,
			"workDir", workDir,
			"currentRemote", normalizedRemote,
			"expectedURL", expectedURL,
		)
	}

	// Clear any existing directory contents before cloning. This branch is
	// reached both when workDir is not a git repo and when it is a repo with
	// the wrong remote.
	if g.dirExists(ctx, podName, workDir) {
		log.Info("workspace exists but is not a git repo, clearing",
			logging.FieldPodName, podName,
			"workDir", workDir,
		)
		// Remove the contents but keep the directory itself. workDir is handed
		// to the shell as positional parameter $1 and quoted there, so spaces
		// or shell metacharacters in the path cannot alter the command. The
		// three globs cover regular entries, dot-files, and names that start
		// with two dots, without ever matching "." or "..".
		clearArgs := []string{
			"exec", "-n", g.namespace, podName, "--",
			"sh", "-c", `rm -rf -- "$1"/* "$1"/.[!.]* "$1"/..?*`, "sh", workDir,
		}
		cmd := exec.CommandContext(ctx, "kubectl", clearArgs...)
		if err := cmd.Run(); err != nil {
			log.Warn("failed to clear workspace, attempting clone anyway",
				logging.FieldPodName, podName,
				logging.FieldError, err,
			)
		}
	}

	// Inject the token for private repos:
	// https://git.example.com/owner/repo.git -> https://token:TOKEN@git.example.com/owner/repo.git
	// Injection starts from the credential-free URL so that credentials
	// already present in cloneURL do not produce a second userinfo section.
	authCloneURL := cloneURL
	if g.giteaToken != "" {
		authCloneURL = strings.Replace(expectedURL, "https://", "https://token:"+g.giteaToken+"@", 1)
	}

	log.Info("cloning repository",
		logging.FieldPodName, podName,
		"workDir", workDir,
		"url", expectedURL, // Log without credentials
	)

	kubectlArgs := []string{
		"exec", "-n", g.namespace, podName, "--",
		"git", "clone", authCloneURL, workDir,
	}
	cmd := exec.CommandContext(ctx, "kubectl", kubectlArgs...)
	var stderr bytes.Buffer
	cmd.Stderr = &stderr
	if err := cmd.Run(); err != nil {
		errMsg := g.redactToken(stderr.String())
		// %w keeps the exec error reachable through errors.Is / errors.As.
		result.Error = fmt.Errorf("git clone failed: %w: %s", err, errMsg)
		return result
	}

	result.Cloned = true
	log.Info("repository cloned successfully",
		logging.FieldPodName, podName,
		"workDir", workDir,
	)
	return result
}

// stripRemoteCredentials returns rawURL without the "user:password@" part of
// an https:// URL. Only the authority component (the text between "https://"
// and the first '/', '?' or '#') is examined, so an '@' that appears later in
// the path is left alone. URLs with another scheme, or without credentials,
// are returned unchanged.
func stripRemoteCredentials(rawURL string) string {
	const scheme = "https://"
	if !strings.HasPrefix(rawURL, scheme) {
		return rawURL
	}
	rest := rawURL[len(scheme):]

	authorityEnd := strings.IndexAny(rest, "/?#")
	if authorityEnd == -1 {
		authorityEnd = len(rest)
	}
	// Userinfo ends at the last '@' inside the authority.
	at := strings.LastIndex(rest[:authorityEnd], "@")
	if at == -1 {
		return rawURL
	}
	return scheme + rest[at+1:]
}
// dirExists reports whether path is a directory inside the pod. It runs
// `test -d <path>` through kubectl exec; any failure to run the probe is
// reported as false.
func (g *PodGitOperations) dirExists(ctx context.Context, podName, path string) bool {
	probe := exec.CommandContext(ctx, "kubectl",
		"exec", "-n", g.namespace, podName, "--",
		"test", "-d", path,
	)
	err := probe.Run()
	return err == nil
}
// CommitAndPush records whatever the build left in workDir as one git commit,
// made from inside the pod, and optionally pushes it.
//
// The sequence is:
//  1. set user.name / user.email from the configured author
//  2. `git status --porcelain`; stop with HasChanges=false if it prints nothing
//  3. `git add -A`
//  4. collect the staged paths (`git diff --cached --name-only`)
//  5. `git commit -m <message>` and read the new HEAD SHA
//  6. if push is true: install a credential helper carrying the Gitea token
//     (when one is configured), then `git push origin HEAD`
//
// The first failing step is stored in the result's Error, prefixed with the
// step name, and the fields filled in so far are returned alongside it.
// Failure to install the credential helper is only logged; the push is still
// attempted.
//
// This is the programmatic alternative to relying on LLMs for git operations.
func (g *PodGitOperations) CommitAndPush(ctx context.Context, podName, workDir, message string, push bool) *PostBuildResult {
	log := logging.FromContext(ctx).WithWorker("pod-git-ops")
	result := &PostBuildResult{}

	// Small adapters so every step below reads as a single git invocation.
	git := func(args ...string) error {
		return g.runGitInPod(ctx, podName, workDir, args...)
	}
	gitOutput := func(args ...string) (string, error) {
		return g.runGitInPodOutput(ctx, podName, workDir, args...)
	}
	// fail stores the step's error on the result and hands the result back.
	fail := func(step string, err error) *PostBuildResult {
		result.Error = fmt.Errorf("%s: %w", step, err)
		return result
	}

	// Author identity for the commit.
	if err := git("config", "user.name", g.gitUser); err != nil {
		return fail("git config user.name", err)
	}
	if err := git("config", "user.email", g.gitEmail); err != nil {
		return fail("git config user.email", err)
	}

	// Nothing to do when the working tree is clean.
	porcelain, err := gitOutput("status", "--porcelain")
	if err != nil {
		return fail("git status", err)
	}
	if strings.TrimSpace(porcelain) == "" {
		log.Info("no changes to commit", logging.FieldPodName, podName, "workDir", workDir)
		return result
	}
	result.HasChanges = true

	if err := git("add", "-A"); err != nil {
		return fail("git add", err)
	}

	// One staged path per output line; blank lines are skipped.
	staged, err := gitOutput("diff", "--cached", "--name-only")
	if err != nil {
		return fail("git diff", err)
	}
	for _, name := range strings.Split(strings.TrimSpace(staged), "\n") {
		if name == "" {
			continue
		}
		result.FilesChanged = append(result.FilesChanged, name)
	}

	if err := git("commit", "-m", message); err != nil {
		return fail("git commit", err)
	}

	head, err := gitOutput("rev-parse", "HEAD")
	if err != nil {
		return fail("git rev-parse", err)
	}
	result.CommitSHA = strings.TrimSpace(head)
	log.Info("committed changes",
		logging.FieldPodName, podName,
		"sha", result.CommitSHA,
		"files", len(result.FilesChanged),
	)

	if !push {
		return result
	}

	if g.giteaToken != "" {
		// Supply the token through a git credential helper rather than the
		// push URL, where it would be visible in logs.
		credHelper := fmt.Sprintf("!f() { echo username=token; echo password=%s; }; f", g.giteaToken)
		if err := git("config", "credential.helper", credHelper); err != nil {
			// Continue anyway - push might still work if the pod has other
			// auth configured.
			log.Warn("failed to configure credential helper", logging.FieldError, err)
		}
	}

	if err := git("push", "origin", "HEAD"); err != nil {
		return fail("git push", err)
	}
	result.Pushed = true
	log.Info("pushed changes", logging.FieldPodName, podName, "sha", result.CommitSHA)
	return result
}
// runGitInPod runs `git -C <workDir> <args...>` inside podName via kubectl
// exec. Standard output is not captured; use runGitInPodOutput when it is
// needed.
//
// On failure the returned error wraps the underlying exec error, so callers
// can inspect it with errors.Is / errors.As, followed by git's stderr with
// the configured Gitea token redacted and surrounding whitespace trimmed.
func (g *PodGitOperations) runGitInPod(ctx context.Context, podName, workDir string, args ...string) error {
	// Build: kubectl exec -n <namespace> <pod> -- git -C <workDir> <args...>
	kubectlArgs := []string{
		"exec", "-n", g.namespace, podName, "--",
		"git", "-C", workDir,
	}
	kubectlArgs = append(kubectlArgs, args...)

	cmd := exec.CommandContext(ctx, "kubectl", kubectlArgs...)
	var stderr bytes.Buffer
	cmd.Stderr = &stderr
	if err := cmd.Run(); err != nil {
		// Redact before trimming so the token can never reach the message.
		errMsg := strings.TrimSpace(g.redactToken(stderr.String()))
		return fmt.Errorf("%w: %s", err, errMsg)
	}
	return nil
}
// runGitInPodOutput runs `git -C <workDir> <args...>` inside podName via
// kubectl exec and returns everything the command wrote to standard output,
// unmodified.
//
// On failure it returns an empty string and an error that wraps the
// underlying exec error, so callers can inspect it with errors.Is /
// errors.As, followed by git's stderr with the configured Gitea token
// redacted and surrounding whitespace trimmed.
func (g *PodGitOperations) runGitInPodOutput(ctx context.Context, podName, workDir string, args ...string) (string, error) {
	// Build: kubectl exec -n <namespace> <pod> -- git -C <workDir> <args...>
	kubectlArgs := []string{
		"exec", "-n", g.namespace, podName, "--",
		"git", "-C", workDir,
	}
	kubectlArgs = append(kubectlArgs, args...)

	cmd := exec.CommandContext(ctx, "kubectl", kubectlArgs...)
	var stdout, stderr bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr
	if err := cmd.Run(); err != nil {
		// Redact before trimming so the token can never reach the message.
		errMsg := strings.TrimSpace(g.redactToken(stderr.String()))
		return "", fmt.Errorf("%w: %s", err, errMsg)
	}
	return stdout.String(), nil
}
// redactToken returns s with every occurrence of the configured Gitea token
// replaced by "[REDACTED]". When no token is configured, s is returned
// unchanged.
func (g *PodGitOperations) redactToken(s string) string {
	if token := g.giteaToken; token != "" {
		return strings.ReplaceAll(s, token, "[REDACTED]")
	}
	return s
}