rdev/internal/adapter/woodpecker/client.go
jordan f20fc6c51c
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
feat(saga): implement enterprise-grade resilience architecture
Fixes issues from code review of resilience implementation:

- Wire saga system in main.go (SagaRepository, SagaExecutor, SagaHandler)
- Fix CompletedSteps() to include skipped steps for dependency resolution
- Fix reverse loop bug in saga compensation (use standard swap pattern)
- Add circuit breaker state change callbacks for Prometheus metrics

Phase 1 (Build Resilience):
- Add failure:retry to all component Kaniko build steps
- Add preflight registry health check before builds
- Add services-deployed sync point to decouple docs from critical path

Phase 2 (API Resilience):
- Add pipeline retry endpoint (POST /projects/{id}/pipelines/{number}/retry)
- Wire circuit breakers with metrics callbacks
- Add /health/circuits endpoint for circuit breaker status

Phase 3 (Saga Engine):
- Full domain model (Saga, SagaStep, RetryPolicy, BackoffType)
- PostgreSQL saga repository with CRUD and step management
- Saga executor with retry, compensation, skip step support
- Saga API handlers with CRUD and control operations

Phase 4 (Observability):
- Add saga metrics (total, step_duration, retry, circuit_breaker_state)
- Add logging fields (saga_id, saga_name, step_name)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-08 01:58:02 -07:00

415 lines
11 KiB
Go

// Package woodpecker provides a Woodpecker CI adapter implementing port.CIProvider.
//
// The Woodpecker API requires a few key concepts:
// - forge_remote_id: The ID of the repo in the forge (e.g., Gitea). Used to activate repos.
// - repo_id: Woodpecker's internal repo ID, used after activation.
//
// To activate a repo, we need to find it in the available repos list (synced from forge)
// and then POST to activate it using the forge_remote_id.
//
// Context Propagation Note:
// The Woodpecker Go SDK does not natively support context propagation for HTTP requests.
// Methods accept context.Context for interface compatibility and cancellation checks,
// but the underlying SDK calls do not use it for cancellation or timeouts.
package woodpecker
import (
"context"
"fmt"
"log/slog"
"net/http"
"strconv"
"strings"
"time"
"go.woodpecker-ci.org/woodpecker/v3/woodpecker-go/woodpecker"
"github.com/orchard9/rdev/internal/circuitbreaker"
"github.com/orchard9/rdev/internal/domain"
"github.com/orchard9/rdev/internal/port"
)
// Compile-time assertions that *Client satisfies both port.CIProvider and
// port.ExternalHealthChecker.
var (
	_ port.CIProvider            = (*Client)(nil)
	_ port.ExternalHealthChecker = (*Client)(nil)
)
// tokenTransport is an http.RoundTripper that adds bearer token auth to every
// request before handing it to an underlying transport.
type tokenTransport struct {
	// token is sent as "Authorization: Bearer <token>".
	token string
	// base performs the actual round trip; http.DefaultTransport is used when
	// it is nil.
	base http.RoundTripper
}

// RoundTrip implements http.RoundTripper. It forwards a copy of req that
// carries the bearer token and returns whatever the base transport returns.
// The request passed in is never modified.
func (t *tokenTransport) RoundTrip(req *http.Request) (*http.Response, error) {
	// Clone the request to avoid mutating the original per RoundTripper contract.
	authed := req.Clone(req.Context())

	// Clone keeps a nil Header nil, and writing to a nil map panics. Requests
	// built by hand (without http.NewRequest) can arrive here without a map.
	if authed.Header == nil {
		authed.Header = make(http.Header)
	}
	authed.Header.Set("Authorization", "Bearer "+t.token)

	base := t.base
	if base == nil {
		base = http.DefaultTransport
	}
	return base.RoundTrip(authed)
}
// Client is a Woodpecker CI API client adapter.
// It is built by NewClient and customized through ClientOption values.
type Client struct {
// client is the Woodpecker SDK client the methods in this file call.
client woodpecker.Client
// url is the normalized server URL set by NewClient; Check reports it in its status.
url string
// logger receives debug output; NewClient defaults it to slog.Default().
logger *slog.Logger
// cb is the circuit breaker used by executeWithCircuitBreaker; when nil, calls run unwrapped.
cb *circuitbreaker.CircuitBreaker
}
// NewClient creates a new Woodpecker client.
//
// url is the Woodpecker server URL (e.g., https://ci.threesix.ai); trailing
// slashes are stripped before it is used.
// token is an API token (generate from Woodpecker UI: Settings → API → Personal token).
// opts customize the client. Without options the logger is slog.Default() and
// the circuit breaker is the one registered in circuitbreaker.GlobalRegistry
// under circuitbreaker.NameWoodpecker. Nil options are skipped.
//
// An error is returned when url is empty after normalization or token is empty.
func NewClient(url, token string, opts ...ClientOption) (*Client, error) {
	// Normalize before validating so that a URL made up only of slashes is
	// rejected instead of silently becoming an empty base URL.
	url = strings.TrimRight(url, "/")
	if url == "" {
		return nil, fmt.Errorf("woodpecker URL is required")
	}
	if token == "" {
		return nil, fmt.Errorf("woodpecker token is required")
	}

	// HTTP client that attaches the bearer token to every request and bounds
	// each request to 30 seconds.
	httpClient := &http.Client{
		Timeout: 30 * time.Second,
		Transport: &tokenTransport{
			token: token,
			base:  http.DefaultTransport,
		},
	}

	c := &Client{
		client: woodpecker.NewClient(url, httpClient),
		url:    url,
		logger: slog.Default(),
		cb:     circuitbreaker.GlobalRegistry.Get(circuitbreaker.NameWoodpecker),
	}

	// Options run after the defaults are in place so they can override them.
	for _, opt := range opts {
		if opt != nil {
			opt(c)
		}
	}
	return c, nil
}
// ClientOption configures the Woodpecker client.
// NewClient applies options in the order given, after the defaults are set.
type ClientOption func(*Client)
// WithLogger returns an option that replaces the client's logger.
// A nil logger is ignored, leaving the existing logger in place.
func WithLogger(logger *slog.Logger) ClientOption {
	return func(c *Client) {
		if logger == nil {
			return
		}
		c.logger = logger
	}
}
// WithCircuitBreaker returns an option that replaces the client's circuit
// breaker. A nil breaker is ignored, leaving the existing one in place.
func WithCircuitBreaker(cb *circuitbreaker.CircuitBreaker) ClientOption {
	return func(c *Client) {
		if cb == nil {
			return
		}
		c.cb = cb
	}
}
// executeWithCircuitBreaker runs fn through the client's circuit breaker.
// When no breaker is configured, fn is called directly and its error returned.
func (c *Client) executeWithCircuitBreaker(fn func() error) error {
	if breaker := c.cb; breaker != nil {
		return breaker.Execute(fn)
	}
	return fn()
}
// ActivateRepo enables CI for a repository.
// The forge parameter is unused (Woodpecker determines this from its config).
// owner/repo must match the repository in the forge.
//
// Woodpecker learns about newly created repos through an asynchronous sync
// from the forge, so the repo is searched for up to 5 times with a 3s pause
// between attempts. The attempt count is kept small to stay under Traefik's
// 30s proxy timeout. Each pause ends early if ctx is cancelled, in which case
// ctx.Err() is returned.
//
// A repo that is already active is returned as-is. Otherwise it is activated
// using its numeric forge remote ID. Errors are returned when the repo never
// appears, its forge metadata never syncs, the forge remote ID is not numeric,
// or the activation request fails.
func (c *Client) ActivateRepo(ctx context.Context, forge, owner, repo string) (*domain.CIRepo, error) {
	const (
		maxAttempts = 5
		retryDelay  = 3 * time.Second
	)

	fullName := owner + "/" + repo

	// wait pauses between attempts but gives up as soon as ctx is done, so a
	// cancelled caller is not held for the remainder of the back-off.
	wait := func() error {
		timer := time.NewTimer(retryDelay)
		defer timer.Stop()
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-timer.C:
			return nil
		}
	}

	var targetRepo *woodpecker.Repo
	var lastErr error

	for attempt := 1; attempt <= maxAttempts; attempt++ {
		// Pause only between attempts: never before the first one and never
		// after the last one.
		if attempt > 1 {
			if err := wait(); err != nil {
				return nil, err
			}
		}
		if err := ctx.Err(); err != nil {
			return nil, err
		}

		targetRepo = nil // Reset for this attempt

		// Sync and get ALL repos (including inactive) - new repos start inactive
		repos, err := c.client.RepoList(woodpecker.RepoListOptions{All: true})
		if err != nil {
			lastErr = fmt.Errorf("failed to list repos: %w", err)
			c.logger.Debug("failed to list repos", "error", err, "attempt", attempt)
			continue
		}
		for _, r := range repos {
			if r != nil && strings.EqualFold(r.FullName, fullName) {
				targetRepo = r
				break
			}
		}

		if targetRepo == nil {
			// Repo not found in list - try direct lookup
			found, lookupErr := c.client.RepoLookup(fullName)
			if lookupErr != nil || found == nil {
				// SDK bug: RepoLookup returns non-nil empty struct on error,
				// so the result is only trusted when the error is nil.
				if lookupErr != nil {
					lastErr = fmt.Errorf("repo not found in Woodpecker: %s: %w", fullName, lookupErr)
				} else {
					lastErr = fmt.Errorf("repo not found in Woodpecker: %s", fullName)
				}
				if attempt < maxAttempts {
					c.logger.Debug("repo not found, retrying", "repo", fullName, "attempt", attempt, "max", maxAttempts)
				}
				continue
			}
			targetRepo = found
		}

		// Repo found AND has valid ForgeRemoteID (metadata sync complete)
		if targetRepo.ForgeRemoteID != "" {
			break
		}

		// Repo found but ForgeRemoteID empty - metadata sync incomplete, retry
		lastErr = fmt.Errorf("repo %s found but forge metadata not synced yet", fullName)
		if attempt < maxAttempts {
			c.logger.Debug("repo found but forge_remote_id empty, retrying", "repo", fullName, "attempt", attempt)
		}
	}

	if targetRepo == nil {
		return nil, fmt.Errorf("%w (tried %d times)", lastErr, maxAttempts)
	}

	// Final check: ensure ForgeRemoteID is valid (non-empty)
	if targetRepo.ForgeRemoteID == "" {
		return nil, fmt.Errorf("repo %s found but forge metadata never synced (tried %d times)", fullName, maxAttempts)
	}

	// If already active, just return it
	if targetRepo.IsActive {
		return repoFromWoodpecker(targetRepo), nil
	}

	// Parse the forge remote ID (stored as string, API expects int64)
	forgeID, err := strconv.ParseInt(targetRepo.ForgeRemoteID, 10, 64)
	if err != nil {
		return nil, fmt.Errorf("invalid forge_remote_id %q: %w", targetRepo.ForgeRemoteID, err)
	}

	// Activate the repo using the forge remote ID
	activatedRepo, err := c.client.RepoPost(woodpecker.RepoPostOptions{ForgeRemoteID: forgeID})
	if err != nil {
		return nil, fmt.Errorf("failed to activate repo: %w", err)
	}
	return repoFromWoodpecker(activatedRepo), nil
}
// DeactivateRepo disables CI for a repository.
//
// The repo is looked up by "owner/repo" and then deleted from Woodpecker by
// its ID. ctx is only checked for cancellation before the first call. Lookup
// and delete failures are returned with the underlying error wrapped.
func (c *Client) DeactivateRepo(ctx context.Context, owner, repo string) error {
	if err := ctx.Err(); err != nil {
		return err
	}

	fullName := owner + "/" + repo

	// Find the repo
	r, err := c.client.RepoLookup(fullName)
	if err != nil {
		// The message prefix is kept as before; the cause is wrapped so that
		// network or auth failures are not mistaken for a missing repo.
		return fmt.Errorf("repo not found: %s: %w", fullName, err)
	}

	// Deactivate (remove from Woodpecker)
	if err := c.client.RepoDel(r.ID); err != nil {
		return fmt.Errorf("failed to deactivate repo: %w", err)
	}
	return nil
}
// GetRepo returns the CI configuration for a repository.
//
// The repo is looked up by "owner/repo". ctx is only checked for cancellation
// before the lookup. A failed lookup is returned with the underlying error
// wrapped.
func (c *Client) GetRepo(ctx context.Context, owner, repo string) (*domain.CIRepo, error) {
	if err := ctx.Err(); err != nil {
		return nil, err
	}

	fullName := owner + "/" + repo
	r, err := c.client.RepoLookup(fullName)
	if err != nil {
		// The message prefix is kept as before; the cause is wrapped so that
		// network or auth failures are not mistaken for a missing repo.
		return nil, fmt.Errorf("repo not found: %s: %w", fullName, err)
	}
	return repoFromWoodpecker(r), nil
}
// ListRepos returns the repositories reported by Woodpecker's repo list,
// converted to domain.CIRepo values. ctx is only checked for cancellation
// before the request is made.
func (c *Client) ListRepos(ctx context.Context) ([]*domain.CIRepo, error) {
	if err := ctx.Err(); err != nil {
		return nil, err
	}

	wpRepos, err := c.client.RepoList(woodpecker.RepoListOptions{})
	if err != nil {
		return nil, fmt.Errorf("failed to list repos: %w", err)
	}

	converted := make([]*domain.CIRepo, 0, len(wpRepos))
	for _, wpRepo := range wpRepos {
		converted = append(converted, repoFromWoodpecker(wpRepo))
	}
	return converted, nil
}
// AddSecret adds a secret to a repository for use in pipelines.
//
// The repo is looked up by "owner/repo" to obtain its Woodpecker ID, then the
// secret's name, value, events and images are passed to the SDK unchanged.
// ctx is only checked for cancellation before the first call. Lookup and
// creation failures are returned with the underlying error wrapped.
func (c *Client) AddSecret(ctx context.Context, owner, repo string, secret domain.CISecret) error {
	if err := ctx.Err(); err != nil {
		return err
	}

	fullName := owner + "/" + repo

	// Find the repo to get its ID
	r, err := c.client.RepoLookup(fullName)
	if err != nil {
		// The message prefix is kept as before; the cause is wrapped so that
		// network or auth failures are not mistaken for a missing repo.
		return fmt.Errorf("repo not found: %s: %w", fullName, err)
	}

	// Create the secret
	if _, err := c.client.SecretCreate(r.ID, &woodpecker.Secret{
		Name:   secret.Name,
		Value:  secret.Value,
		Events: secret.Events,
		Images: secret.Images,
	}); err != nil {
		return fmt.Errorf("failed to create secret: %w", err)
	}
	return nil
}
// DeleteSecret removes a secret from a repository.
//
// The repo is looked up by "owner/repo" to obtain its Woodpecker ID, then the
// secret named secretName is deleted. ctx is only checked for cancellation
// before the first call. Lookup and deletion failures are returned with the
// underlying error wrapped.
func (c *Client) DeleteSecret(ctx context.Context, owner, repo, secretName string) error {
	if err := ctx.Err(); err != nil {
		return err
	}

	fullName := owner + "/" + repo

	// Find the repo to get its ID
	r, err := c.client.RepoLookup(fullName)
	if err != nil {
		// The message prefix is kept as before; the cause is wrapped so that
		// network or auth failures are not mistaken for a missing repo.
		return fmt.Errorf("repo not found: %s: %w", fullName, err)
	}

	// Delete the secret
	if err := c.client.SecretDelete(r.ID, secretName); err != nil {
		return fmt.Errorf("failed to delete secret: %w", err)
	}
	return nil
}
// Check reports the health of the Woodpecker CI system.
// Implements port.ExternalHealthChecker.
//
// The returned status always carries the system name, the client URL, the
// measured latency and the check time (UTC). It is healthy only when ctx is
// still live and the Self() call succeeds; otherwise Error holds the reason.
func (c *Client) Check(ctx context.Context) domain.ExternalSystemStatus {
	start := time.Now()

	status := domain.ExternalSystemStatus{
		System: domain.ExternalSystemCI,
		URL:    c.url,
	}

	// A cancelled context is reported as a failed check without calling the
	// API. Otherwise Self() is used as the probe (lightweight, tests auth).
	probeErr := ctx.Err()
	if probeErr == nil {
		_, probeErr = c.client.Self()
	}

	status.Latency = time.Since(start)
	status.LastChecked = time.Now().UTC()

	if probeErr != nil {
		status.Healthy = false
		status.Error = probeErr.Error()
		return status
	}

	status.Healthy = true
	status.LastHealthy = status.LastChecked
	return status
}
// repoFromWoodpecker maps an SDK woodpecker.Repo onto a domain.CIRepo.
//
// The SDK stores the forge remote ID as a string while the domain type uses
// int64. An ID that does not parse as a base-10 int64 (including an empty
// one) is deliberately left as 0, since some forges may use non-numeric IDs.
func repoFromWoodpecker(r *woodpecker.Repo) *domain.CIRepo {
	converted := &domain.CIRepo{
		ID:                r.ID,
		Owner:             r.Owner,
		Name:              r.Name,
		FullName:          r.FullName,
		CloneURL:          r.Clone,
		Active:            r.IsActive,
		AllowPullRequests: r.AllowPull, // Renamed in SDK v3
		Visibility:        r.Visibility,
	}

	// ParseInt also rejects the empty string, so no separate emptiness check
	// is needed; ForgeRemoteID keeps its zero value on any parse error.
	if id, err := strconv.ParseInt(r.ForgeRemoteID, 10, 64); err == nil {
		converted.ForgeRemoteID = id
	}
	return converted
}