diff --git a/CLAUDE.md b/CLAUDE.md index fb4c948..03b9602 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -23,6 +23,7 @@ Run Claude Code instances in isolated Kubernetes pods with REST API control. Ena | **Worker pool management** | [services/worker-pool.md](.claude/guides/services/worker-pool.md) | | **Project templates** | [services/templates.md](.claude/guides/services/templates.md) | | **Composable monorepo templates** | [services/composable-monorepo.md](.claude/guides/services/composable-monorepo.md) | +| **Write E2E cookbook test scripts** | [cookbook-scripts/SKILL.md](.claude/skills/cookbook-scripts/SKILL.md) | | **Build orchestration** | [services/build-orchestration.md](.claude/guides/services/build-orchestration.md) | | **Build event streaming** | [services/build-streaming.md](.claude/guides/services/build-streaming.md) | | **Resource provisioning plan** | [services/resource-provisioning-plan.md](.claude/guides/services/resource-provisioning-plan.md) | @@ -164,7 +165,7 @@ cookbooks/ # End-to-end workflow guides | Build Orchestration | Planned | Structured build specs via API | | Composable Monorepo Templates | **Done** | Monorepo skeleton + component templates (service, worker, app-astro, app-react, cli) | -**Current Version:** v0.10.24 +**Current Version:** v0.10.25 ## Constraints diff --git a/cmd/rdev-api/config.go b/cmd/rdev-api/config.go index aab47a4..43b9871 100644 --- a/cmd/rdev-api/config.go +++ b/cmd/rdev-api/config.go @@ -104,7 +104,7 @@ func loadConfig() Config { DeployNamespace: envutil.GetEnv("DEPLOY_NAMESPACE", "projects"), DeployTLSIssuer: envutil.GetEnv("DEPLOY_TLS_ISSUER", "letsencrypt-prod"), ClusterIP: envutil.GetEnv("CLUSTER_IP", "208.122.204.172"), - RegistryURL: envutil.GetEnv("REGISTRY_URL", "zot.threesix.svc.cluster.local:5000"), + RegistryURL: envutil.GetEnv("REGISTRY_URL", "registry.threesix.ai"), WoodpeckerURL: envutil.GetEnv("WOODPECKER_URL", "https://ci.threesix.ai"), WoodpeckerAPIToken: os.Getenv("WOODPECKER_API_TOKEN"), WoodpeckerWebhookSecret: os.Getenv("WOODPECKER_WEBHOOK_SECRET"), diff --git a/cmd/rdev-api/main.go b/cmd/rdev-api/main.go index 6557796..9a9ce0c 100644 --- a/cmd/rdev-api/main.go +++ b/cmd/rdev-api/main.go @@ -250,6 +250,10 @@ func main() { Logger: logger, }).WithWebhookDispatcher(webhookDispatcher) + // Initialize operation tracking (for debugging project failures) + operationRepo := postgres.NewOperationRepository(database.DB) + operationService := service.NewOperationService(operationRepo, logger) + // Initialize worker pool infrastructure workerRegistryRepo := postgres.NewWorkerRegistryRepository(database.DB) buildAuditRepo := postgres.NewBuildAuditRepository(database.DB) @@ -386,6 +390,12 @@ func main() { buildsHandler := handlers.NewBuildsHandler(buildService) createAndBuildHandler := handlers.NewCreateAndBuildHandler(projectInfraService, buildService, logger) + // Initialize operations handler (for debugging project failures) + operationsHandler := handlers.NewOperationsHandler(operationRepo) + + // Suppress unused variable warning - operationService will be wired to handlers in instrumentation phase + _ = operationService + // Override default health/ready endpoints with full dependency checks healthHandler := handlers.NewHealthHandler("rdev-api", database.DB, nil). WithAgentRegistry(agentRegistry) @@ -412,6 +422,7 @@ func main() { workersHandler.Mount(app.Router()) buildsHandler.Mount(app.Router()) createAndBuildHandler.Mount(app.Router()) + operationsHandler.Mount(app.Router()) // Start queue processor worker (per-project command queue) queueProcessor := worker.NewQueueProcessor( @@ -471,12 +482,21 @@ func main() { ) queueMaintenance.Start() + // Start operation cleanup worker (30-day retention) + operationCleanup := worker.NewOperationCleanup(operationRepo, &worker.OperationCleanupConfig{ + RetentionPeriod: 30 * 24 * time.Hour, + CleanupInterval: 1 * time.Hour, + Logger: logger, + }) + operationCleanup.Start() + // Enable API documentation app.EnableDocs(buildOpenAPISpec()) app.OnShutdown(func(ctx context.Context) error { workExecutor.Stop() queueMaintenance.Stop() + operationCleanup.Stop() queueProcessor.Stop() webhookDispatcher.Stop() projectRepo.StopWatching() diff --git a/cookbooks/scripts/landing-test.sh b/cookbooks/scripts/landing-test.sh index d39d9ed..15c8f20 100755 --- a/cookbooks/scripts/landing-test.sh +++ b/cookbooks/scripts/landing-test.sh @@ -1,4 +1,6 @@ #!/bin/bash +set -euo pipefail + # Landing Page Cookbook Test Script # Tests the composable landing page flow from cookbooks/landing-page.md # @@ -10,315 +12,186 @@ # # Usage: # ./cookbooks/scripts/landing-test.sh run [name] # Run the full flow -# ./cookbooks/scripts/landing-test.sh teardown [name] # Clean up test resources # ./cookbooks/scripts/landing-test.sh status [name] # Check current status +# ./cookbooks/scripts/landing-test.sh diagnose [name] # Deep diagnostic analysis +# ./cookbooks/scripts/landing-test.sh teardown [name] # Clean up test resources -set -euo pipefail +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +source "$SCRIPT_DIR/common.sh" -# Configuration -API_URL="${RDEV_API_URL:-https://rdev.masq-ops.orchard9.ai}" -API_KEY="${RDEV_API_KEY:?RDEV_API_KEY environment variable required}" +COMMAND="${1:-}" +PROJECT_NAME="${2:-landing-test-$(date +%s)}" -# Colors -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -BLUE='\033[0;34m' -NC='\033[0m' # No Color - -log_info() { echo -e "${BLUE}[INFO]${NC} $1"; } -log_success() { echo -e "${GREEN}[OK]${NC} $1"; } -log_warn() { echo -e "${YELLOW}[WARN]${NC} $1"; } -log_error() { echo -e "${RED}[ERROR]${NC} $1"; } - -# Timeouts -PIPELINE_TIMEOUT=300 # 5 minutes max wait for CI pipeline -PIPELINE_POLL_INTERVAL=10 -SITE_TIMEOUT=120 # 2 minutes max wait for site to be live - -api_call() { - local method="$1" - local endpoint="$2" - local data="${3:-}" - - if [[ -n "$data" ]]; then - curl -s -X "$method" "${API_URL}${endpoint}" \ - -H "X-API-Key: ${API_KEY}" \ - -H "Content-Type: application/json" \ - -d "$data" - else - curl -s -X "$method" "${API_URL}${endpoint}" \ - -H "X-API-Key: ${API_KEY}" - fi -} - -check_health() { - log_info "Checking API health..." - local response - response=$(curl -s "${API_URL}/health") - if echo "$response" | jq -e '.data.status == "ok"' > /dev/null 2>&1; then - log_success "API is healthy" - return 0 - else - log_error "API health check failed" - echo "$response" | jq . - return 1 - fi -} - -# Wait for pipeline to appear and complete -wait_for_pipeline() { - local project_name="$1" - local start_time=$(date +%s) - local pipeline_found=false - local pipeline_number="" - local pipeline_status="" - - log_info "Waiting for CI pipeline to start (timeout: ${PIPELINE_TIMEOUT}s)..." - - while true; do - local elapsed=$(($(date +%s) - start_time)) - if [[ $elapsed -ge $PIPELINE_TIMEOUT ]]; then - log_error "Pipeline timeout after ${PIPELINE_TIMEOUT}s" - return 1 - fi - - local response - response=$(api_call GET "/projects/$project_name/pipelines" 2>/dev/null || echo "{}") - - # Check if we have pipelines - local pipeline_count - pipeline_count=$(echo "$response" | jq -r '.data | length' 2>/dev/null || echo "0") - - if [[ "$pipeline_count" -gt 0 ]]; then - if [[ "$pipeline_found" == "false" ]]; then - pipeline_found=true - pipeline_number=$(echo "$response" | jq -r '.data[0].number') - log_success "Pipeline #$pipeline_number started" - fi - - # Get latest pipeline status - pipeline_status=$(echo "$response" | jq -r '.data[0].status') - - case "$pipeline_status" in - success) - echo "" - log_success "Pipeline #$pipeline_number completed successfully (${elapsed}s)" - return 0 - ;; - failure|error|killed|declined) - echo "" - log_error "Pipeline #$pipeline_number failed with status: $pipeline_status" - echo "$response" | jq '.data[0]' - return 1 - ;; - running|pending) - echo -ne "\r${BLUE}[INFO]${NC} Pipeline #$pipeline_number status: $pipeline_status (${elapsed}s)... " - ;; - *) - echo -ne "\r${BLUE}[INFO]${NC} Pipeline #$pipeline_number status: $pipeline_status (${elapsed}s)... " - ;; - esac - else - echo -ne "\r${BLUE}[INFO]${NC} Waiting for pipeline to start (${elapsed}s)... " - fi - - sleep $PIPELINE_POLL_INTERVAL - done -} - -# Wait for site to be accessible -wait_for_site() { - local domain="$1" - local start_time=$(date +%s) - - log_info "Waiting for site to be live: https://$domain (timeout: ${SITE_TIMEOUT}s)..." - - while true; do - local elapsed=$(($(date +%s) - start_time)) - if [[ $elapsed -ge $SITE_TIMEOUT ]]; then - log_warn "Site timeout after ${SITE_TIMEOUT}s - may still be deploying" - return 1 - fi - - local http_code - http_code=$(curl -s -o /dev/null -w "%{http_code}" --max-time 5 "https://$domain" 2>/dev/null || echo "000") - - if [[ "$http_code" == "200" ]]; then - echo "" - log_success "Site is live! HTTP $http_code (${elapsed}s)" - return 0 - elif [[ "$http_code" == "000" ]]; then - echo -ne "\r${BLUE}[INFO]${NC} Waiting for site... (${elapsed}s, connection failed) " - else - echo -ne "\r${BLUE}[INFO]${NC} Waiting for site... (${elapsed}s, HTTP $http_code) " - fi - - sleep 3 - done -} +if [[ -z "$COMMAND" ]]; then + echo "Landing Page E2E Test Script" + echo "" + echo "Usage: $0 [project-name]" + echo "" + echo "Commands:" + echo " run - Run the full composable landing page flow" + echo " status - Check project and component status" + echo " diagnose - Deep diagnostic of pipeline and site issues" + echo " teardown - Delete project (preserves git repo)" + echo "" + echo "Examples:" + echo " $0 run my-landing" + echo " $0 status my-landing" + echo " $0 diagnose my-landing" + echo " $0 teardown my-landing" + echo "" + exit 1 +fi # Main run flow run_flow() { - local project_name="$1" - - echo "" - echo "==========================================" - echo " Landing Page E2E Test (Composable)" - echo "==========================================" - echo "" - echo "Project: $project_name" - echo "" - - # Step 0: Health check - check_health || exit 1 - echo "" + print_header "Landing Page E2E Test (Composable)" + echo "Project: $PROJECT_NAME" # Step 1: Create project (monorepo skeleton) - log_info "Step 1: Creating project skeleton..." + print_header "Step 1: Creating project skeleton" local create_response - create_response=$(api_call POST "/projects" "{\"name\": \"$project_name\", \"description\": \"Landing page E2E test\"}") + create_response=$(api_call POST "/project" "{\"name\": \"$PROJECT_NAME\", \"description\": \"Landing page E2E test\"}") + echo "$create_response" | jq '.' local domain domain=$(echo "$create_response" | jq -r '.data.domain // ""') if [[ -z "$domain" || "$domain" == "null" ]]; then - log_error "Failed to create project" - echo "$create_response" | jq . + print_error "Failed to create project" exit 1 fi - log_success "Project created: $project_name" + print_success "Project created: $PROJECT_NAME" echo " Domain: $domain" - echo " Git: https://git.threesix.ai/jordan/$project_name" - echo "" + echo " Git: https://git.threesix.ai/$(get_git_owner)/$PROJECT_NAME" # Step 2: Add app-astro component - log_info "Step 2: Adding landing page component (app-astro)..." + print_header "Step 2: Adding landing page component (app-astro)" local component_response - component_response=$(api_call POST "/projects/$project_name/components" '{"type": "app", "name": "landing", "template": "app-astro"}') + component_response=$(api_call POST "/projects/$PROJECT_NAME/components" '{"type": "app-astro", "name": "landing", "template": "app-astro"}') local component_path component_path=$(echo "$component_response" | jq -r '.data.path // ""') if [[ -z "$component_path" || "$component_path" == "null" ]]; then - log_error "Failed to add component" - echo "$component_response" | jq . + print_error "Failed to add component" + echo "$component_response" | jq '.' exit 1 fi local component_port component_port=$(echo "$component_response" | jq -r '.data.port // "N/A"') - log_success "Component added: $component_path (port: $component_port)" - echo "" + print_success "Component added: $component_path (port: $component_port)" - # Step 3: Wait for pipeline - log_info "Step 3: Waiting for CI pipeline..." - echo "" - - if ! wait_for_pipeline "$project_name"; then - log_warn "Pipeline failed, but continuing to check if site is accessible..." + # Step 3: Wait for pipeline (auto-diagnoses on failure) + print_header "Step 3: Waiting for CI pipeline" + if ! wait_for_pipeline "$PROJECT_NAME"; then + print_warning "Pipeline failed, continuing to check site..." fi - echo "" - # Step 4: Wait for site - log_info "Step 4: Verifying site is accessible..." - - if wait_for_site "$domain"; then - log_success "Site verified!" - else - log_warn "Site not accessible yet, may need more time" + # Step 4: Wait for site (auto-diagnoses on timeout) + print_header "Step 4: Verifying site is accessible" + if ! wait_for_site "$domain" 30 5 "$PROJECT_NAME"; then + print_error "Site not accessible" + exit 1 fi - echo "" # Summary - echo "==========================================" - echo " Test Complete" - echo "==========================================" + print_header "Test Complete" + print_success "Project created: $PROJECT_NAME" + print_success "Landing page deployed" echo "" - echo " Site URL: https://$domain" - echo " Git: https://git.threesix.ai/jordan/$project_name" - echo " CI: https://ci.threesix.ai/jordan/$project_name" - echo "" - echo " To customize: POST /projects/$project_name/builds with a prompt" - echo " To teardown: $0 teardown $project_name" + echo "Site URL: https://$domain" + echo "Git repo: https://git.threesix.ai/$(get_git_owner)/$PROJECT_NAME" + echo "CI: https://ci.threesix.ai/$(get_git_owner)/$PROJECT_NAME" echo "" + echo "To customize: POST /projects/$PROJECT_NAME/builds with a prompt" + echo "To teardown: $0 teardown $PROJECT_NAME" } # Check status check_status() { - local project_name="$1" + print_header "Project Status: $PROJECT_NAME" - echo "" - echo "=== Project Status: $project_name ===" + echo "Project:" + api_call GET "/project/$PROJECT_NAME" | jq '.data // .' echo "" - log_info "Project info:" - api_call GET "/projects/$project_name" | jq '.data // .' + echo "Components:" + api_call GET "/projects/$PROJECT_NAME/components" | jq '.data // .' echo "" - log_info "Components:" - api_call GET "/projects/$project_name/components" | jq '.data // .' - echo "" + echo "Latest Pipelines:" + api_call GET "/projects/$PROJECT_NAME/pipelines" | jq '.data[:3] // .' +} - log_info "Latest pipelines:" - api_call GET "/projects/$project_name/pipelines" | jq '.data[:3] // .' +# Deep diagnostic +diagnose() { + print_header "Diagnostic: $PROJECT_NAME" + + # Get project info for domain + local project + project=$(api_call GET "/project/$PROJECT_NAME") + + local domain + domain=$(echo "$project" | jq -r '.data.domain // ""') + + if [[ -z "$domain" || "$domain" == "null" ]]; then + print_error "Project not found or no domain assigned" + echo "$project" | jq '.' + return 1 + fi + + echo "Project: $PROJECT_NAME" + echo "Domain: https://$domain" + echo "Git: https://git.threesix.ai/$(get_git_owner)/$PROJECT_NAME" + echo "CI: https://ci.threesix.ai/$(get_git_owner)/$PROJECT_NAME" + + # Run pipeline diagnostics + diagnose_pipeline_failure "$PROJECT_NAME" + + # Run site diagnostics + diagnose_site_failure "$domain" "$PROJECT_NAME" + + print_header "Summary" + echo "To retry the pipeline:" + print_cmd "Push a commit to trigger CI, or manually trigger from Woodpecker UI" echo "" + echo "To check real-time logs:" + print_cmd "kubectl logs -n projects -l app=$PROJECT_NAME -f" } # Teardown teardown() { - local project_name="$1" + print_header "Tearing down: $PROJECT_NAME" + + local result + result=$(api_call DELETE "/project/$PROJECT_NAME") + echo "$result" | jq '.' echo "" - log_info "Tearing down project: $project_name" - - local response - response=$(api_call DELETE "/projects/$project_name") - - if echo "$response" | jq -e '.error' > /dev/null 2>&1; then - log_error "Teardown failed" - echo "$response" | jq . - exit 1 - fi - - log_success "Project deleted (Gitea repo preserved)" - echo "$response" | jq '.data // .' - echo "" + print_success "Project deleted. Gitea repo preserved." } -# Parse command -COMMAND="${1:-}" -PROJECT_NAME="${2:-landing-test-$(date +%s)}" - case "$COMMAND" in run) - run_flow "$PROJECT_NAME" + run_flow ;; status) - check_status "$PROJECT_NAME" + check_status + ;; + diagnose) + diagnose ;; teardown) - teardown "$PROJECT_NAME" + teardown ;; *) - echo "Landing Page E2E Test Script" - echo "" - echo "Usage: $0 [project-name]" - echo "" - echo "Commands:" - echo " run Run the full composable landing page flow" - echo " status Check project and component status" - echo " teardown Delete project (preserves git repo)" - echo "" - echo "Examples:" - echo " $0 run my-landing" - echo " $0 status my-landing" - echo " $0 teardown my-landing" - echo "" + echo "Unknown command: $COMMAND" + echo "Valid commands: run, status, diagnose, teardown" exit 1 ;; esac diff --git a/deployments/k8s/base/templates/astro-landing/.claude/CLAUDE.md b/deployments/k8s/base/templates/astro-landing/.claude/CLAUDE.md new file mode 100644 index 0000000..8b0dbe9 --- /dev/null +++ b/deployments/k8s/base/templates/astro-landing/.claude/CLAUDE.md @@ -0,0 +1,49 @@ +# {{PROJECT_NAME}} + +Astro landing page deployed to {{DOMAIN}}. + +## Development + +```bash +npm install +npm run dev +``` + +Visit http://localhost:4321 to see the site. + +## Build + +```bash +npm run build +``` + +Output in `dist/` - static HTML/CSS/JS. + +## Deployment + +Pushes to `main` auto-deploy via Woodpecker CI: +1. Install dependencies +2. Build static site +3. Build Docker image (nginx serving dist/) +4. Push to registry +5. Update K8s deployment + +Live at: https://{{DOMAIN}} + +## Constraints + +- Use Astro components, minimize client JS +- Optimize images (use Astro Image) +- Keep Lighthouse score > 90 +- Tailwind for styling + +## File Structure + +``` +src/ + pages/ + index.astro # Main landing page + components/ # Reusable Astro components + layouts/ # Page layouts +public/ # Static assets +``` diff --git a/deployments/k8s/base/templates/default/.claude/CLAUDE.md b/deployments/k8s/base/templates/default/.claude/CLAUDE.md new file mode 100644 index 0000000..19d05b8 --- /dev/null +++ b/deployments/k8s/base/templates/default/.claude/CLAUDE.md @@ -0,0 +1,32 @@ +# {{PROJECT_NAME}} + +Project deployed to {{DOMAIN}} via threesix.ai platform. + +## Quick Start + +```bash +# Clone +git clone {{GIT_URL}} +cd {{PROJECT_NAME}} + +# Build +docker build -t {{PROJECT_NAME}} . + +# Run +docker run -p 8080:8080 {{PROJECT_NAME}} +``` + +## Deployment + +Pushes to `main` trigger automatic deployment via Woodpecker CI: +1. Build Docker image +2. Push to registry (registry.threesix.ai) +3. Update Kubernetes deployment + +Live at: https://{{DOMAIN}} + +## Constraints + +- Keep the Dockerfile optimized for build time +- Use multi-stage builds when possible +- All config via environment variables diff --git a/deployments/k8s/base/templates/go-api/.claude/CLAUDE.md b/deployments/k8s/base/templates/go-api/.claude/CLAUDE.md new file mode 100644 index 0000000..0963535 --- /dev/null +++ b/deployments/k8s/base/templates/go-api/.claude/CLAUDE.md @@ -0,0 +1,61 @@ +# {{PROJECT_NAME}} + +Go REST API deployed to {{DOMAIN}}. + +## Development + +```bash +go run ./cmd/api +``` + +API runs at http://localhost:8080 + +## Test + +```bash +go test ./... +``` + +## Build + +```bash +go build -o app ./cmd/api +``` + +## Deployment + +Pushes to `main` auto-deploy via Woodpecker CI: +1. Run tests +2. Build binary +3. Build Docker image +4. Push to registry +5. Update K8s deployment + +Live at: https://{{DOMAIN}} + +## API Endpoints + +| Method | Path | Description | +|--------|------|-------------| +| GET | /health | Health check | +| GET | /api/v1/... | Your endpoints | + +## Constraints + +- Use chi router (github.com/go-chi/chi/v5) +- Return JSON responses with proper status codes +- Structured logging with slog +- Config via environment variables +- All DB queries with sqlx + +## File Structure + +``` +cmd/ + api/ + main.go # Entry point +internal/ + handlers/ # HTTP handlers + domain/ # Business models + service/ # Business logic +``` diff --git a/docs/features/operations-audit.md b/docs/features/operations-audit.md new file mode 100644 index 0000000..fa3ab33 --- /dev/null +++ b/docs/features/operations-audit.md @@ -0,0 +1,289 @@ +# Operations Audit System + +**Status**: Spec +**Purpose**: Make automated development debuggable via API + +## Overview + +Every action on a project is an **Operation**. Operations capture what happened, step-by-step, with enough detail to pinpoint failures without digging through logs. + +``` +GET /projects/testgo1/operations?status=failed + +→ Operation "build" failed at step "build-api": git executable not found +``` + +## Design Principles + +1. **Queryable via API** - No kubectl, no Woodpecker UI, no guessing +2. **Comprehensive, not verbose** - Capture essence + detail separately +3. **30-day retention** - Operations are for debugging, not compliance +4. **Linked to permanent audit** - `audit_log` stays forever, operations link to it + +## Data Model + +### Operations Table + +```sql +CREATE TABLE operations ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + project_id TEXT NOT NULL, + type TEXT NOT NULL, + status TEXT NOT NULL DEFAULT 'running', + + -- Correlation + request_id TEXT, -- HTTP request that initiated + triggered_by UUID, -- Parent operation (build triggered by component.add) + commit_sha TEXT, -- Git commit this operation created/triggered + external_ref TEXT, -- Woodpecker build#, K8s deployment, etc. + + -- Timing + started_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + completed_at TIMESTAMPTZ, + duration_ms INT, + + -- Content (JSONB for flexibility) + input JSONB, -- What was requested + output JSONB, -- What was produced + + -- Error handling: essence + detail + error TEXT, -- One-line summary + error_detail TEXT, -- Full stack/output (truncated to 10KB) + + -- Steps + steps JSONB NOT NULL DEFAULT '[]' +); + +-- Indexes +CREATE INDEX idx_ops_project_time ON operations(project_id, started_at DESC); +CREATE INDEX idx_ops_project_status ON operations(project_id, status) WHERE status IN ('running', 'failed'); +CREATE INDEX idx_ops_commit ON operations(commit_sha) WHERE commit_sha IS NOT NULL; +CREATE INDEX idx_ops_cleanup ON operations(started_at) WHERE started_at < NOW() - INTERVAL '30 days'; +``` + +### Step Structure + +```json +{ + "name": "build-api", + "status": "failed", + "started_at": "2026-02-01T20:31:45Z", + "duration_ms": 17000, + "output": {"image": "registry.threesix.ai/testgo1/api:abc123"}, + "error": "git executable not found", + "error_detail": "exec: \"git\": executable file not found in $PATH\n at /app/pkg/app.go:24" +} +``` + +### Operation Types + +| Type | Trigger | Key Steps | +|------|---------|-----------| +| `project.create` | `POST /projects` | create_pod, create_repo, activate_ci, create_dns | +| `component.add` | `POST /projects/{id}/components` | render_template, commit_files, create_deployment | +| `build` | Woodpecker webhook | git, build-{component}, deploy-{component} | +| `resource.provision` | `POST /projects/{id}/databases` | create_database, create_user, store_credentials | + +## API + +### List Operations + +``` +GET /projects/{id}/operations +GET /projects/{id}/operations?status=failed +GET /projects/{id}/operations?type=build +GET /projects/{id}/operations?since=1h +GET /projects/{id}/operations?limit=50 +``` + +Response: +```json +{ + "data": [ + { + "id": "op-abc123", + "type": "build", + "status": "failed", + "started_at": "2026-02-01T20:31:45Z", + "duration_ms": 87000, + "error": "build-api: git executable not found", + "steps_summary": "git ✓ → build-web ✓ → build-api ✗" + } + ] +} +``` + +### Get Operation Detail + +``` +GET /projects/{id}/operations/{operation_id} +``` + +Response: +```json +{ + "data": { + "id": "op-abc123", + "type": "build", + "status": "failed", + "triggered_by": "op-xyz789", + "commit_sha": "abc123", + "external_ref": "build#42", + "started_at": "2026-02-01T20:31:45Z", + "completed_at": "2026-02-01T20:33:12Z", + "duration_ms": 87000, + "input": { + "commit_message": "Add service component: api" + }, + "steps": [ + {"name": "git", "status": "completed", "duration_ms": 5000}, + {"name": "build-web", "status": "completed", "duration_ms": 48000}, + { + "name": "build-api", + "status": "failed", + "duration_ms": 17000, + "error": "git executable not found", + "error_detail": "/app/pkg/app/app.go:24:2: github.com/jordan/testgo1/pkg@v0.0.0: exec: \"git\": executable file not found..." + } + ], + "error": "build-api: git executable not found", + "error_detail": "Full kaniko output..." + } +} +``` + +### Find by Commit + +``` +GET /projects/{id}/operations?commit=abc123 +``` + +Returns operations that created or were triggered by this commit. + +## Correlation + +### Request → Operation + +``` +HTTP Request (X-Request-ID: req-123) + ↓ +Handler creates Operation (id: op-abc, request_id: req-123) + ↓ +Service executes steps, updates operation + ↓ +Response includes operation_id +``` + +### Component Add → Build + +``` +component.add (op-abc) + → commits to git (sha: abc123) + → operation.commit_sha = "abc123" + +Woodpecker webhook fires for abc123 + → rdev looks up: SELECT id FROM operations WHERE commit_sha = 'abc123' + → creates build operation (triggered_by: op-abc) +``` + +### Linking to Permanent Audit + +Operations are temporary (30d). For compliance, `audit_log` is permanent. + +```sql +-- Add operation_id to audit_log +ALTER TABLE audit_log ADD COLUMN operation_id UUID; +CREATE INDEX idx_audit_operation ON audit_log(operation_id) WHERE operation_id IS NOT NULL; +``` + +Query permanent history via audit_log, debug recent issues via operations. + +## Implementation + +### Phase 1: Foundation +- [ ] Migration: operations table +- [ ] Domain: Operation, OperationStep +- [ ] Port: OperationRepository +- [ ] Adapter: PostgreSQL implementation +- [ ] Handler: GET /projects/{id}/operations + +### Phase 2: Instrumentation +- [ ] Instrument: project.create handler +- [ ] Instrument: component.add handler +- [ ] Instrument: resource provisioning +- [ ] Add operation_id to responses + +### Phase 3: Build Integration +- [ ] Woodpecker webhook receiver endpoint +- [ ] Parse build events into operation steps +- [ ] Link via commit_sha + +### Phase 4: Cleanup +- [ ] Background job: delete operations older than 30d +- [ ] Add operation_id column to audit_log + +## Files to Create/Modify + +``` +internal/ +├── domain/ +│ └── operation.go # NEW: Operation, OperationStep, OperationType +├── port/ +│ └── operation.go # NEW: OperationRepository interface +├── adapter/ +│ └── postgres/ +│ └── operation_repo.go # NEW: PostgreSQL implementation +├── service/ +│ └── operation_service.go # NEW: Business logic +├── handlers/ +│ └── operations.go # NEW: API handlers +│ └── project.go # MODIFY: Create operation on project.create +│ └── component.go # MODIFY: Create operation on component.add +│ └── webhooks.go # MODIFY: Handle Woodpecker build events +└── worker/ + └── cleanup.go # NEW: 30-day retention cleanup + +migrations/ +└── 015_operations.sql # NEW: Table + indexes +``` + +## Example Debugging Session + +```bash +# Project deployment failing. What happened? +$ curl -s "$API/projects/testgo1/operations?status=failed" | jq '.[0]' +{ + "id": "op-abc123", + "type": "build", + "error": "build-api: git executable not found", + "steps_summary": "git ✓ → build-web ✓ → build-api ✗" +} + +# Get details +$ curl -s "$API/projects/testgo1/operations/op-abc123" | jq '.steps[-1]' +{ + "name": "build-api", + "status": "failed", + "error": "git executable not found", + "error_detail": "exec: \"git\": executable file not found in $PATH..." +} + +# What triggered this build? +$ curl -s "$API/projects/testgo1/operations/op-abc123" | jq '.triggered_by' +"op-xyz789" + +# What was that operation? +$ curl -s "$API/projects/testgo1/operations/op-xyz789" | jq '{type, input}' +{ + "type": "component.add", + "input": {"template": "service", "name": "api"} +} + +# Root cause: component.add triggered build, build failed due to missing git in Dockerfile +``` + +## Open Questions + +1. **Stream running operations?** - Could add SSE endpoint for real-time step updates +2. **CLI integration?** - `rdev debug testgo1` to show recent failures +3. **Alerting?** - Webhook when operation fails? diff --git a/internal/adapter/postgres/operation_repo.go b/internal/adapter/postgres/operation_repo.go new file mode 100644 index 0000000..dd7c342 --- /dev/null +++ b/internal/adapter/postgres/operation_repo.go @@ -0,0 +1,569 @@ +// Package postgres provides PostgreSQL-based implementations of port interfaces. +package postgres + +import ( + "context" + "database/sql" + "encoding/json" + "errors" + "fmt" + "strings" + "time" + + "github.com/orchard9/rdev/internal/domain" + "github.com/orchard9/rdev/internal/port" +) + +// OperationRepository implements port.OperationRepository using PostgreSQL. +type OperationRepository struct { + db *sql.DB +} + +// NewOperationRepository creates a new PostgreSQL operation repository. +func NewOperationRepository(db *sql.DB) *OperationRepository { + return &OperationRepository{db: db} +} + +// Ensure OperationRepository implements port.OperationRepository at compile time. +var _ port.OperationRepository = (*OperationRepository)(nil) + +// Create creates a new operation record. +func (r *OperationRepository) Create(ctx context.Context, op *domain.Operation) error { + inputJSON, err := json.Marshal(op.Input) + if err != nil { + return fmt.Errorf("marshal input: %w", err) + } + + stepsJSON, err := json.Marshal(op.Steps) + if err != nil { + return fmt.Errorf("marshal steps: %w", err) + } + + _, err = r.db.ExecContext(ctx, ` + INSERT INTO operations ( + id, project_id, type, status, request_id, triggered_by, + commit_sha, external_ref, started_at, input, steps + ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11) + `, + op.ID, + op.ProjectID, + string(op.Type), + string(op.Status), + nullString(op.RequestID), + nullString(op.TriggeredBy), + nullString(op.CommitSHA), + nullString(op.ExternalRef), + op.StartedAt, + inputJSON, + stepsJSON, + ) + if err != nil { + return fmt.Errorf("insert operation: %w", err) + } + + return nil +} + +// Update updates an existing operation record. +func (r *OperationRepository) Update(ctx context.Context, op *domain.Operation) error { + inputJSON, err := json.Marshal(op.Input) + if err != nil { + return fmt.Errorf("marshal input: %w", err) + } + + outputJSON, err := json.Marshal(op.Output) + if err != nil { + return fmt.Errorf("marshal output: %w", err) + } + + stepsJSON, err := json.Marshal(op.Steps) + if err != nil { + return fmt.Errorf("marshal steps: %w", err) + } + + res, err := r.db.ExecContext(ctx, ` + UPDATE operations SET + status = $2, + request_id = $3, + triggered_by = $4, + commit_sha = $5, + external_ref = $6, + completed_at = $7, + duration_ms = $8, + input = $9, + output = $10, + error = $11, + error_detail = $12, + steps = $13 + WHERE id = $1 + `, + op.ID, + string(op.Status), + nullString(op.RequestID), + nullString(op.TriggeredBy), + nullString(op.CommitSHA), + nullString(op.ExternalRef), + nullTime(op.CompletedAt), + nullInt64(op.DurationMs), + inputJSON, + outputJSON, + nullString(op.Error), + nullString(op.ErrorDetail), + stepsJSON, + ) + if err != nil { + return fmt.Errorf("update operation: %w", err) + } + + rows, err := res.RowsAffected() + if err != nil { + return fmt.Errorf("rows affected: %w", err) + } + if rows == 0 { + return domain.ErrOperationNotFound + } + + return nil +} + +// Get retrieves an operation by ID. +func (r *OperationRepository) Get(ctx context.Context, id string) (*domain.Operation, error) { + row := r.db.QueryRowContext(ctx, ` + SELECT id, project_id, type, status, request_id, triggered_by, + commit_sha, external_ref, started_at, completed_at, duration_ms, + input, output, error, error_detail, steps, created_at + FROM operations + WHERE id = $1 + `, id) + + return r.scanOperation(row) +} + +// GetByCommitSHA finds the operation that created a specific commit. +func (r *OperationRepository) GetByCommitSHA(ctx context.Context, projectID, sha string) (*domain.Operation, error) { + row := r.db.QueryRowContext(ctx, ` + SELECT id, project_id, type, status, request_id, triggered_by, + commit_sha, external_ref, started_at, completed_at, duration_ms, + input, output, error, error_detail, steps, created_at + FROM operations + WHERE project_id = $1 AND commit_sha = $2 + ORDER BY started_at DESC + LIMIT 1 + `, projectID, sha) + + return r.scanOperation(row) +} + +// List returns operations matching the filter criteria. +func (r *OperationRepository) List(ctx context.Context, filter domain.OperationFilters) ([]*domain.Operation, error) { + filter.Normalize() + + query := strings.Builder{} + query.WriteString(` + SELECT id, project_id, type, status, request_id, triggered_by, + commit_sha, external_ref, started_at, completed_at, duration_ms, + input, output, error, error_detail, steps, created_at + FROM operations + WHERE project_id = $1 + `) + + args := []any{filter.ProjectID} + argNum := 2 + + if filter.Type != "" { + fmt.Fprintf(&query, " AND type = $%d", argNum) + args = append(args, string(filter.Type)) + argNum++ + } + + if filter.Status != "" { + fmt.Fprintf(&query, " AND status = $%d", argNum) + args = append(args, string(filter.Status)) + argNum++ + } + + if filter.CommitSHA != "" { + fmt.Fprintf(&query, " AND commit_sha = $%d", argNum) + args = append(args, filter.CommitSHA) + argNum++ + } + + if !filter.Since.IsZero() { + fmt.Fprintf(&query, " AND started_at >= $%d", argNum) + args = append(args, filter.Since) + argNum++ + } + + query.WriteString(" ORDER BY started_at DESC") + + fmt.Fprintf(&query, " LIMIT $%d", argNum) + args = append(args, filter.Limit) + + rows, err := r.db.QueryContext(ctx, query.String(), args...) + if err != nil { + return nil, fmt.Errorf("query operations: %w", err) + } + defer func() { _ = rows.Close() }() + + var operations []*domain.Operation + for rows.Next() { + op, err := r.scanOperationRows(rows) + if err != nil { + return nil, err + } + operations = append(operations, op) + } + + if err := rows.Err(); err != nil { + return nil, fmt.Errorf("iterate operations: %w", err) + } + + return operations, nil +} + +// AddStep appends a new step to an operation. +func (r *OperationRepository) AddStep(ctx context.Context, operationID string, step domain.OperationStep) error { + stepJSON, err := json.Marshal(step) + if err != nil { + return fmt.Errorf("marshal step: %w", err) + } + + // Use JSONB array concatenation to append the step + res, err := r.db.ExecContext(ctx, ` + UPDATE operations + SET steps = steps || $2::jsonb + WHERE id = $1 + `, operationID, stepJSON) + if err != nil { + return fmt.Errorf("add step: %w", err) + } + + rows, err := res.RowsAffected() + if err != nil { + return fmt.Errorf("rows affected: %w", err) + } + if rows == 0 { + return domain.ErrOperationNotFound + } + + return nil +} + +// UpdateStep updates an existing step within an operation. +func (r *OperationRepository) UpdateStep(ctx context.Context, operationID string, step domain.OperationStep) error { + // Get current steps + op, err := r.Get(ctx, operationID) + if err != nil { + return err + } + + // Find and update the step by name + found := false + for i := range op.Steps { + if op.Steps[i].Name == step.Name { + op.Steps[i] = step + found = true + break + } + } + + if !found { + return fmt.Errorf("step %q not found", step.Name) + } + + stepsJSON, err := json.Marshal(op.Steps) + if err != nil { + return fmt.Errorf("marshal steps: %w", err) + } + + res, err := r.db.ExecContext(ctx, ` + UPDATE operations + SET steps = $2 + WHERE id = $1 + `, operationID, stepsJSON) + if err != nil { + return fmt.Errorf("update step: %w", err) + } + + rows, err := res.RowsAffected() + if err != nil { + return fmt.Errorf("rows affected: %w", err) + } + if rows == 0 { + return domain.ErrOperationNotFound + } + + return nil +} + +// Complete marks an operation as completed or failed. +func (r *OperationRepository) Complete(ctx context.Context, operationID string, status domain.OperationStatus, output map[string]any, errMsg, errDetail string) error { + now := time.Now() + + // Get start time to calculate duration + var startedAt time.Time + err := r.db.QueryRowContext(ctx, `SELECT started_at FROM operations WHERE id = $1`, operationID).Scan(&startedAt) + if errors.Is(err, sql.ErrNoRows) { + return domain.ErrOperationNotFound + } + if err != nil { + return fmt.Errorf("get started_at: %w", err) + } + + durationMs := now.Sub(startedAt).Milliseconds() + + outputJSON, err := json.Marshal(output) + if err != nil { + return fmt.Errorf("marshal output: %w", err) + } + + // Truncate error detail if needed + errDetail = domain.TruncateErrorDetail(errDetail) + + res, err := r.db.ExecContext(ctx, ` + UPDATE operations + SET status = $2, completed_at = $3, duration_ms = $4, output = $5, error = $6, error_detail = $7 + WHERE id = $1 + `, operationID, string(status), now, durationMs, outputJSON, nullString(errMsg), nullString(errDetail)) + if err != nil { + return fmt.Errorf("complete operation: %w", err) + } + + rows, err := res.RowsAffected() + if err != nil { + return fmt.Errorf("rows affected: %w", err) + } + if rows == 0 { + return domain.ErrOperationNotFound + } + + return nil +} + +// SetCommitSHA updates the commit_sha field for an operation. +func (r *OperationRepository) SetCommitSHA(ctx context.Context, operationID, sha string) error { + res, err := r.db.ExecContext(ctx, ` + UPDATE operations SET commit_sha = $2 WHERE id = $1 + `, operationID, sha) + if err != nil { + return fmt.Errorf("set commit_sha: %w", err) + } + + rows, err := res.RowsAffected() + if err != nil { + return fmt.Errorf("rows affected: %w", err) + } + if rows == 0 { + return domain.ErrOperationNotFound + } + + return nil +} + +// SetTriggeredBy sets the triggered_by field to link to a parent operation. +func (r *OperationRepository) SetTriggeredBy(ctx context.Context, operationID, parentID string) error { + res, err := r.db.ExecContext(ctx, ` + UPDATE operations SET triggered_by = $2 WHERE id = $1 + `, operationID, parentID) + if err != nil { + return fmt.Errorf("set triggered_by: %w", err) + } + + rows, err := res.RowsAffected() + if err != nil { + return fmt.Errorf("rows affected: %w", err) + } + if rows == 0 { + return domain.ErrOperationNotFound + } + + return nil +} + +// DeleteOlderThan removes operations older than the specified time. +func (r *OperationRepository) DeleteOlderThan(ctx context.Context, cutoff time.Time) (int64, error) { + res, err := r.db.ExecContext(ctx, ` + DELETE FROM operations WHERE started_at < $1 + `, cutoff) + if err != nil { + return 0, fmt.Errorf("delete operations: %w", err) + } + + return res.RowsAffected() +} + +// scanOperation scans a single operation from a QueryRow result. +func (r *OperationRepository) scanOperation(row *sql.Row) (*domain.Operation, error) { + var op domain.Operation + var opType, status string + var requestID, triggeredBy, commitSHA, externalRef sql.NullString + var completedAt sql.NullTime + var durationMs sql.NullInt64 + var inputJSON, outputJSON, stepsJSON []byte + var errMsg, errDetail sql.NullString + + err := row.Scan( + &op.ID, + &op.ProjectID, + &opType, + &status, + &requestID, + &triggeredBy, + &commitSHA, + &externalRef, + &op.StartedAt, + &completedAt, + &durationMs, + &inputJSON, + &outputJSON, + &errMsg, + &errDetail, + &stepsJSON, + &op.CreatedAt, + ) + if errors.Is(err, sql.ErrNoRows) { + return nil, domain.ErrOperationNotFound + } + if err != nil { + return nil, fmt.Errorf("scan operation: %w", err) + } + + op.Type = domain.OperationType(opType) + op.Status = domain.OperationStatus(status) + + if requestID.Valid { + op.RequestID = requestID.String + } + if triggeredBy.Valid { + op.TriggeredBy = triggeredBy.String + } + if commitSHA.Valid { + op.CommitSHA = commitSHA.String + } + if externalRef.Valid { + op.ExternalRef = externalRef.String + } + if completedAt.Valid { + op.CompletedAt = &completedAt.Time + } + if durationMs.Valid { + op.DurationMs = durationMs.Int64 + } + if errMsg.Valid { + op.Error = errMsg.String + } + if errDetail.Valid { + op.ErrorDetail = errDetail.String + } + + if len(inputJSON) > 0 { + if err := json.Unmarshal(inputJSON, &op.Input); err != nil { + return nil, fmt.Errorf("unmarshal input: %w", err) + } + } + if len(outputJSON) > 0 { + if err := json.Unmarshal(outputJSON, &op.Output); err != nil { + return nil, fmt.Errorf("unmarshal output: %w", err) + } + } + if len(stepsJSON) > 0 { + if err := json.Unmarshal(stepsJSON, &op.Steps); err != nil { + return nil, fmt.Errorf("unmarshal steps: %w", err) + } + } + + return &op, nil +} + +// scanOperationRows scans a single operation from a Rows result. +func (r *OperationRepository) scanOperationRows(rows *sql.Rows) (*domain.Operation, error) { + var op domain.Operation + var opType, status string + var requestID, triggeredBy, commitSHA, externalRef sql.NullString + var completedAt sql.NullTime + var durationMs sql.NullInt64 + var inputJSON, outputJSON, stepsJSON []byte + var errMsg, errDetail sql.NullString + + err := rows.Scan( + &op.ID, + &op.ProjectID, + &opType, + &status, + &requestID, + &triggeredBy, + &commitSHA, + &externalRef, + &op.StartedAt, + &completedAt, + &durationMs, + &inputJSON, + &outputJSON, + &errMsg, + &errDetail, + &stepsJSON, + &op.CreatedAt, + ) + if err != nil { + return nil, fmt.Errorf("scan operation: %w", err) + } + + op.Type = domain.OperationType(opType) + op.Status = domain.OperationStatus(status) + + if requestID.Valid { + op.RequestID = requestID.String + } + if triggeredBy.Valid { + op.TriggeredBy = triggeredBy.String + } + if commitSHA.Valid { + op.CommitSHA = commitSHA.String + } + if externalRef.Valid { + op.ExternalRef = externalRef.String + } + if completedAt.Valid { + op.CompletedAt = &completedAt.Time + } + if durationMs.Valid { + op.DurationMs = durationMs.Int64 + } + if errMsg.Valid { + op.Error = errMsg.String + } + if errDetail.Valid { + op.ErrorDetail = errDetail.String + } + + if len(inputJSON) > 0 { + if err := json.Unmarshal(inputJSON, &op.Input); err != nil { + return nil, fmt.Errorf("unmarshal input: %w", err) + } + } + if len(outputJSON) > 0 { + if err := json.Unmarshal(outputJSON, &op.Output); err != nil { + return nil, fmt.Errorf("unmarshal output: %w", err) + } + } + if len(stepsJSON) > 0 { + if err := json.Unmarshal(stepsJSON, &op.Steps); err != nil { + return nil, fmt.Errorf("unmarshal steps: %w", err) + } + } + + return &op, nil +} + +// nullInt64 converts an int64 to sql.NullInt64 (null if 0). +func nullInt64(v int64) sql.NullInt64 { + return sql.NullInt64{Int64: v, Valid: v != 0} +} + +// nullTime converts a *time.Time to sql.NullTime. +func nullTime(t *time.Time) sql.NullTime { + if t == nil { + return sql.NullTime{} + } + return sql.NullTime{Time: *t, Valid: true} +} diff --git a/internal/adapter/templates/templates/astro-landing/.claude/CLAUDE.md b/internal/adapter/templates/templates/astro-landing/.claude/CLAUDE.md new file mode 100644 index 0000000..8b0dbe9 --- /dev/null +++ b/internal/adapter/templates/templates/astro-landing/.claude/CLAUDE.md @@ -0,0 +1,49 @@ +# {{PROJECT_NAME}} + +Astro landing page deployed to {{DOMAIN}}. + +## Development + +```bash +npm install +npm run dev +``` + +Visit http://localhost:4321 to see the site. + +## Build + +```bash +npm run build +``` + +Output in `dist/` - static HTML/CSS/JS. + +## Deployment + +Pushes to `main` auto-deploy via Woodpecker CI: +1. Install dependencies +2. Build static site +3. Build Docker image (nginx serving dist/) +4. Push to registry +5. Update K8s deployment + +Live at: https://{{DOMAIN}} + +## Constraints + +- Use Astro components, minimize client JS +- Optimize images (use Astro Image) +- Keep Lighthouse score > 90 +- Tailwind for styling + +## File Structure + +``` +src/ + pages/ + index.astro # Main landing page + components/ # Reusable Astro components + layouts/ # Page layouts +public/ # Static assets +``` diff --git a/internal/adapter/templates/templates/components/app-astro/Dockerfile.tmpl b/internal/adapter/templates/templates/components/app-astro/Dockerfile.tmpl index f2656b0..847f228 100644 --- a/internal/adapter/templates/templates/components/app-astro/Dockerfile.tmpl +++ b/internal/adapter/templates/templates/components/app-astro/Dockerfile.tmpl @@ -29,6 +29,6 @@ FROM nginx:alpine COPY --from=build /workspace/apps/{{COMPONENT_NAME}}/dist /usr/share/nginx/html COPY apps/{{COMPONENT_NAME}}/nginx.conf /etc/nginx/conf.d/default.conf -EXPOSE 80 +EXPOSE {{PORT}} CMD ["nginx", "-g", "daemon off;"] diff --git a/internal/adapter/templates/templates/components/app-astro/nginx.conf.tmpl b/internal/adapter/templates/templates/components/app-astro/nginx.conf.tmpl index 2ab46b7..c6d5817 100644 --- a/internal/adapter/templates/templates/components/app-astro/nginx.conf.tmpl +++ b/internal/adapter/templates/templates/components/app-astro/nginx.conf.tmpl @@ -1,5 +1,5 @@ server { - listen 80; + listen {{PORT}}; server_name localhost; root /usr/share/nginx/html; index index.html; diff --git a/internal/adapter/templates/templates/components/app-react/Dockerfile.tmpl b/internal/adapter/templates/templates/components/app-react/Dockerfile.tmpl index f2656b0..847f228 100644 --- a/internal/adapter/templates/templates/components/app-react/Dockerfile.tmpl +++ b/internal/adapter/templates/templates/components/app-react/Dockerfile.tmpl @@ -29,6 +29,6 @@ FROM nginx:alpine COPY --from=build /workspace/apps/{{COMPONENT_NAME}}/dist /usr/share/nginx/html COPY apps/{{COMPONENT_NAME}}/nginx.conf /etc/nginx/conf.d/default.conf -EXPOSE 80 +EXPOSE {{PORT}} CMD ["nginx", "-g", "daemon off;"] diff --git a/internal/adapter/templates/templates/components/app-react/nginx.conf.tmpl b/internal/adapter/templates/templates/components/app-react/nginx.conf.tmpl index 2ab46b7..c6d5817 100644 --- a/internal/adapter/templates/templates/components/app-react/nginx.conf.tmpl +++ b/internal/adapter/templates/templates/components/app-react/nginx.conf.tmpl @@ -1,5 +1,5 @@ server { - listen 80; + listen {{PORT}}; server_name localhost; root /usr/share/nginx/html; index index.html; diff --git a/internal/adapter/templates/templates/components/service/Dockerfile.tmpl b/internal/adapter/templates/templates/components/service/Dockerfile.tmpl index 657b5d8..57377af 100644 --- a/internal/adapter/templates/templates/components/service/Dockerfile.tmpl +++ b/internal/adapter/templates/templates/components/service/Dockerfile.tmpl @@ -1,22 +1,21 @@ # Build stage FROM golang:1.23-alpine AS builder +RUN apk add --no-cache git + +# Configure Go workspace and private modules +ENV GOPRIVATE=git.threesix.ai/* +ENV GOWORK=/app/go.work + WORKDIR /app -# Copy go workspace files +# Copy go workspace and all source (workspace deps are local) COPY go.work go.work.sum* ./ -COPY pkg/go.mod pkg/go.sum* ./pkg/ -COPY services/{{COMPONENT_NAME}}/go.mod services/{{COMPONENT_NAME}}/go.sum* ./services/{{COMPONENT_NAME}}/ - -# Download dependencies -RUN cd services/{{COMPONENT_NAME}} && go mod download - -# Copy source COPY pkg/ ./pkg/ COPY services/{{COMPONENT_NAME}}/ ./services/{{COMPONENT_NAME}}/ -# Build -RUN cd services/{{COMPONENT_NAME}} && CGO_ENABLED=0 go build -o /{{COMPONENT_NAME}} ./cmd/server +# Build from workspace root +RUN CGO_ENABLED=0 go build -o /{{COMPONENT_NAME}} ./services/{{COMPONENT_NAME}}/cmd/server # Production stage FROM alpine:3.19 diff --git a/internal/adapter/templates/templates/components/service/go.mod.tmpl b/internal/adapter/templates/templates/components/service/go.mod.tmpl index 0976184..dc1afdb 100644 --- a/internal/adapter/templates/templates/components/service/go.mod.tmpl +++ b/internal/adapter/templates/templates/components/service/go.mod.tmpl @@ -3,3 +3,6 @@ module {{GO_MODULE}}/services/{{COMPONENT_NAME}} go 1.23 require {{GO_MODULE}}/pkg v0.0.0 + +// Use local workspace modules (for Docker builds without go.work) +replace {{GO_MODULE}}/pkg => ../../pkg diff --git a/internal/adapter/templates/templates/components/worker/Dockerfile.tmpl b/internal/adapter/templates/templates/components/worker/Dockerfile.tmpl index 377e137..33fea68 100644 --- a/internal/adapter/templates/templates/components/worker/Dockerfile.tmpl +++ b/internal/adapter/templates/templates/components/worker/Dockerfile.tmpl @@ -1,22 +1,21 @@ # Build stage FROM golang:1.23-alpine AS builder +RUN apk add --no-cache git + +# Configure Go workspace and private modules +ENV GOPRIVATE=git.threesix.ai/* +ENV GOWORK=/app/go.work + WORKDIR /app -# Copy go workspace files +# Copy go workspace and all source (workspace deps are local) COPY go.work go.work.sum* ./ -COPY pkg/go.mod pkg/go.sum* ./pkg/ -COPY workers/{{COMPONENT_NAME}}/go.mod workers/{{COMPONENT_NAME}}/go.sum* ./workers/{{COMPONENT_NAME}}/ - -# Download dependencies -RUN cd workers/{{COMPONENT_NAME}} && go mod download - -# Copy source COPY pkg/ ./pkg/ COPY workers/{{COMPONENT_NAME}}/ ./workers/{{COMPONENT_NAME}}/ -# Build -RUN cd workers/{{COMPONENT_NAME}} && CGO_ENABLED=0 go build -o /{{COMPONENT_NAME}} ./cmd/worker +# Build from workspace root +RUN CGO_ENABLED=0 go build -o /{{COMPONENT_NAME}} ./workers/{{COMPONENT_NAME}}/cmd/worker # Production stage FROM alpine:3.19 diff --git a/internal/adapter/templates/templates/components/worker/go.mod.tmpl b/internal/adapter/templates/templates/components/worker/go.mod.tmpl index 3a67ee9..7ff0d21 100644 --- a/internal/adapter/templates/templates/components/worker/go.mod.tmpl +++ b/internal/adapter/templates/templates/components/worker/go.mod.tmpl @@ -3,3 +3,6 @@ module {{GO_MODULE}}/workers/{{COMPONENT_NAME}} go 1.23 require {{GO_MODULE}}/pkg v0.0.0 + +// Use local workspace modules (for Docker builds without go.work) +replace {{GO_MODULE}}/pkg => ../../pkg diff --git a/internal/adapter/templates/templates/default/.claude/CLAUDE.md b/internal/adapter/templates/templates/default/.claude/CLAUDE.md new file mode 100644 index 0000000..19d05b8 --- /dev/null +++ b/internal/adapter/templates/templates/default/.claude/CLAUDE.md @@ -0,0 +1,32 @@ +# {{PROJECT_NAME}} + +Project deployed to {{DOMAIN}} via threesix.ai platform. + +## Quick Start + +```bash +# Clone +git clone {{GIT_URL}} +cd {{PROJECT_NAME}} + +# Build +docker build -t {{PROJECT_NAME}} . + +# Run +docker run -p 8080:8080 {{PROJECT_NAME}} +``` + +## Deployment + +Pushes to `main` trigger automatic deployment via Woodpecker CI: +1. Build Docker image +2. Push to registry (registry.threesix.ai) +3. Update Kubernetes deployment + +Live at: https://{{DOMAIN}} + +## Constraints + +- Keep the Dockerfile optimized for build time +- Use multi-stage builds when possible +- All config via environment variables diff --git a/internal/adapter/templates/templates/go-api/.claude/CLAUDE.md b/internal/adapter/templates/templates/go-api/.claude/CLAUDE.md new file mode 100644 index 0000000..0963535 --- /dev/null +++ b/internal/adapter/templates/templates/go-api/.claude/CLAUDE.md @@ -0,0 +1,61 @@ +# {{PROJECT_NAME}} + +Go REST API deployed to {{DOMAIN}}. + +## Development + +```bash +go run ./cmd/api +``` + +API runs at http://localhost:8080 + +## Test + +```bash +go test ./... +``` + +## Build + +```bash +go build -o app ./cmd/api +``` + +## Deployment + +Pushes to `main` auto-deploy via Woodpecker CI: +1. Run tests +2. Build binary +3. Build Docker image +4. Push to registry +5. Update K8s deployment + +Live at: https://{{DOMAIN}} + +## API Endpoints + +| Method | Path | Description | +|--------|------|-------------| +| GET | /health | Health check | +| GET | /api/v1/... | Your endpoints | + +## Constraints + +- Use chi router (github.com/go-chi/chi/v5) +- Return JSON responses with proper status codes +- Structured logging with slog +- Config via environment variables +- All DB queries with sqlx + +## File Structure + +``` +cmd/ + api/ + main.go # Entry point +internal/ + handlers/ # HTTP handlers + domain/ # Business models + service/ # Business logic +``` diff --git a/internal/adapter/templates/templates/skeleton/packages/logger/src/logger.ts b/internal/adapter/templates/templates/skeleton/packages/logger/src/logger.ts index acee745..7a516f4 100644 --- a/internal/adapter/templates/templates/skeleton/packages/logger/src/logger.ts +++ b/internal/adapter/templates/templates/skeleton/packages/logger/src/logger.ts @@ -59,7 +59,7 @@ export class Logger { private batchSize: number; private flushInterval: number; - constructor(private config: LoggerConfig) { + constructor(config: LoggerConfig) { this.minLevel = LEVEL_PRIORITY[config.level]; this.batchSize = config.batchSize ?? 20; this.flushInterval = config.flushInterval ?? 5000; diff --git a/internal/db/migrations/015_operations.sql b/internal/db/migrations/015_operations.sql new file mode 100644 index 0000000..11589e5 --- /dev/null +++ b/internal/db/migrations/015_operations.sql @@ -0,0 +1,68 @@ +-- Operations: Tracks all project operations for debugging +-- Operations capture what happened step-by-step with enough detail to pinpoint failures. +-- 30-day retention - operations are for debugging, not compliance. + +CREATE TABLE IF NOT EXISTS operations ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + project_id TEXT NOT NULL, + type TEXT NOT NULL, + status TEXT NOT NULL DEFAULT 'pending', + + -- Correlation + request_id TEXT, -- HTTP request that initiated + triggered_by UUID REFERENCES operations(id) ON DELETE SET NULL, + commit_sha TEXT, -- Git commit this operation created/triggered + external_ref TEXT, -- Woodpecker build#, K8s deployment, etc. + + -- Timing + started_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + completed_at TIMESTAMPTZ, + duration_ms INT, + + -- Content (JSONB for flexibility) + input JSONB, -- What was requested + output JSONB, -- What was produced + + -- Error handling: essence + detail + error TEXT, -- One-line summary + error_detail TEXT, -- Full stack/output (truncated to 10KB) + + -- Steps + steps JSONB NOT NULL DEFAULT '[]', + + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() +); + +-- Index for listing operations by project (most common query) +CREATE INDEX IF NOT EXISTS idx_ops_project_time ON operations(project_id, started_at DESC); + +-- Partial index for active/failed operations (status queries) +CREATE INDEX IF NOT EXISTS idx_ops_project_status ON operations(project_id, status) + WHERE status IN ('pending', 'running', 'failed'); + +-- Index for finding operations by commit SHA (build correlation) +CREATE INDEX IF NOT EXISTS idx_ops_commit ON operations(commit_sha) + WHERE commit_sha IS NOT NULL; + +-- Index for cleanup worker (delete operations older than 30 days) +CREATE INDEX IF NOT EXISTS idx_ops_cleanup ON operations(started_at); + +-- Comments +COMMENT ON TABLE operations IS 'Tracks project operations for debugging (30-day retention)'; +COMMENT ON COLUMN operations.id IS 'Unique operation ID'; +COMMENT ON COLUMN operations.project_id IS 'Project this operation belongs to'; +COMMENT ON COLUMN operations.type IS 'Operation type: project.create, component.add, build, resource.provision'; +COMMENT ON COLUMN operations.status IS 'Operation status: pending, running, completed, failed'; +COMMENT ON COLUMN operations.request_id IS 'HTTP request ID that initiated this operation'; +COMMENT ON COLUMN operations.triggered_by IS 'Parent operation that triggered this one'; +COMMENT ON COLUMN operations.commit_sha IS 'Git commit this operation created or was triggered by'; +COMMENT ON COLUMN operations.external_ref IS 'External reference (e.g., Woodpecker build#42)'; +COMMENT ON COLUMN operations.started_at IS 'When the operation started'; +COMMENT ON COLUMN operations.completed_at IS 'When the operation finished'; +COMMENT ON COLUMN operations.duration_ms IS 'Total operation duration in milliseconds'; +COMMENT ON COLUMN operations.input IS 'Operation input parameters as JSONB'; +COMMENT ON COLUMN operations.output IS 'Operation output/result as JSONB'; +COMMENT ON COLUMN operations.error IS 'One-line error summary'; +COMMENT ON COLUMN operations.error_detail IS 'Full error detail (truncated to 10KB)'; +COMMENT ON COLUMN operations.steps IS 'Operation steps as JSONB array'; +COMMENT ON COLUMN operations.created_at IS 'When the record was created'; diff --git a/internal/domain/errors.go b/internal/domain/errors.go index 5069077..400ac8e 100644 --- a/internal/domain/errors.go +++ b/internal/domain/errors.go @@ -75,6 +75,9 @@ var ( // Audit errors ErrAuditNotFound = errors.New("audit log entry not found") + // Operation errors + ErrOperationNotFound = errors.New("operation not found") + // Infrastructure errors (should typically be wrapped) ErrDatabaseConnection = errors.New("database connection error") ErrKubernetesError = errors.New("kubernetes error") diff --git a/internal/domain/operation.go b/internal/domain/operation.go new file mode 100644 index 0000000..4b79be3 --- /dev/null +++ b/internal/domain/operation.go @@ -0,0 +1,213 @@ +package domain + +import ( + "strings" + "time" +) + +// OperationType represents the type of operation. +type OperationType string + +const ( + OperationTypeProjectCreate OperationType = "project.create" + OperationTypeComponentAdd OperationType = "component.add" + OperationTypeBuild OperationType = "build" + OperationTypeResourceProvision OperationType = "resource.provision" +) + +// IsValid returns true if the operation type is known. +func (t OperationType) IsValid() bool { + switch t { + case OperationTypeProjectCreate, OperationTypeComponentAdd, + OperationTypeBuild, OperationTypeResourceProvision: + return true + } + return false +} + +// OperationStatus represents the status of an operation. +type OperationStatus string + +const ( + OperationStatusPending OperationStatus = "pending" + OperationStatusRunning OperationStatus = "running" + OperationStatusCompleted OperationStatus = "completed" + OperationStatusFailed OperationStatus = "failed" +) + +// IsValid returns true if the status is known. +func (s OperationStatus) IsValid() bool { + switch s { + case OperationStatusPending, OperationStatusRunning, + OperationStatusCompleted, OperationStatusFailed: + return true + } + return false +} + +// IsTerminal returns true if the status is a final state. +func (s OperationStatus) IsTerminal() bool { + return s == OperationStatusCompleted || s == OperationStatusFailed +} + +// OperationStep represents a single step within an operation. +type OperationStep struct { + // Name is the step identifier (e.g., "git", "build-api", "deploy-web"). + Name string `json:"name"` + + // Status is the step status. + Status OperationStatus `json:"status"` + + // StartedAt is when the step started. + StartedAt time.Time `json:"started_at"` + + // DurationMs is the step duration in milliseconds. + DurationMs int64 `json:"duration_ms,omitempty"` + + // Output contains step-specific output data. + Output map[string]any `json:"output,omitempty"` + + // Error is a one-line error summary. + Error string `json:"error,omitempty"` + + // ErrorDetail is the full error detail. + ErrorDetail string `json:"error_detail,omitempty"` +} + +// Operation represents a tracked project operation. +type Operation struct { + // ID is the unique operation identifier. + ID string `json:"id"` + + // ProjectID is the project this operation belongs to. + ProjectID string `json:"project_id"` + + // Type is the operation type. + Type OperationType `json:"type"` + + // Status is the current operation status. + Status OperationStatus `json:"status"` + + // RequestID is the HTTP request that initiated this operation. + RequestID string `json:"request_id,omitempty"` + + // TriggeredBy is the ID of the parent operation that triggered this one. + TriggeredBy string `json:"triggered_by,omitempty"` + + // CommitSHA is the git commit this operation created or was triggered by. + CommitSHA string `json:"commit_sha,omitempty"` + + // ExternalRef is an external reference (e.g., "build#42"). + ExternalRef string `json:"external_ref,omitempty"` + + // StartedAt is when the operation started. + StartedAt time.Time `json:"started_at"` + + // CompletedAt is when the operation finished. + CompletedAt *time.Time `json:"completed_at,omitempty"` + + // DurationMs is the total operation duration in milliseconds. + DurationMs int64 `json:"duration_ms,omitempty"` + + // Input contains the operation input parameters. + Input map[string]any `json:"input,omitempty"` + + // Output contains the operation output/result. + Output map[string]any `json:"output,omitempty"` + + // Error is a one-line error summary. + Error string `json:"error,omitempty"` + + // ErrorDetail is the full error detail (truncated to 10KB). + ErrorDetail string `json:"error_detail,omitempty"` + + // Steps contains the operation steps. + Steps []OperationStep `json:"steps,omitempty"` + + // CreatedAt is when the record was created. + CreatedAt time.Time `json:"created_at"` +} + +// StepsSummary returns a human-readable summary of step statuses. +// Example: "git ✓ → build-web ✓ → build-api ✗" +func (o *Operation) StepsSummary() string { + if len(o.Steps) == 0 { + return "" + } + + var parts []string + for _, step := range o.Steps { + symbol := "?" + switch step.Status { + case OperationStatusCompleted: + symbol = "✓" + case OperationStatusFailed: + symbol = "✗" + case OperationStatusRunning: + symbol = "…" + case OperationStatusPending: + symbol = "○" + } + parts = append(parts, step.Name+" "+symbol) + } + return strings.Join(parts, " → ") +} + +// FailedStep returns the first failed step, or nil if none failed. +func (o *Operation) FailedStep() *OperationStep { + for i := range o.Steps { + if o.Steps[i].Status == OperationStatusFailed { + return &o.Steps[i] + } + } + return nil +} + +// OperationFilters specifies criteria for listing operations. +type OperationFilters struct { + // ProjectID filters by project (required for List). + ProjectID string + + // Type filters by operation type. + Type OperationType + + // Status filters by operation status. + Status OperationStatus + + // CommitSHA filters by commit SHA. + CommitSHA string + + // Since filters operations started after this time. + Since time.Time + + // Limit is the maximum number of operations to return. + Limit int +} + +// DefaultOperationFilters returns filters with default values. +func DefaultOperationFilters() OperationFilters { + return OperationFilters{ + Limit: 50, + } +} + +// Normalize applies defaults and limits to the filters. +func (f *OperationFilters) Normalize() { + if f.Limit <= 0 { + f.Limit = 50 + } + if f.Limit > 200 { + f.Limit = 200 + } +} + +// MaxErrorDetailSize is the maximum size of error_detail (10KB). +const MaxErrorDetailSize = 10 * 1024 + +// TruncateErrorDetail truncates error detail to the maximum allowed size. +func TruncateErrorDetail(detail string) string { + if len(detail) <= MaxErrorDetailSize { + return detail + } + return detail[:MaxErrorDetailSize-3] + "..." +} diff --git a/internal/handlers/operations.go b/internal/handlers/operations.go new file mode 100644 index 0000000..eebe965 --- /dev/null +++ b/internal/handlers/operations.go @@ -0,0 +1,276 @@ +// Package handlers provides HTTP handlers for the rdev API. +package handlers + +import ( + "errors" + "net/http" + "strconv" + "time" + + "github.com/go-chi/chi/v5" + "github.com/orchard9/rdev/internal/auth" + "github.com/orchard9/rdev/internal/domain" + "github.com/orchard9/rdev/internal/port" + "github.com/orchard9/rdev/pkg/api" +) + +// OperationsHandler handles operation query endpoints. +type OperationsHandler struct { + repo port.OperationRepository +} + +// NewOperationsHandler creates a new operations handler. +func NewOperationsHandler(repo port.OperationRepository) *OperationsHandler { + return &OperationsHandler{repo: repo} +} + +// Mount registers the operation routes. +func (h *OperationsHandler) Mount(r api.Router) { + r.Route("/projects/{id}/operations", func(r chi.Router) { + r.With(auth.RequireScope(auth.ScopeProjectsRead, auth.ScopeAdmin)).Get("/", h.List) + r.With(auth.RequireScope(auth.ScopeProjectsRead, auth.ScopeAdmin)).Get("/{operation_id}", h.Get) + }) +} + +// OperationSummaryResponse is the response for listing operations. +type OperationSummaryResponse struct { + ID string `json:"id"` + Type string `json:"type"` + Status string `json:"status"` + StartedAt string `json:"started_at"` + DurationMs int64 `json:"duration_ms,omitempty"` + Error string `json:"error,omitempty"` + StepsSummary string `json:"steps_summary,omitempty"` + CommitSHA *string `json:"commit_sha,omitempty"` + ExternalRef *string `json:"external_ref,omitempty"` +} + +// OperationStepResponse is the response for a single operation step. +type OperationStepResponse struct { + Name string `json:"name"` + Status string `json:"status"` + StartedAt string `json:"started_at"` + DurationMs int64 `json:"duration_ms,omitempty"` + Output map[string]any `json:"output,omitempty"` + Error string `json:"error,omitempty"` + ErrorDetail string `json:"error_detail,omitempty"` +} + +// OperationDetailResponse is the full response for a single operation. +type OperationDetailResponse struct { + ID string `json:"id"` + ProjectID string `json:"project_id"` + Type string `json:"type"` + Status string `json:"status"` + RequestID string `json:"request_id,omitempty"` + TriggeredBy string `json:"triggered_by,omitempty"` + CommitSHA string `json:"commit_sha,omitempty"` + ExternalRef string `json:"external_ref,omitempty"` + StartedAt string `json:"started_at"` + CompletedAt *string `json:"completed_at,omitempty"` + DurationMs int64 `json:"duration_ms,omitempty"` + Input map[string]any `json:"input,omitempty"` + Output map[string]any `json:"output,omitempty"` + Error string `json:"error,omitempty"` + ErrorDetail string `json:"error_detail,omitempty"` + Steps []OperationStepResponse `json:"steps,omitempty"` +} + +// List returns operations for a project with optional filters. +// GET /projects/{id}/operations +// Query parameters: +// - status: filter by status (pending, running, completed, failed) +// - type: filter by type (project.create, component.add, build, resource.provision) +// - commit: filter by commit SHA +// - since: filter by start time (RFC3339 or duration like "1h", "24h") +// - limit: maximum number of entries (default 50, max 200) +func (h *OperationsHandler) List(w http.ResponseWriter, r *http.Request) { + projectID := chi.URLParam(r, "id") + if projectID == "" { + api.WriteBadRequest(w, r, "project ID is required") + return + } + + filters := domain.DefaultOperationFilters() + filters.ProjectID = projectID + + // Parse status filter + if status := r.URL.Query().Get("status"); status != "" { + s := domain.OperationStatus(status) + if !s.IsValid() { + api.WriteBadRequest(w, r, "invalid status: must be pending, running, completed, or failed") + return + } + filters.Status = s + } + + // Parse type filter + if opType := r.URL.Query().Get("type"); opType != "" { + t := domain.OperationType(opType) + if !t.IsValid() { + api.WriteBadRequest(w, r, "invalid type: must be project.create, component.add, build, or resource.provision") + return + } + filters.Type = t + } + + // Parse commit filter + if commit := r.URL.Query().Get("commit"); commit != "" { + filters.CommitSHA = commit + } + + // Parse since filter (RFC3339 or duration) + if sinceStr := r.URL.Query().Get("since"); sinceStr != "" { + since, err := parseSince(sinceStr) + if err != nil { + api.WriteBadRequest(w, r, "invalid since: must be RFC3339 or duration (e.g., 1h, 24h)") + return + } + filters.Since = since + } + + // Parse limit + if limitStr := r.URL.Query().Get("limit"); limitStr != "" { + limit, err := strconv.Atoi(limitStr) + if err != nil || limit < 1 { + api.WriteBadRequest(w, r, "invalid limit: must be a positive integer") + return + } + filters.Limit = limit + } + + filters.Normalize() + + ops, err := h.repo.List(r.Context(), filters) + if err != nil { + api.WriteInternalError(w, r, "failed to list operations") + return + } + + resp := make([]OperationSummaryResponse, len(ops)) + for i, op := range ops { + resp[i] = toOperationSummary(op) + } + + api.WriteSuccess(w, r, map[string]any{ + "data": resp, + "project_id": projectID, + "total": len(resp), + }) +} + +// Get returns details for a single operation. +// GET /projects/{id}/operations/{operation_id} +func (h *OperationsHandler) Get(w http.ResponseWriter, r *http.Request) { + projectID := chi.URLParam(r, "id") + operationID := chi.URLParam(r, "operation_id") + + if projectID == "" { + api.WriteBadRequest(w, r, "project ID is required") + return + } + if operationID == "" { + api.WriteBadRequest(w, r, "operation ID is required") + return + } + + op, err := h.repo.Get(r.Context(), operationID) + if err != nil { + if errors.Is(err, domain.ErrOperationNotFound) { + api.WriteNotFound(w, r, "operation not found") + return + } + api.WriteInternalError(w, r, "failed to get operation") + return + } + + // Verify the operation belongs to the requested project + if op.ProjectID != projectID { + api.WriteNotFound(w, r, "operation not found") + return + } + + api.WriteSuccess(w, r, map[string]any{ + "data": toOperationDetail(op), + }) +} + +// toOperationSummary converts an Operation to a summary response. +func toOperationSummary(op *domain.Operation) OperationSummaryResponse { + resp := OperationSummaryResponse{ + ID: op.ID, + Type: string(op.Type), + Status: string(op.Status), + StartedAt: op.StartedAt.Format(time.RFC3339), + DurationMs: op.DurationMs, + Error: op.Error, + StepsSummary: op.StepsSummary(), + } + + if op.CommitSHA != "" { + resp.CommitSHA = &op.CommitSHA + } + if op.ExternalRef != "" { + resp.ExternalRef = &op.ExternalRef + } + + return resp +} + +// toOperationDetail converts an Operation to a full detail response. +func toOperationDetail(op *domain.Operation) OperationDetailResponse { + resp := OperationDetailResponse{ + ID: op.ID, + ProjectID: op.ProjectID, + Type: string(op.Type), + Status: string(op.Status), + RequestID: op.RequestID, + TriggeredBy: op.TriggeredBy, + CommitSHA: op.CommitSHA, + ExternalRef: op.ExternalRef, + StartedAt: op.StartedAt.Format(time.RFC3339), + DurationMs: op.DurationMs, + Input: op.Input, + Output: op.Output, + Error: op.Error, + ErrorDetail: op.ErrorDetail, + } + + if op.CompletedAt != nil { + s := op.CompletedAt.Format(time.RFC3339) + resp.CompletedAt = &s + } + + if len(op.Steps) > 0 { + resp.Steps = make([]OperationStepResponse, len(op.Steps)) + for i, step := range op.Steps { + resp.Steps[i] = OperationStepResponse{ + Name: step.Name, + Status: string(step.Status), + StartedAt: step.StartedAt.Format(time.RFC3339), + DurationMs: step.DurationMs, + Output: step.Output, + Error: step.Error, + ErrorDetail: step.ErrorDetail, + } + } + } + + return resp +} + +// parseSince parses a "since" parameter as either RFC3339 time or duration. +func parseSince(s string) (time.Time, error) { + // Try RFC3339 first + if t, err := time.Parse(time.RFC3339, s); err == nil { + return t, nil + } + + // Try duration (e.g., "1h", "24h", "7d") + d, err := time.ParseDuration(s) + if err != nil { + return time.Time{}, err + } + + return time.Now().Add(-d), nil +} diff --git a/internal/handlers/woodpecker_webhook.go b/internal/handlers/woodpecker_webhook.go index 3c49c7d..cab76d9 100644 --- a/internal/handlers/woodpecker_webhook.go +++ b/internal/handlers/woodpecker_webhook.go @@ -184,7 +184,6 @@ func (h *WoodpeckerWebhookHandler) HandleWebhook(w http.ResponseWriter, r *http. commitShort = commitShort[:8] } imageTag := fmt.Sprintf("%s/%s:%s", h.registryURL, projectName, commitShort) - imageLatest := fmt.Sprintf("%s/%s:latest", h.registryURL, projectName) h.logger.Info("triggering deployment", "project", projectName, @@ -207,38 +206,20 @@ func (h *WoodpeckerWebhookHandler) HandleWebhook(w http.ResponseWriter, r *http. } } - // Deploy - if h.deployer == nil { - api.WriteInternalError(w, r, "deployer not configured") - return - } - - deployDomain := projectName + "." + h.defaultDomain - - err = h.deployer.Deploy(ctx, domain.DeploySpec{ - ProjectName: projectName, - Image: imageLatest, // Use :latest tag, Woodpecker should push both - Domain: deployDomain, - Port: 8080, - Replicas: 1, - }) - if err != nil { - h.logger.Error("deployment failed", "error", err, "project", projectName) - api.WriteInternalError(w, r, "deployment failed") - return - } - - h.logger.Info("deployment triggered successfully", + // Note: Project-level deployment is skipped for composable projects. + // Component deployments are created by createInitialComponentDeployment + // and updated by the CI pipeline's kubectl set image commands. + h.logger.Info("build succeeded, component deployments updated by CI", "project", projectName, - "url", "https://"+deployDomain, + "commit", payload.Build.Commit, ) api.WriteSuccess(w, r, map[string]any{ - "status": "deployed", + "status": "success", "project": projectName, "image": imageTag, - "url": "https://" + deployDomain, "commit": payload.Build.Commit, + "note": "component deployments managed by CI pipeline", }) } diff --git a/internal/port/operation.go b/internal/port/operation.go new file mode 100644 index 0000000..6424563 --- /dev/null +++ b/internal/port/operation.go @@ -0,0 +1,55 @@ +// Package port defines interface contracts for external adapters. +package port + +import ( + "context" + "time" + + "github.com/orchard9/rdev/internal/domain" +) + +// OperationRepository manages operation records for debugging and observability. +// Operations are tracked with steps, enabling developers to pinpoint failures +// without digging through logs. +type OperationRepository interface { + // Create creates a new operation record. + Create(ctx context.Context, op *domain.Operation) error + + // Update updates an existing operation record. + Update(ctx context.Context, op *domain.Operation) error + + // Get retrieves an operation by ID. + // Returns ErrOperationNotFound if the operation does not exist. + Get(ctx context.Context, id string) (*domain.Operation, error) + + // GetByCommitSHA finds the operation that created a specific commit. + // Used to link builds to the operation that triggered them. + // Returns ErrOperationNotFound if no operation matches. + GetByCommitSHA(ctx context.Context, projectID, sha string) (*domain.Operation, error) + + // List returns operations matching the filter criteria. + List(ctx context.Context, filter domain.OperationFilters) ([]*domain.Operation, error) + + // AddStep appends a new step to an operation. + AddStep(ctx context.Context, operationID string, step domain.OperationStep) error + + // UpdateStep updates an existing step within an operation. + // The step is identified by name. + UpdateStep(ctx context.Context, operationID string, step domain.OperationStep) error + + // Complete marks an operation as completed or failed. + // Sets completed_at, duration_ms, and optionally output/error fields. + Complete(ctx context.Context, operationID string, status domain.OperationStatus, output map[string]any, errMsg, errDetail string) error + + // SetCommitSHA updates the commit_sha field for an operation. + // Called after a git commit is created as part of the operation. + SetCommitSHA(ctx context.Context, operationID, sha string) error + + // SetTriggeredBy sets the triggered_by field to link to a parent operation. + SetTriggeredBy(ctx context.Context, operationID, parentID string) error + + // DeleteOlderThan removes operations older than the specified time. + // Returns the number of deleted records. + // Used by the cleanup worker for 30-day retention. + DeleteOlderThan(ctx context.Context, cutoff time.Time) (int64, error) +} diff --git a/internal/service/component.go b/internal/service/component.go index 0ea02f2..46ce1b5 100644 --- a/internal/service/component.go +++ b/internal/service/component.go @@ -103,8 +103,8 @@ func (s *ComponentService) AddComponent(ctx context.Context, projectID string, r return nil, fmt.Errorf("failed to get project: %w", err) } - // Build Go module path - goModule = fmt.Sprintf("github.com/%s/%s", gitRepoOwner, gitRepoName) + // Build Go module path (use actual git host, not github.com) + goModule = fmt.Sprintf("git.threesix.ai/%s/%s", gitRepoOwner, gitRepoName) // 4. Calculate component path destDir := componentType.DestDir() diff --git a/internal/service/operation_service.go b/internal/service/operation_service.go new file mode 100644 index 0000000..ba28797 --- /dev/null +++ b/internal/service/operation_service.go @@ -0,0 +1,224 @@ +// Package service provides business logic services. +package service + +import ( + "context" + "log/slog" + "time" + + "github.com/google/uuid" + "github.com/orchard9/rdev/internal/domain" + "github.com/orchard9/rdev/internal/port" +) + +// OperationService provides business logic for tracking operations. +// It wraps the repository with convenient methods for step-by-step tracking. +type OperationService struct { + repo port.OperationRepository + logger *slog.Logger +} + +// NewOperationService creates a new operation service. +func NewOperationService(repo port.OperationRepository, logger *slog.Logger) *OperationService { + if logger == nil { + logger = slog.Default() + } + return &OperationService{ + repo: repo, + logger: logger.With("service", "operation"), + } +} + +// StartOperation creates a new operation and returns its ID. +// The operation starts in "running" status. +func (s *OperationService) StartOperation( + ctx context.Context, + projectID string, + opType domain.OperationType, + input map[string]any, + requestID string, +) (string, error) { + op := &domain.Operation{ + ID: uuid.New().String(), + ProjectID: projectID, + Type: opType, + Status: domain.OperationStatusRunning, + RequestID: requestID, + StartedAt: time.Now(), + Input: input, + Steps: []domain.OperationStep{}, + } + + if err := s.repo.Create(ctx, op); err != nil { + s.logger.Error("failed to create operation", + "error", err, + "project_id", projectID, + "type", opType, + ) + return "", err + } + + s.logger.Info("operation started", + "operation_id", op.ID, + "project_id", projectID, + "type", opType, + ) + + return op.ID, nil +} + +// StartStep adds a new step to an operation and marks it as running. +func (s *OperationService) StartStep(ctx context.Context, operationID, stepName string) error { + step := domain.OperationStep{ + Name: stepName, + Status: domain.OperationStatusRunning, + StartedAt: time.Now(), + } + + if err := s.repo.AddStep(ctx, operationID, step); err != nil { + s.logger.Error("failed to start step", + "error", err, + "operation_id", operationID, + "step", stepName, + ) + return err + } + + return nil +} + +// CompleteStep marks a step as completed with optional output. +func (s *OperationService) CompleteStep( + ctx context.Context, + operationID, stepName string, + startedAt time.Time, + output map[string]any, +) error { + step := domain.OperationStep{ + Name: stepName, + Status: domain.OperationStatusCompleted, + StartedAt: startedAt, + DurationMs: time.Since(startedAt).Milliseconds(), + Output: output, + } + + if err := s.repo.UpdateStep(ctx, operationID, step); err != nil { + s.logger.Error("failed to complete step", + "error", err, + "operation_id", operationID, + "step", stepName, + ) + return err + } + + return nil +} + +// FailStep marks a step as failed with error details. +func (s *OperationService) FailStep( + ctx context.Context, + operationID, stepName string, + startedAt time.Time, + errMsg, errDetail string, +) error { + step := domain.OperationStep{ + Name: stepName, + Status: domain.OperationStatusFailed, + StartedAt: startedAt, + DurationMs: time.Since(startedAt).Milliseconds(), + Error: errMsg, + ErrorDetail: domain.TruncateErrorDetail(errDetail), + } + + if err := s.repo.UpdateStep(ctx, operationID, step); err != nil { + s.logger.Error("failed to fail step", + "error", err, + "operation_id", operationID, + "step", stepName, + ) + return err + } + + return nil +} + +// CompleteOperation marks the operation as completed with optional output. +func (s *OperationService) CompleteOperation( + ctx context.Context, + operationID string, + output map[string]any, +) error { + if err := s.repo.Complete(ctx, operationID, domain.OperationStatusCompleted, output, "", ""); err != nil { + s.logger.Error("failed to complete operation", + "error", err, + "operation_id", operationID, + ) + return err + } + + s.logger.Info("operation completed", + "operation_id", operationID, + ) + + return nil +} + +// FailOperation marks the operation as failed with error details. +func (s *OperationService) FailOperation( + ctx context.Context, + operationID string, + errMsg, errDetail string, +) error { + if err := s.repo.Complete(ctx, operationID, domain.OperationStatusFailed, nil, errMsg, errDetail); err != nil { + s.logger.Error("failed to fail operation", + "error", err, + "operation_id", operationID, + ) + return err + } + + s.logger.Info("operation failed", + "operation_id", operationID, + "error", errMsg, + ) + + return nil +} + +// SetCommitSHA updates the commit SHA for an operation. +// Called after a git commit is created as part of the operation. +func (s *OperationService) SetCommitSHA(ctx context.Context, operationID, sha string) error { + return s.repo.SetCommitSHA(ctx, operationID, sha) +} + +// SetExternalRef updates the external reference for an operation. +// Called when linking to external systems like Woodpecker builds. +func (s *OperationService) SetExternalRef(ctx context.Context, operationID, ref string) error { + op, err := s.repo.Get(ctx, operationID) + if err != nil { + return err + } + op.ExternalRef = ref + return s.repo.Update(ctx, op) +} + +// FindByCommit finds the operation that created a specific commit. +// Used to link builds to the operation that triggered them. +func (s *OperationService) FindByCommit(ctx context.Context, projectID, sha string) (*domain.Operation, error) { + return s.repo.GetByCommitSHA(ctx, projectID, sha) +} + +// Get retrieves an operation by ID. +func (s *OperationService) Get(ctx context.Context, operationID string) (*domain.Operation, error) { + return s.repo.Get(ctx, operationID) +} + +// List returns operations matching the filter criteria. +func (s *OperationService) List(ctx context.Context, filter domain.OperationFilters) ([]*domain.Operation, error) { + return s.repo.List(ctx, filter) +} + +// LinkToParent sets the triggered_by field to link to a parent operation. +func (s *OperationService) LinkToParent(ctx context.Context, operationID, parentID string) error { + return s.repo.SetTriggeredBy(ctx, operationID, parentID) +} diff --git a/internal/service/project_infra_crud.go b/internal/service/project_infra_crud.go index b060f4f..e53397d 100644 --- a/internal/service/project_infra_crud.go +++ b/internal/service/project_infra_crud.go @@ -170,7 +170,8 @@ func (s *ProjectInfraService) createGitRepo(ctx context.Context, req CreateProje WHERE id = $7 `, repo.Owner, repo.Name, repo.CloneSSH, repo.CloneHTTP, repo.HTMLURL, time.Now(), projectID) if err != nil { - s.logger.Error("failed to update project with git info", "error", err, "project", projectID) + s.logger.Warn("failed to update project with git info", "error", err, "project", projectID) + result.NextSteps = append(result.NextSteps, "Git repo created but metadata not persisted - re-run create to sync") } } @@ -204,7 +205,8 @@ func (s *ProjectInfraService) createPrimaryDNS(ctx context.Context, slug, autoDo Verified: true, } if err := s.domainRepo.Create(ctx, pd); err != nil { - s.logger.Error("failed to store primary domain", "error", err) + s.logger.Warn("failed to store primary domain", "error", err) + result.NextSteps = append(result.NextSteps, "DNS created but domain metadata not persisted") } else { result.Domains = append(result.Domains, pd) } @@ -215,7 +217,8 @@ func (s *ProjectInfraService) createPrimaryDNS(ctx context.Context, slug, autoDo UPDATE projects SET domain = $1, updated_at = $2 WHERE id = $3 `, result.Domain, time.Now(), projectID) if err != nil { - s.logger.Error("failed to update project with domain", "error", err, "project", projectID) + s.logger.Warn("failed to update project with domain", "error", err, "project", projectID) + // Not adding to NextSteps - legacy column, domain still works via project_domains table } } @@ -247,7 +250,8 @@ func (s *ProjectInfraService) createCustomDNS(ctx context.Context, req CreatePro Verified: true, } if err := s.domainRepo.Create(ctx, pd); err != nil { - s.logger.Error("failed to store custom domain", "error", err) + s.logger.Warn("failed to store custom domain", "error", err) + result.NextSteps = append(result.NextSteps, "Custom DNS created but domain metadata not persisted") } else { result.Domains = append(result.Domains, pd) // Custom domain becomes the primary for display @@ -291,8 +295,8 @@ func (s *ProjectInfraService) seedTemplate(ctx context.Context, req CreateProjec templateName = "skeleton" // Default to composable monorepo skeleton } - // Build Go module path for the project - goModule := fmt.Sprintf("github.com/%s/%s", result.GitRepoOwner, result.GitRepoName) + // Build Go module path for the project (use actual git host, not github.com) + goModule := fmt.Sprintf("git.threesix.ai/%s/%s", result.GitRepoOwner, result.GitRepoName) vars := map[string]string{ "PROJECT_NAME": req.Name, @@ -453,7 +457,8 @@ func (s *ProjectInfraService) createInitialDeployment(ctx context.Context, req C WHERE id = $5 `, imageName, "pending", 1, time.Now(), req.Name) if err != nil { - s.logger.Error("failed to update project with deployment info", "error", err, "project", req.Name) + s.logger.Warn("failed to update project with deployment info", "error", err, "project", req.Name) + result.NextSteps = append(result.NextSteps, "Deployment created but status not persisted - status may show stale") } } diff --git a/internal/worker/operation_cleanup.go b/internal/worker/operation_cleanup.go new file mode 100644 index 0000000..3624675 --- /dev/null +++ b/internal/worker/operation_cleanup.go @@ -0,0 +1,125 @@ +package worker + +import ( + "context" + "log/slog" + "sync" + "time" + + "github.com/orchard9/rdev/internal/port" +) + +// OperationCleanup runs periodic cleanup of old operations. +// Operations older than the retention period (default 30 days) are deleted. +type OperationCleanup struct { + repo port.OperationRepository + logger *slog.Logger + retentionPeriod time.Duration + cleanupInterval time.Duration + + ctx context.Context + cancel context.CancelFunc + wg sync.WaitGroup +} + +// OperationCleanupConfig holds configuration for operation cleanup. +type OperationCleanupConfig struct { + // RetentionPeriod is how long to keep operations. + // Default: 30 days. + RetentionPeriod time.Duration + + // CleanupInterval is how often to run cleanup. + // Default: 1 hour. + CleanupInterval time.Duration + + Logger *slog.Logger +} + +// DefaultOperationCleanupConfig returns sensible defaults. +func DefaultOperationCleanupConfig() *OperationCleanupConfig { + return &OperationCleanupConfig{ + RetentionPeriod: 30 * 24 * time.Hour, // 30 days + CleanupInterval: 1 * time.Hour, + Logger: slog.Default(), + } +} + +// NewOperationCleanup creates a new operation cleanup worker. +func NewOperationCleanup(repo port.OperationRepository, cfg *OperationCleanupConfig) *OperationCleanup { + if cfg == nil { + cfg = DefaultOperationCleanupConfig() + } + + ctx, cancel := context.WithCancel(context.Background()) + + return &OperationCleanup{ + repo: repo, + logger: cfg.Logger.With("component", "operation-cleanup"), + retentionPeriod: cfg.RetentionPeriod, + cleanupInterval: cfg.CleanupInterval, + ctx: ctx, + cancel: cancel, + } +} + +// Start begins the cleanup loop. +func (c *OperationCleanup) Start() { + c.logger.Info("operation cleanup started", + "retention_period", c.retentionPeriod, + "cleanup_interval", c.cleanupInterval, + ) + + c.wg.Add(1) + go c.cleanupLoop() +} + +// Stop gracefully shuts down the cleanup worker. +func (c *OperationCleanup) Stop() { + c.logger.Info("operation cleanup stopping") + c.cancel() + c.wg.Wait() + c.logger.Info("operation cleanup stopped") +} + +// cleanupLoop runs periodic cleanup. +func (c *OperationCleanup) cleanupLoop() { + defer c.wg.Done() + + // Run immediately on start + c.runCleanup() + + ticker := time.NewTicker(c.cleanupInterval) + defer ticker.Stop() + + for { + select { + case <-c.ctx.Done(): + return + case <-ticker.C: + c.runCleanup() + } + } +} + +// runCleanup deletes operations older than the retention period. +func (c *OperationCleanup) runCleanup() { + ctx, cancel := context.WithTimeout(c.ctx, 30*time.Second) + defer cancel() + + cutoff := time.Now().Add(-c.retentionPeriod) + deleted, err := c.repo.DeleteOlderThan(ctx, cutoff) + if err != nil { + c.logger.Error("failed to cleanup old operations", + "error", err, + "cutoff", cutoff, + ) + return + } + + if deleted > 0 { + c.logger.Info("cleaned up old operations", + "deleted", deleted, + "cutoff", cutoff, + ) + } +}