From 863dfd321459fe535336e50bcf692d71c3bb9f1e Mon Sep 17 00:00:00 2001 From: jordan Date: Sat, 7 Feb 2026 19:32:19 -0700 Subject: [PATCH] fix: skip root deployment for empty template (defaults to skeleton) When req.Template is empty, it defaults to 'skeleton' but the check in createInitialDeployment only matched 'skeleton' explicitly, not empty string. This caused a broken deployment to be created for monorepo projects with a non-existent image. Root cause: slackpath-5 creates project with empty template, which defaults to skeleton, but createInitialDeployment was still creating a root deployment that references registry.threesix.ai/{project}:latest which never gets built (skeleton has no root Dockerfile). Co-Authored-By: Claude Opus 4.5 --- CLAUDE.md | 1 + cookbooks/scripts/common.sh | 41 +++++- docs/ui/ideation/ascii_screens.md | 121 ++++++++++++++++++ docs/ui/ideation/reference.md | 86 +++++++++++++ docs/ui/ideation/roadmap.md | 61 +++++++++ .../templates/skeleton/.woodpecker.yml.tmpl | 50 +++++++- internal/service/project_infra_crud.go | 3 +- 7 files changed, 357 insertions(+), 6 deletions(-) create mode 100644 docs/ui/ideation/ascii_screens.md create mode 100644 docs/ui/ideation/reference.md create mode 100644 docs/ui/ideation/roadmap.md diff --git a/CLAUDE.md b/CLAUDE.md index 2eefb0d..edefe61 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -53,6 +53,7 @@ When discussing code: "add to **platform**" = edit rdev; "add to **skeleton**" = ## Critical Rules +- **Root cause fixes:** When diagnosing failures in generated projects, NEVER patch the project directly. Find the systemic root cause in: (1) **platform** - rdev handlers/services that create resources, (2) **skeleton** - templates that ship in generated projects, or (3) **cookbook** - test scripts with wrong assumptions. Fix the source, not the symptom. Every project-specific fix is technical debt that will recur. - **LLM vs rdev:** LLMs generate code; rdev executes deterministic operations (git, lint, deploy). 
Never rely on LLMs for runbook tasks. - **Pod git ops:** Git operations run inside pods via `PodGitOperations` (kubectl exec), never locally. - **No dead code:** Delete unused code immediately. Don't leave "might use later" exports. diff --git a/cookbooks/scripts/common.sh b/cookbooks/scripts/common.sh index abb3ba1..a061a58 100755 --- a/cookbooks/scripts/common.sh +++ b/cookbooks/scripts/common.sh @@ -149,11 +149,16 @@ wait_for_build() { # Arguments: project_id [max_attempts] [poll_interval] # Returns: 0 on success, 1 on failure, 2 on timeout # On failure, automatically runs diagnostics +# +# Fast-fail behavior: Returns immediately on failure/error/killed states +# instead of waiting for timeout. This prevents "blind waiting" when +# the pipeline has already failed. wait_for_pipeline() { local project_id="$1" local max_attempts="${2:-60}" # 5 minutes default local poll_interval="${3:-5}" local attempt=0 + local tracked_pipeline="" # Track specific pipeline once found echo -e "${CYAN}Waiting for CI pipeline...${NC}" @@ -181,14 +186,40 @@ wait_for_pipeline() { local pipeline_number pipeline_number=$(echo "$result" | jq -r '.data[0].number // "?"') + # Track the pipeline we're monitoring + if [[ -z "$tracked_pipeline" ]]; then + tracked_pipeline="$pipeline_number" + echo " Tracking pipeline #$pipeline_number" + fi + case "$status" in success) - echo -e "${GREEN}Pipeline #$pipeline_number completed successfully!${NC}" + echo -e "${GREEN}✓ Pipeline #$pipeline_number completed successfully!${NC}" return 0 ;; failure|error|killed) - echo -e "${RED}Pipeline #$pipeline_number failed with status: $status${NC}" - # Automatically diagnose the failure + # FAST FAIL: Don't wait for timeout, fail immediately + echo "" + echo -e "${RED}✗ Pipeline #$pipeline_number failed (status: $status)${NC}" + echo "" + + # Quick inline step summary before full diagnostics + local steps_response + steps_response=$(api_call GET "/projects/$project_id/pipelines/$pipeline_number/steps" 
2>/dev/null || echo '{}') + local has_steps + has_steps=$(echo "$steps_response" | jq 'has("data")' 2>/dev/null || echo "false") + + if [[ "$has_steps" == "true" ]]; then + # Show failed steps inline for quick diagnosis + local failed_count + failed_count=$(echo "$steps_response" | jq '[.data.steps[] | select(.status == "failure" or .status == "error" or .status == "killed")] | length') + if [[ "$failed_count" -gt 0 ]]; then + echo -e "${RED} Failed steps:${NC}" + echo "$steps_response" | jq -r '.data.steps[] | select(.status == "failure" or .status == "error" or .status == "killed") | " ✗ \(.name): exit \(.exit_code // "?")"' + fi + fi + + # Full diagnostics diagnose_pipeline_failure "$project_id" return 1 ;; @@ -205,6 +236,10 @@ wait_for_pipeline() { done echo -e "${YELLOW}Timeout waiting for pipeline to complete${NC}" + # On timeout, still run diagnostics to help debug + if [[ -n "$tracked_pipeline" ]]; then + diagnose_pipeline_failure "$project_id" + fi return 2 } diff --git a/docs/ui/ideation/ascii_screens.md b/docs/ui/ideation/ascii_screens.md new file mode 100644 index 0000000..58ad8f1 --- /dev/null +++ b/docs/ui/ideation/ascii_screens.md @@ -0,0 +1,121 @@ +# Orchard Studio UI - ASCII Screens + +## 1. Project Dashboard (The "Lobby") + +Entry point for the Product Owner. 
+ +```text ++-----------------------------------------------------------------------------+ +| ORCHARD STUDIO [ New Project ] | ++-----------------------------------------------------------------------------+ +| | +| Active Projects | +| | +| +---------------------------+ +---------------------------+ | +| | cool-project | | internal-tool-v2 | | +| | ● Running | | ● Pending | | +| | | | | | +| | Last: Add Auth Feature | | Init: 2 mins ago | | +| | [ View ] | | [ View ] | | +| +---------------------------+ +---------------------------+ | +| | +| +---------------------------+ | +| | legacy-migration | | +| | ● Failed | | +| | | | +| | Error: OOM Kill | | +| | [ Logs ] | | +| +---------------------------+ | +| | ++-----------------------------------------------------------------------------+ +``` + +## 2. Project Workspace (The "Three Panes") + +The core workspace for defining and building. + +```text ++-------------------------------------------------------------------------------------+ +| < Back | cool-project | ● Live (v0.4.2) [ Deploy ] | ++--------------------------+------------------------------+---------------------------+ +| CHAT (Architect) | PLAN (Blueprint) | PREVIEW (Staging) | +| | | | +| User: I need a way for | ## Data Model | +---------------------+ | +| users to upload cats. | | | Cool App | | +| | [+] Table: Cats | | | | +| Arch: Okay. Should it | - id: uuid | | [ Login ] | | +| be public or private? | - name: string | | | | +| | - img_url: string | | Welcome! | | +| User: Public feed. | | | | | +| | ## API | | | | +| Arch: Updating plan... | | | | | +| [Plan Updated] | [+] POST /api/cats | | | | +| | [+] GET /api/feed | | | | +| Arch: I have a question | | | | | +| about image hosting. | ## UI Components | | | | +| [Review Needed] | | | | | +| | [+] CatCard.tsx | | | | +| [ Type message... ] | [+] UploadModal.tsx | | | | ++--------------------------+------------------------------+---------------------------+ +``` + +## 3. 
Review & Orchestration (The "Engine Room") + +When the Architect needs technical confirmation or when a build is running. + +```text ++-----------------------------------------------------------------------------+ +| Request: "Add Cat Upload Feature" | ++-----------------------------------------------------------------------------+ +| | +| Status: ● Reviewing | +| | +| [!] Question from Engineering Agent | +| "I see we are using S3 for storage. Do we have a bucket configured | +| for public read access yet?" | +| | +| [ Yes, it's 'img-prod' ] [ No, create one ] [ Let me check... ] | +| | ++-----------------------------------------------------------------------------+ +| | +| Work Queue | +| | +| 1. [Spec] Analyze Requirements .................. ✓ Done | +| 2. [Code] Generate Handlers (cats.go) ........... ✓ Done | +| 3. [Code] Generate UI (Upload.tsx) .............. ⟳ Running... | +| 4. [Test] Run Integration Tests ................. ○ Pending | +| 5. [Deploy] Update K8s Manifests ................ ○ Pending | +| | ++-----------------------------------------------------------------------------+ +| Logs / OTEL | +| > agent: generating component structure... | +| > agent: checking imports... | ++-----------------------------------------------------------------------------+ +``` + +## 4. History & OTEL (The "Rearview Mirror") + +Viewing past requests and system telemetry. 
+ +```text ++-----------------------------------------------------------------------------+ +| Request History | ++-----------------------------------------------------------------------------+ +| | +| ID | Request | Status | Duration | Traces | +| ------+--------------------------+------------+----------+--------------- | +| #104 | "Fix header alignment" | ✓ Deployed | 45s | [View Trace] | +| #103 | "Add Cat Upload" | ✓ Deployed | 5m 12s | [View Trace] | +| #102 | "User Auth" | ⚠ Failed | 2m 00s | [View Trace] | +| | ++-----------------------------------------------------------------------------+ +| | +| Selected Trace: #103 (Add Cat Upload) | +| | +| [api] POST /sdlc/execute ----------------------------------------- 200ms | +| [agent] Generate Spec ----------------------------------- 1500ms | +| [llm] Claude 3.5 Sonnet -------------------------- 1200ms | +| [worker] git commit ----------------------- 100ms | +| | ++-----------------------------------------------------------------------------+ +``` diff --git a/docs/ui/ideation/reference.md b/docs/ui/ideation/reference.md new file mode 100644 index 0000000..abb5545 --- /dev/null +++ b/docs/ui/ideation/reference.md @@ -0,0 +1,86 @@ +# Technical Reference: Orchard Studio UI + +This document maps the Orchard Studio UI requirements to the existing `rdev` backend architecture and identifies necessary enhancements. + +## 1. Architecture Overview + +The UI acts as a control plane over the `rdev` API. It orchestrates three main domains: +1. **Project Management**: CRUD operations on K8s pods (`claudebox`). +2. **SDLC Orchestration**: Driving the feature lifecycle via agents. +3. **Agent Interaction**: Conversational interface for requirements gathering. + +## 2. Feature Mapping + +### 2.1 Project List & Dashboard +**UI Requirement**: "See a list of active projects... 
go into one project" +**Backend Support**: `GET /api/v1/projects` +* **Existing**: `Project` domain model (`internal/domain/project.go`) supports ID, Name, Status (`running`, `pending`, etc.). +* **Missing**: UI integration for real-time status updates (SSE/WebSocket) on project health. The backend side is already implemented in `internal/handlers/projects_stream.go`. + +### 2.2 Work Queue +**UI Requirement**: "See the list of work that is queued up" +**Backend Support**: `GET /api/v1/projects/{id}/work` (inferred) +* **Existing**: `WorkTask` model (`internal/domain/work.go`) supports types `build`, `test`, `deploy`, `sdlc`. +* **Logic**: The `WorkQueueStats` and pagination are supported in the domain. +* **UI Needs**: A polling or SSE mechanism to update the task list as the orchestrator schedules work. + +### 2.3 Conversational Request (The Architect) +**UI Requirement**: "Enter a flow where you can create a request... conversation creates specs" +**Backend Support**: **PARTIAL** +* **Existing**: `SDLCOrchestratorService` (`internal/service/sdlc_orchestrator.go`) can execute agent actions. +* **Missing**: + * **Persistent Chat**: No `Chat` or `Message` domain model exists to store history. `AgentRequest` has `SessionID`, but the backend doesn't seem to persist the conversation for UI retrieval. + * **Blueprint Model**: The "Plan/Blueprint" (JSON structure of requirements) described in the vision is not yet a concrete backend entity. +* **Proposal**: + * Create a `Blueprint` entity to store the structured plan (`dataModel`, `endpoints`, etc.). + * Create a `Conversation` entity to store the chat history between User and Architect. + +### 2.4 Request Review & Investigation +**UI Requirement**: "Request goes into a slot to be reviewed... problems investigated... questions raised" +**Backend Support**: `POST /projects/{id}/sdlc/execute` +* **Existing**: The `SDLCService` and `Classifier` (implied) determine the next action (`ActionBlocked`, `ActionTransition`). +* **Flow**: + 1.
UI sends `execute` request. + 2. Backend runs classification. + 3. If blocked/question needed, returns `ActionBlocked` or `ActionIdle` with a message. + 4. UI renders this as a "Question" or "Review Item". + 5. User responds via `resolve` endpoint (`POST /projects/{id}/sdlc/resolve`). + +### 2.5 Execution & OTEL +**UI Requirement**: "Queues it up... see previous requests and full OTEL" +**Backend Support**: +* **Execution**: `SDLCOrchestratorService` dispatches actions to agents (`executeAgentAction`). +* **OTEL**: `internal/telemetry` exists. The UI likely needs a proxy endpoint to query Trace/Metric data (e.g., via Jaeger/Prometheus API) or embed a view. + +## 3. Data Model Enhancements (Required) + +To support the vision, the backend likely needs these additions: + +```go +// Proposed Domain Models + +type Blueprint struct { + ID string + ProjectID string + Sections map[string]interface{} // JSON: DataModel, API, UI + Status string // Draft, Approved +} + +type ChatMessage struct { + ID string + ProjectID string + Role string // user, system (architect) + Content string + Timestamp time.Time +} +``` + +## 4. API Integration Strategy + +| UI Feature | Endpoint | Notes | +| :--- | :--- | :--- | +| **Project List** | `GET /projects` | Polling or SSE for status changes. | +| **Chat** | `POST /projects/{id}/chat` | **New Endpoint**. Needs to persist msg & invoke Agent. | +| **Plan View** | `GET /projects/{id}/blueprint` | **New Endpoint**. Returns structured spec. | +| **Build/Run** | `POST /projects/{id}/sdlc/execute` | Triggers the actual work based on current state. | +| **History** | `GET /projects/{id}/work` | List of past `WorkTask`s. 
| diff --git a/docs/ui/ideation/roadmap.md b/docs/ui/ideation/roadmap.md new file mode 100644 index 0000000..49387b1 --- /dev/null +++ b/docs/ui/ideation/roadmap.md @@ -0,0 +1,61 @@ +# Roadmap: Orchard Studio UI Implementation + +This roadmap outlines the steps to move from the current `rdev` backend to the fully realized Orchard Studio UI. + +## Phase 1: Foundation & Read-Only UI +**Goal:** Visualize the current state of `rdev` projects and work queues. + +1. **Project List View**: + * Implement `GET /projects` integration. + * Build dashboard card layout. + * Add basic status polling. +2. **Work Queue View**: + * Implement `GET /projects/{id}/work` integration. + * Display list of running/completed tasks. +3. **Basic Preview**: + * Embed iframe pointing to `http://{project-name}.threesix.ai`. + +## Phase 2: Conversational Core (The Backend Gap) +**Goal:** Implement the missing backend logic for Chat and Blueprints. + +1. **Blueprint Entity**: + * Define `Blueprint` struct (JSONB) in `internal/domain`. + * Create CRUD handlers (`GET`, `PUT`). + * Store "Plan" state here. +2. **Chat Persistence**: + * Create `Chat` service/store. + * Persist messages from User and Agent. +3. **Architect Agent Service**: + * Wire up `POST /chat` endpoint. + * Logic: Receive msg -> Persist -> Invoke `Agent` -> Parse "Plan" updates -> Persist Blueprint -> Reply. + +## Phase 3: The Interactive Studio +**Goal:** Connect the UI to the new Conversational Core. + +1. **Chat Pane**: + * Build chat interface (bubbles, typing indicators). + * Connect to `POST /chat`. +2. **Plan Pane**: + * Build structured JSON/Tree view of the Blueprint. + * Auto-update when Chat returns new plan data. +3. **Interactive Build**: + * Connect "Build It" button to `POST /sdlc/execute`. + * Stream progress logs to the UI. + +## Phase 4: Review Loop & Refinement +**Goal:** Handle the "Review/Question" flow. + +1. **Intervention UI**: + * Handle `ActionBlocked` or `ActionAwaitApproval` states. 
+ * Render "Agent Questions" in the Chat or Review pane. + * Implement form inputs for answering questions (e.g., dropdowns, text). +2. **Telemetry Integration**: + * Connect to OTEL/Jaeger backend. + * Embed trace views for completed requests. + +## Phase 5: Polish & Scale +**Goal:** Production readiness. + +1. **Real-time Polish**: Replace polling with SSE/WebSockets for all status updates. +2. **Visual Design**: Apply "Orchard" branding (dark mode, crisp typography). +3. **Mobile Responsiveness**: Ensure critical flows work on tablet/mobile. diff --git a/internal/adapter/templates/templates/skeleton/.woodpecker.yml.tmpl b/internal/adapter/templates/templates/skeleton/.woodpecker.yml.tmpl index 28d8205..d6baae1 100644 --- a/internal/adapter/templates/templates/skeleton/.woodpecker.yml.tmpl +++ b/internal/adapter/templates/templates/skeleton/.woodpecker.yml.tmpl @@ -142,10 +142,56 @@ steps: branch: main event: push - # Deploy docs to docs.{{DOMAIN}} (skipped if no docs image was built) + # Verify docs image exists in registry before deploying + # Prevents ImagePullBackOff errors from missing/failed image builds + verify-docs-image: + image: alpine/curl + depends_on: [build-docs-image] + failure: ignore + commands: + - | + TAG="${CI_COMMIT_SHA:0:8}" + REPO="{{PROJECT_NAME}}-docs" + REGISTRY="registry.threesix.ai" + + # Check if docs were built (same check as deploy-docs) + if [ ! 
-d "docs/build" ]; then + echo "==> No docs build output, skipping verification" + exit 0 + fi + + echo "==> Verifying image $REGISTRY/$REPO:$TAG exists in registry" + + # Query registry v2 API to check if manifest exists + # Returns 200 if image exists, 404 if not + HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" \ + --insecure \ + "https://$REGISTRY/v2/$REPO/manifests/$TAG" \ + -H "Accept: application/vnd.docker.distribution.manifest.v2+json") + + if [ "$HTTP_CODE" = "200" ]; then + echo "==> Image verified: $REGISTRY/$REPO:$TAG" + # Create marker file for deploy-docs to check + touch /tmp/image-verified + exit 0 + elif [ "$HTTP_CODE" = "404" ]; then + echo "==> WARNING: Image $REGISTRY/$REPO:$TAG not found in registry" + echo " This may indicate the build step failed or is still pushing" + echo " Deploy step will be skipped to prevent ImagePullBackOff" + exit 1 + else + echo "==> WARNING: Registry check returned HTTP $HTTP_CODE" + echo " Proceeding cautiously - deploy may fail if image missing" + exit 0 + fi + when: + branch: main + event: push + + # Deploy docs to docs.{{DOMAIN}} (skipped if no docs image was built or verified) deploy-docs: image: bitnami/kubectl:latest - depends_on: [build-docs-image] + depends_on: [verify-docs-image] failure: ignore commands: - | diff --git a/internal/service/project_infra_crud.go b/internal/service/project_infra_crud.go index 947cfb2..466361c 100644 --- a/internal/service/project_infra_crud.go +++ b/internal/service/project_infra_crud.go @@ -487,7 +487,8 @@ func (s *ProjectInfraService) createInitialDeployment(ctx context.Context, req C log := logging.FromContext(ctx).WithService("project_infra") // Skip root deployment for monorepo (skeleton) projects. // Skeleton projects have no root Dockerfile - components create their own deployments. - if req.Template == "skeleton" { + // Note: empty template defaults to "skeleton", so check for both. 
+ if req.Template == "skeleton" || req.Template == "" { log.Info("skipping root deployment for monorepo project", logging.FieldProjectID, req.Name, "template", req.Template,