feat: Implement hexagonal architecture with services, webhooks, queue, and telemetry

Major refactoring to hexagonal (ports & adapters) architecture: - Add service layer (apikey_service, project_service) for business logic - Add webhook system with dispatcher and delivery tracking - Add command queue with priority-based processing - Add rate limiting with sliding window algorithm - Add audit logging for command execution - Add OpenTelemetry integration (traces, metrics, spans) - Add circuit breaker for fault tolerance - Add cached repository wrapper for performance - Add comprehensive validation package - Add Kubernetes client integration for pod management - Add database migrations (allowed_ips, audit_log, rate_limiting, queue, webhooks) - Add network policy and PodDisruptionBudget for k8s - Remove legacy executor and projects/registry packages - Untrack secrets.yaml (now managed via envault) - Add coverage.out to .gitignore - Add e2e test infrastructure with docker-compose - Add comprehensive documentation (API, architecture, operations, plans) - Add golangci-lint config and pre-commit hook Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-25 19:57:46 -07:00 · 2026-01-25 19:57:46 -07:00 · 72d16929ca
commit 72d16929ca
parent 214d1c8a4c
119 changed files with 24314 additions and 1202 deletions
--- a/.gitignore
+++ b/.gitignore
@ -3,6 +3,7 @@
 *.credentials*
 *.key
 *.pem
+.secrets

 # Kubernetes secrets with real values (use *.example as template)
 deployments/k8s/base/secrets.yaml
@ -25,6 +26,7 @@ Thumbs.db
 *.tar
 *.gz
 /rdev-api
+coverage.out

 # Deploy keys (generated, never commit)
 *-deploy-key
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -0,0 +1,66 @@
+# Changelog
+
+All notable changes to rdev will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [1.0.0] - 2024-01-25
+
+### Added
+
+#### Core Features
+- REST API for remote development environments
+- SSE streaming for real-time command output
+- Support for Claude, shell, and git commands
+- Project discovery via Kubernetes labels
+
+#### Security
+- API key authentication with scopes (projects:read, projects:execute, keys:read, keys:write, admin)
+- IP allowlisting for API keys
+- Command sanitization to prevent injection attacks
+- Rate limiting per API key
+- Concurrent command limiting per project
+
+#### Kubernetes Integration
+- Label-based project discovery (`rdev.orchard9.ai/project=true`)
+- ConfigMap support for project configuration
+- Pod watch for real-time status updates
+- Service account RBAC with minimal permissions
+- NetworkPolicy for ingress/egress control
+
+#### Reliability
+- Circuit breaker for Kubernetes API calls
+- Graceful shutdown with 30-second timeout
+- Health checks (liveness and readiness)
+- Response caching with TTL
+- Connection pool tuning
+
+#### Observability
+- Prometheus metrics endpoint
+- Structured JSON logging
+- Request ID tracking
+
+#### Documentation
+- Architecture documentation (hexagonal pattern)
+- API documentation with examples
+- Operations documentation with runbooks
+- SSE client examples (JavaScript, Python, Go)
+
+### Architecture
+- Hexagonal architecture (ports and adapters)
+- Domain-driven design with clean separation
+- Comprehensive test suite with benchmarks
+
+### Dependencies
+- Go 1.22+
+- chi v5 for HTTP routing
+- PostgreSQL for API key storage
+- Kubernetes client-go
+
+## [Unreleased]
+
+### Planned
+- OpenTelemetry integration (requires OTLP collector)
+- Horizontal Pod Autoscaler support
+- Multi-cluster support
--- a/Dockerfile.api
+++ b/Dockerfile.api
@ -2,7 +2,7 @@
 # v0.4 - API Server

 # Build stage
-FROM golang:1.23-alpine AS builder
+FROM golang:1.25-alpine AS builder

 WORKDIR /app

--- a/Dockerfile.api.prebuild
+++ b/Dockerfile.api.prebuild
@ -1,7 +1,7 @@
-# rdev-api - Pre-built binary runtime
+# rdev-api - Prebuild for cross-platform deployment
 FROM alpine:3.19

-# Install runtime dependencies
+# Install kubectl for exec into pods
 RUN apk add --no-cache ca-certificates curl \
    && curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" \
    && chmod +x kubectl \
--- a/IMPLEMENTATION_PLAN_V2.md
+++ b/IMPLEMENTATION_PLAN_V2.md
@ -0,0 +1,974 @@
+# rdev Implementation Plan v2
+
+> Weeks 5-10: From 75% Complete to Pristine Production
+
+## Current State (After Week 4)
+
+### Completed
+| Component | Status | Test Coverage |
+|-----------|--------|---------------|
+| Hexagonal Architecture | ✅ | Domain, Ports, Services |
+| Authentication | ✅ | 394 lines |
+| HTTP API + OpenAPI | ✅ | 1,189 lines |
+| Command Execution | ✅ | 359 lines |
+| Command Sanitization | ✅ | 257 lines |
+| SSE Streaming | ✅ | Last-Event-ID support |
+| Rate Limiting | ✅ | 413 lines |
+| Command Limiting | ✅ | 414 lines |
+| Database + Migrations | ✅ | Auto-migrations |
+| Domain Models | ✅ | 542 lines |
+| Port Interfaces | ✅ | 380 lines |
+| Prometheus Metrics | ✅ | Path normalization |
+| Validation Package | ✅ | 548 lines |
+
+### Remaining Gaps
+| Gap | Impact | Priority |
+|-----|--------|----------|
+| Claude config file I/O | Handlers broken | CRITICAL |
+| Legacy code mixed in | Technical debt | HIGH |
+| Hardcoded projects | Scalability | HIGH |
+| No adapter tests | Reliability | HIGH |
+| IP allowlisting | Security | HIGH |
+| Production manifests | Deployment | MEDIUM |
+| Validation not integrated | Consistency | MEDIUM |
+| Documentation gaps | Usability | MEDIUM |
+
+---
+
+## Philosophy: Foundation First
+
+```
+Week 5-6: Clean the House
+├── Remove all legacy code
+├── Fix broken functionality
+└── Achieve 100% working state
+
+Week 7-8: Strengthen the Foundation
+├── Complete test coverage
+├── Add missing security features
+└── Production-harden deployment
+
+Week 9-10: Polish and Document
+├── Performance optimization
+├── Comprehensive documentation
+└── Final quality gates
+```
+
+---
+
+## Week 5: Legacy Removal & Core Fixes
+
+**Goal**: Remove all legacy code, fix Claude config, integrate validation
+
+### Task 5.1: Remove Legacy Code (4h)
+**Files to delete:**
+- `internal/executor/executor.go` → replaced by `internal/adapter/kubernetes/executor.go`
+- `internal/projects/registry.go` → replaced by `internal/adapter/kubernetes/project_repository.go`
+
+**Files to update:**
+- `internal/handlers/claude_config.go` → Use service layer, not legacy executor
+- `cmd/rdev-api/main.go` → Remove legacy imports
+
+**Acceptance:**
+- `go build ./...` passes
+- No imports from `internal/executor` or `internal/projects`
+- All tests pass
+
+### Task 5.2: Implement Claude Config File I/O (6h)
+**Problem**: Handlers exist but don't actually read/write files
+
+**Create:**
+```
+internal/service/claude_config_service.go
+internal/adapter/kubernetes/claude_config_repository.go
+internal/port/claude_config_repository.go
+```
+
+**Operations to implement:**
+```go
+type ClaudeConfigRepository interface {
+    // List items in .claude/{type}/ directory
+    List(ctx context.Context, podName, itemType string) ([]ConfigItem, error)
+
+    // Get single item content
+    Get(ctx context.Context, podName, itemType, name string) (*ConfigItem, error)
+
+    // Create new item (write file)
+    Create(ctx context.Context, podName, itemType string, item *ConfigItem) error
+
+    // Update existing item
+    Update(ctx context.Context, podName, itemType, name string, content string) error
+
+    // Delete item (remove file)
+    Delete(ctx context.Context, podName, itemType, name string) error
+}
+```
+
+**Implementation via kubectl:**
+```bash
+# List: kubectl exec pod -- ls /workspace/.claude/commands/
+# Get:  kubectl exec pod -- cat /workspace/.claude/commands/deploy.md
+# Create: kubectl exec pod -- sh -c 'cat > /workspace/.claude/commands/new.md'
+# Delete: kubectl exec pod -- rm /workspace/.claude/commands/old.md
+```
+
+**Acceptance:**
+- Can list/create/read/update/delete commands, skills, agents via API
+- E2E test proves round-trip works
+
+### Task 5.3: Integrate Validation Package (3h)
+**Replace inline checks with validate package:**
+
+**Before:**
+```go
+if req.Name == "" {
+    api.WriteBadRequest(w, r, "name is required")
+    return
+}
+```
+
+**After:**
+```go
+v := validate.New()
+v.Required(req.Name, "name")
+v.Name(req.Name, "name") // alphanumeric, 1-64 chars
+if err := v.Error(); err != nil {
+    api.WriteBadRequest(w, r, err.Error())
+    return
+}
+```
+
+**Files to update:**
+- `internal/handlers/keys.go`
+- `internal/handlers/projects.go`
+- `internal/handlers/claude_config.go`
+- `internal/service/project_service.go`
+
+**Acceptance:**
+- All inline validation replaced with validate package
+- Consistent error messages across all endpoints
+- All handler tests pass
+
+### Task 5.4: Consolidate Docker Images (1h)
+**Current state:** 4 Dockerfiles with unclear purpose
+
+**Action:**
+- Keep `Dockerfile` as single canonical image
+- Delete `Dockerfile.api`, `Dockerfile.api.prebuild`, `Dockerfile.api.simple`
+- Update any CI/scripts referencing old files
+
+**Acceptance:**
+- Single `Dockerfile` builds and runs correctly
+- No references to deleted Dockerfiles
+
+---
+
+## Week 6: Dynamic Project Discovery
+
+**Goal**: Remove hardcoded projects, discover from K8s
+
+### Task 6.1: Define Project Labels (1h)
+**K8s label convention:**
+```yaml
+metadata:
+  labels:
+    rdev.orchard9.ai/project: "true"
+    rdev.orchard9.ai/name: "pantheon"
+    rdev.orchard9.ai/workspace: "/workspace"
+  annotations:
+    rdev.orchard9.ai/description: "Go API backend"
+```
+
+**Update existing pods:**
+- claudebox-pantheon-0
+- claudebox-aeries-0
+
+### Task 6.2: Implement Label Discovery (4h)
+**Update `internal/adapter/kubernetes/project_repository.go`:**
+
+```go
+func (r *ProjectRepository) RefreshStatus(ctx context.Context) error {
+    // List pods with label rdev.orchard9.ai/project=true
+    pods, err := r.client.CoreV1().Pods(r.namespace).List(ctx, metav1.ListOptions{
+        LabelSelector: "rdev.orchard9.ai/project=true",
+    })
+
+    // For each pod, extract project info from labels
+    for _, pod := range pods.Items {
+        project := domain.Project{
+            ID:          domain.ProjectID(pod.Labels["rdev.orchard9.ai/name"]),
+            Name:        pod.Labels["rdev.orchard9.ai/name"],
+            Description: pod.Annotations["rdev.orchard9.ai/description"],
+            PodName:     pod.Name,
+            Workspace:   pod.Labels["rdev.orchard9.ai/workspace"],
+            Status:      mapPodPhase(pod.Status.Phase),
+        }
+        r.register(project)
+    }
+}
+```
+
+**Acceptance:**
+- Projects auto-discovered from labeled pods
+- No hardcoded project list
+- New pods automatically appear
+
+### Task 6.3: Add Project ConfigMap Support (3h)
+**For complex project configuration:**
+
+```yaml
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: rdev-projects
+data:
+  pantheon.yaml: |
+    name: pantheon
+    description: Go API backend
+    pod_selector: claudebox-pantheon-0
+    workspace: /workspace
+    allowed_commands:
+      - claude
+      - shell
+      - git
+    max_concurrent_commands: 5
+```
+
+**Implementation:**
+- Read ConfigMap on startup
+- Merge with label-discovered projects
+- ConfigMap takes precedence for settings
+
+### Task 6.4: Pod Watch for Real-Time Updates (4h)
+**Instead of polling, watch for changes:**
+
+```go
+func (r *ProjectRepository) StartWatching(ctx context.Context) error {
+    watcher, err := r.client.CoreV1().Pods(r.namespace).Watch(ctx, metav1.ListOptions{
+        LabelSelector: "rdev.orchard9.ai/project=true",
+    })
+
+    go func() {
+        for event := range watcher.ResultChan() {
+            switch event.Type {
+            case watch.Added:
+                r.register(podToProject(event.Object))
+            case watch.Deleted:
+                r.unregister(podToProjectID(event.Object))
+            case watch.Modified:
+                r.update(podToProject(event.Object))
+            }
+        }
+    }()
+}
+```
+
+**Acceptance:**
+- Projects appear within 1s of pod creation
+- Projects disappear within 1s of pod deletion
+- No polling required
+
+---
+
+## Week 7: Security & Test Completion
+
+**Goal**: IP allowlisting, comprehensive adapter tests
+
+### Task 7.1: IP Allowlisting (4h)
+**Schema update:**
+```sql
+ALTER TABLE api_keys ADD COLUMN allowed_ips CIDR[];
+```
+
+**Domain update:**
+```go
+type APIKey struct {
+    // ... existing fields
+    AllowedIPs []net.IPNet `json:"allowed_ips,omitempty"`
+}
+```
+
+**Middleware update:**
+```go
+func (m *AuthMiddleware) checkIPAllowed(key *domain.APIKey, clientIP string) bool {
+    if len(key.AllowedIPs) == 0 {
+        return true // No restriction
+    }
+    ip := net.ParseIP(clientIP)
+    for _, allowed := range key.AllowedIPs {
+        if allowed.Contains(ip) {
+            return true
+        }
+    }
+    return false
+}
+```
+
+**Acceptance:**
+- Keys can have IP restrictions
+- Requests from non-allowed IPs get 403
+- Admin can create unrestricted keys
+
+### Task 7.2: Adapter Integration Tests (6h)
+**Create test infrastructure:**
+
+```
+tests/
+├── integration/
+│   ├── postgres_test.go      # Real postgres via docker
+│   ├── kubernetes_test.go    # Mock kubectl
+│   └── testdata/
+│       └── docker-compose.yml
+```
+
+**Postgres adapter tests:**
+- CRUD operations for API keys
+- Scope/project array handling
+- Connection pool behavior
+- Migration idempotency
+
+**Kubernetes adapter tests:**
+- Mock kubectl responses
+- Command execution with output
+- Error handling (pod not found, timeout)
+- Claude config file operations
+
+**Memory adapter tests:**
+- Stream publisher pub/sub
+- Event replay buffer
+- Concurrent subscriber handling
+
+**Acceptance:**
+- All adapters have >80% coverage
+- Tests run in CI without real K8s
+- Docker-compose for postgres tests
+
+### Task 7.3: Service Layer Tests (4h)
+**Create:**
+```
+internal/service/project_service_test.go
+internal/service/apikey_service_test.go
+internal/service/claude_config_service_test.go
+```
+
+**Test patterns:**
+- Happy path for all operations
+- Error propagation from adapters
+- Business rule enforcement
+- Metrics recording
+
+### Task 7.4: Improve E2E Test Coverage (4h)
+**Expand `tests/e2e/e2e_test.go`:**
+
+```go
+func TestE2E_FullCommandLifecycle(t *testing.T) {
+    // 1. Create API key
+    // 2. Execute claude command
+    // 3. Stream output via SSE
+    // 4. Verify completion event
+    // 5. Check metrics incremented
+}
+
+func TestE2E_RateLimiting(t *testing.T) {
+    // Send 101 requests rapidly
+    // Verify 429 on 101st request
+    // Wait for bucket refill
+    // Verify request succeeds
+}
+
+func TestE2E_SSEReconnection(t *testing.T) {
+    // Start command
+    // Connect to stream
+    // Disconnect
+    // Reconnect with Last-Event-ID
+    // Verify replay
+}
+
+func TestE2E_ConcurrentCommands(t *testing.T) {
+    // Start 5 commands
+    // Verify 6th blocked
+    // Complete one
+    // Verify 6th now succeeds
+}
+```
+
+---
+
+## Week 8: Production Hardening
+
+**Goal**: Production-ready K8s manifests, reliability features
+
+### Task 8.1: K8s Manifest Hardening (4h)
+**Update `deployments/k8s/base/`:**
+
+```yaml
+# deployment.yaml
+spec:
+  template:
+    spec:
+      containers:
+        - name: rdev-api
+          resources:
+            requests:
+              memory: "128Mi"
+              cpu: "100m"
+            limits:
+              memory: "512Mi"
+              cpu: "500m"
+          livenessProbe:
+            httpGet:
+              path: /health
+              port: 8080
+            initialDelaySeconds: 5
+            periodSeconds: 10
+          readinessProbe:
+            httpGet:
+              path: /ready
+              port: 8080
+            initialDelaySeconds: 5
+            periodSeconds: 5
+          securityContext:
+            runAsNonRoot: true
+            readOnlyRootFilesystem: true
+            capabilities:
+              drop: ["ALL"]
+```
+
+```yaml
+# pdb.yaml
+apiVersion: policy/v1
+kind: PodDisruptionBudget
+metadata:
+  name: rdev-api-pdb
+spec:
+  minAvailable: 1
+  selector:
+    matchLabels:
+      app: rdev-api
+```
+
+```yaml
+# network-policy.yaml
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+  name: rdev-api-policy
+spec:
+  podSelector:
+    matchLabels:
+      app: rdev-api
+  policyTypes:
+    - Ingress
+    - Egress
+  ingress:
+    - from:
+        - namespaceSelector:
+            matchLabels:
+              name: ingress
+      ports:
+        - port: 8080
+  egress:
+    - to:
+        - namespaceSelector:
+            matchLabels:
+              name: databases
+      ports:
+        - port: 5432
+    - to:
+        - podSelector:
+            matchLabels:
+              rdev.orchard9.ai/project: "true"
+```
+
+### Task 8.2: RBAC Configuration (2h)
+```yaml
+# rbac.yaml
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: rdev-api
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: rdev-api-role
+rules:
+  - apiGroups: [""]
+    resources: ["pods"]
+    verbs: ["get", "list", "watch"]
+  - apiGroups: [""]
+    resources: ["pods/exec"]
+    verbs: ["create"]
+  - apiGroups: [""]
+    resources: ["configmaps"]
+    verbs: ["get", "list", "watch"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: rdev-api-binding
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: rdev-api-role
+subjects:
+  - kind: ServiceAccount
+    name: rdev-api
+```
+
+### Task 8.3: Graceful Shutdown (3h)
+```go
+// cmd/rdev-api/main.go
+func main() {
+    // ... setup ...
+
+    srv := &http.Server{
+        Addr:    cfg.Addr,
+        Handler: router,
+    }
+
+    // Start server
+    go func() {
+        if err := srv.ListenAndServe(); err != http.ErrServerClosed {
+            log.Fatal(err)
+        }
+    }()
+
+    // Wait for interrupt
+    quit := make(chan os.Signal, 1)
+    signal.Notify(quit, syscall.SIGINT, syscall.SIGTERM)
+    <-quit
+
+    // Graceful shutdown
+    ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+    defer cancel()
+
+    // Stop accepting new requests
+    srv.SetKeepAlivesEnabled(false)
+
+    // Wait for active requests
+    if err := srv.Shutdown(ctx); err != nil {
+        log.Error("forced shutdown", "error", err)
+    }
+
+    // Close database connections
+    db.Close()
+
+    log.Info("server stopped gracefully")
+}
+```
+
+### Task 8.4: Circuit Breaker for K8s (3h)
+**Protect against K8s API failures:**
+
+```go
+type CircuitBreaker struct {
+    failures    int
+    threshold   int
+    resetAfter  time.Duration
+    lastFailure time.Time
+    state       State // Closed, Open, HalfOpen
+    mu          sync.RWMutex
+}
+
+func (cb *CircuitBreaker) Execute(fn func() error) error {
+    cb.mu.RLock()
+    if cb.state == Open && time.Since(cb.lastFailure) < cb.resetAfter {
+        cb.mu.RUnlock()
+        return ErrCircuitOpen
+    }
+    cb.mu.RUnlock()
+
+    err := fn()
+
+    cb.mu.Lock()
+    defer cb.mu.Unlock()
+    if err != nil {
+        cb.failures++
+        cb.lastFailure = time.Now()
+        if cb.failures >= cb.threshold {
+            cb.state = Open
+        }
+    } else {
+        cb.failures = 0
+        cb.state = Closed
+    }
+    return err
+}
+```
+
+### Task 8.5: Health Check Enhancements (2h)
+```go
+// /health - Basic liveness
+func (h *HealthHandler) Health(w http.ResponseWriter, r *http.Request) {
+    api.WriteSuccess(w, r, map[string]string{"status": "ok"})
+}
+
+// /ready - Full readiness
+func (h *HealthHandler) Ready(w http.ResponseWriter, r *http.Request) {
+    checks := make(map[string]string)
+
+    // Database connectivity
+    if err := h.db.PingContext(r.Context()); err != nil {
+        checks["database"] = "unhealthy: " + err.Error()
+    } else {
+        checks["database"] = "healthy"
+    }
+
+    // K8s connectivity
+    if err := h.k8sClient.Ping(r.Context()); err != nil {
+        checks["kubernetes"] = "unhealthy: " + err.Error()
+    } else {
+        checks["kubernetes"] = "healthy"
+    }
+
+    // Check for any unhealthy
+    for _, status := range checks {
+        if strings.HasPrefix(status, "unhealthy") {
+            api.WriteError(w, r, http.StatusServiceUnavailable,
+                "NOT_READY", "service not ready", checks)
+            return
+        }
+    }
+
+    api.WriteSuccess(w, r, map[string]any{
+        "status": "ready",
+        "checks": checks,
+    })
+}
+```
+
+---
+
+## Week 9: Performance & Observability
+
+**Goal**: OpenTelemetry, performance optimization
+
+### Task 9.1: OpenTelemetry Integration (6h)
+**Add tracing:**
+
+```go
+// cmd/rdev-api/main.go
+func initTracing() (*sdktrace.TracerProvider, error) {
+    exporter, err := otlptracehttp.New(context.Background(),
+        otlptracehttp.WithEndpoint(os.Getenv("OTEL_EXPORTER_ENDPOINT")),
+    )
+    if err != nil {
+        return nil, err
+    }
+
+    tp := sdktrace.NewTracerProvider(
+        sdktrace.WithBatcher(exporter),
+        sdktrace.WithResource(resource.NewWithAttributes(
+            semconv.SchemaURL,
+            semconv.ServiceName("rdev-api"),
+            semconv.ServiceVersion(Version),
+        )),
+    )
+    otel.SetTracerProvider(tp)
+    return tp, nil
+}
+```
+
+**Instrument handlers:**
+```go
+func (h *ProjectsHandler) RunClaude(w http.ResponseWriter, r *http.Request) {
+    ctx, span := tracer.Start(r.Context(), "RunClaude")
+    defer span.End()
+
+    span.SetAttributes(
+        attribute.String("project.id", projectID),
+        attribute.String("command.type", "claude"),
+    )
+
+    // ... handler logic ...
+
+    if err != nil {
+        span.RecordError(err)
+        span.SetStatus(codes.Error, err.Error())
+    }
+}
+```
+
+### Task 9.2: Connection Pool Tuning (2h)
+**Database:**
+```go
+db.SetMaxOpenConns(25)
+db.SetMaxIdleConns(10)
+db.SetConnMaxLifetime(5 * time.Minute)
+db.SetConnMaxIdleTime(1 * time.Minute)
+```
+
+**HTTP client for K8s:**
+```go
+transport := &http.Transport{
+    MaxIdleConns:        100,
+    MaxIdleConnsPerHost: 10,
+    IdleConnTimeout:     90 * time.Second,
+}
+```
+
+### Task 9.3: Response Caching (3h)
+**Cache project list (changes infrequently):**
+
+```go
+type CachedProjectRepository struct {
+    inner     port.ProjectRepository
+    cache     *sync.Map
+    ttl       time.Duration
+    lastFetch time.Time
+    mu        sync.RWMutex
+}
+
+func (r *CachedProjectRepository) List(ctx context.Context) ([]domain.Project, error) {
+    r.mu.RLock()
+    if time.Since(r.lastFetch) < r.ttl {
+        if cached, ok := r.cache.Load("projects"); ok {
+            r.mu.RUnlock()
+            return cached.([]domain.Project), nil
+        }
+    }
+    r.mu.RUnlock()
+
+    r.mu.Lock()
+    defer r.mu.Unlock()
+
+    // Double-check after acquiring write lock
+    if time.Since(r.lastFetch) < r.ttl {
+        if cached, ok := r.cache.Load("projects"); ok {
+            return cached.([]domain.Project), nil
+        }
+    }
+
+    projects, err := r.inner.List(ctx)
+    if err != nil {
+        return nil, err
+    }
+
+    r.cache.Store("projects", projects)
+    r.lastFetch = time.Now()
+    return projects, nil
+}
+```
+
+### Task 9.4: Benchmark Suite (3h)
+```go
+// internal/handlers/projects_bench_test.go
+
+func BenchmarkRunClaude(b *testing.B) {
+    // Setup
+    handler := setupTestHandler()
+
+    b.ResetTimer()
+    for i := 0; i < b.N; i++ {
+        req := httptest.NewRequest("POST", "/projects/test/claude",
+            strings.NewReader(`{"prompt":"test"}`))
+        rec := httptest.NewRecorder()
+        handler.RunClaude(rec, req)
+    }
+}
+
+func BenchmarkSSEStreaming(b *testing.B) {
+    // Measure event throughput
+}
+
+func BenchmarkAuthMiddleware(b *testing.B) {
+    // Measure auth overhead
+}
+```
+
+---
+
+## Week 10: Documentation & Polish
+
+**Goal**: Comprehensive docs, final quality pass
+
+### Task 10.1: Architecture Documentation (4h)
+**Create `docs/architecture/`:**
+
+```
+docs/architecture/
+├── README.md           # Overview + diagrams
+├── hexagonal.md        # Port/adapter pattern
+├── security.md         # Auth, sanitization, rate limiting
+├── streaming.md        # SSE protocol, reconnection
+└── diagrams/
+    ├── system-context.mmd
+    ├── component.mmd
+    └── sequence-command.mmd
+```
+
+**Include:**
+- System context diagram
+- Component diagram
+- Sequence diagrams for key flows
+- ADRs (Architecture Decision Records)
+
+### Task 10.2: API Documentation (3h)
+**Enhance OpenAPI spec:**
+- Add examples for all endpoints
+- Document error codes
+- Add authentication examples
+- Include rate limit headers
+
+**Create `docs/api/`:**
+- Quick start guide
+- Authentication guide
+- SSE client examples (JS, Python, Go)
+- Error handling guide
+
+### Task 10.3: Operations Documentation (3h)
+**Create `docs/operations/`:**
+
+```
+docs/operations/
+├── deployment.md       # K8s deployment guide
+├── monitoring.md       # Prometheus/Grafana setup
+├── troubleshooting.md  # Common issues
+├── runbooks/
+│   ├── high-cpu.md
+│   ├── high-memory.md
+│   ├── pod-not-found.md
+│   └── auth-failures.md
+└── disaster-recovery.md
+```
+
+### Task 10.4: Final Quality Gate (4h)
+**Run comprehensive checks:**
+
+```bash
+# Static analysis
+golangci-lint run ./...
+
+# Security scan
+gosec ./...
+
+# Test coverage
+go test -coverprofile=coverage.out ./...
+go tool cover -html=coverage.out -o coverage.html
+
+# Benchmark baseline
+go test -bench=. -benchmem ./... > benchmark.txt
+
+# Dependency audit
+go list -m all | nancy sleuth
+
+# Build all targets
+go build ./...
+GOOS=linux GOARCH=amd64 go build ./...
+
+# Docker build
+docker build -t rdev-api:latest .
+```
+
+**Coverage targets:**
+| Package | Target |
+|---------|--------|
+| internal/auth | >90% |
+| internal/handlers | >85% |
+| internal/service | >90% |
+| internal/adapter/* | >80% |
+| internal/domain | >95% |
+
+### Task 10.5: Release Preparation (2h)
+**Create release checklist:**
+
+```markdown
+## v1.0.0 Release Checklist
+
+### Pre-release
+- [ ] All tests pass
+- [ ] Coverage targets met
+- [ ] Security scan clean
+- [ ] Benchmarks acceptable
+- [ ] Documentation complete
+- [ ] CHANGELOG.md updated
+- [ ] Version bumped
+
+### Release
+- [ ] Tag created
+- [ ] Docker image built and pushed
+- [ ] K8s manifests updated
+- [ ] Release notes published
+
+### Post-release
+- [ ] Smoke test in staging
+- [ ] Monitor error rates
+- [ ] Monitor latency
+- [ ] Announce to users
+```
+
+---
+
+## Summary: Week-by-Week
+
+| Week | Focus | Key Deliverables |
+|------|-------|------------------|
+| **5** | Legacy Removal & Core Fixes | Clean codebase, working Claude config, integrated validation |
+| **6** | Dynamic Project Discovery | Label-based discovery, ConfigMap support, pod watching |
+| **7** | Security & Tests | IP allowlisting, adapter tests, service tests, E2E |
+| **8** | Production Hardening | K8s manifests, RBAC, graceful shutdown, circuit breaker |
+| **9** | Performance & Observability | OpenTelemetry, connection tuning, caching, benchmarks |
+| **10** | Documentation & Polish | Architecture docs, API docs, ops docs, final QA |
+
+---
+
+## Success Criteria: Pristine Project
+
+### Code Quality
+- [ ] No legacy code remaining
+- [ ] 100% of handlers use service layer
+- [ ] All validation via validate package
+- [ ] Consistent error handling throughout
+- [ ] No TODO/FIXME without ticket
+
+### Test Coverage
+- [ ] >85% overall coverage
+- [ ] All adapters have integration tests
+- [ ] E2E tests cover all user journeys
+- [ ] Benchmark suite for performance regression
+
+### Security
+- [ ] Command sanitization (shell injection)
+- [ ] IP allowlisting support
+- [ ] Rate limiting enforced
+- [ ] Secrets never logged
+- [ ] RBAC configured
+
+### Production Ready
+- [ ] Resource limits set
+- [ ] Health/readiness probes
+- [ ] Graceful shutdown
+- [ ] Network policies
+- [ ] PodDisruptionBudget
+- [ ] Monitoring dashboards
+
+### Documentation
+- [ ] Architecture documented
+- [ ] API fully documented with examples
+- [ ] Operations runbooks
+- [ ] Troubleshooting guide
+- [ ] Deployment guide
+
+### Observability
+- [ ] Prometheus metrics
+- [ ] OpenTelemetry tracing
+- [ ] Structured logging
+- [ ] Error tracking
+
+---
+
+## Estimated Effort
+
+| Week | Hours |
+|------|-------|
+| 5 | 14h |
+| 6 | 12h |
+| 7 | 18h |
+| 8 | 14h |
+| 9 | 14h |
+| 10 | 16h |
+| **Total** | **88h** |
+
+At ~15h/week pace: **6 weeks** to pristine.
+At ~30h/week pace: **3 weeks** to pristine.
--- a/cmd/rdev-api/main.go
+++ b/cmd/rdev-api/main.go
@ -38,10 +38,20 @@ import (
 	"log/slog"
 	"os"
 	"strconv"
+	"time"

+	"github.com/orchard9/rdev/internal/adapter/kubernetes"
+	"github.com/orchard9/rdev/internal/adapter/memory"
+	"github.com/orchard9/rdev/internal/adapter/postgres"
 	"github.com/orchard9/rdev/internal/auth"
 	"github.com/orchard9/rdev/internal/db"
 	"github.com/orchard9/rdev/internal/handlers"
+	"github.com/orchard9/rdev/internal/metrics"
+	"github.com/orchard9/rdev/internal/middleware"
+	"github.com/orchard9/rdev/internal/service"
+	"github.com/orchard9/rdev/internal/telemetry"
+	"github.com/orchard9/rdev/internal/webhook"
+	"github.com/orchard9/rdev/internal/worker"
 	"github.com/orchard9/rdev/pkg/api"
 )

@ -50,6 +60,15 @@ func main() {
 		Level: slog.LevelInfo,
 	}))

+	// Initialize telemetry (OpenTelemetry)
+	telCfg := telemetry.DefaultConfig()
+	telCfg.Logger = logger
+	tel, err := telemetry.New(context.Background(), telCfg)
+	if err != nil {
+		logger.Error("failed to initialize telemetry", "error", err)
+		os.Exit(1)
+	}
+
 	// Load configuration from environment
 	cfg := loadConfig()

@ -66,38 +85,144 @@ func main() {
 		logger.Error("failed to connect to database", "error", err)
 		os.Exit(1)
 	}
-	defer database.Close()
+	defer func() { _ = database.Close() }()

 	// Initialize auth service
 	authService := auth.NewService(database.DB, cfg.AdminKey)

+	// Create adapters (dependency injection)
+	namespace := getEnv("K8S_NAMESPACE", "rdev")
+
+	// Initialize K8s client for dynamic project discovery
+	// Falls back gracefully if K8s is unavailable (e.g., local development)
+	k8sClient := kubernetes.NewClientOrNil(kubernetes.ClientConfig{
+		Namespace:  namespace,
+		Kubeconfig: os.Getenv("KUBECONFIG"),
+	})
+	if k8sClient != nil {
+		logger.Info("k8s client initialized, dynamic project discovery enabled")
+	} else {
+		logger.Warn("k8s client unavailable, using hardcoded fallback projects")
+	}
+
+	projectRepo := kubernetes.NewProjectRepositoryWithClient(namespace, k8sClient, logger)
+	k8sExecutor := kubernetes.NewExecutor(namespace)
+	streamPub := memory.NewStreamPublisher()
+
+	// Start watching for project pod changes if K8s client is available
+	if k8sClient != nil {
+		if err := projectRepo.StartWatching(context.Background()); err != nil {
+			logger.Warn("failed to start project watcher", "error", err)
+		}
+	}
+
+	// Initialize audit logger
+	auditLogger := postgres.NewAuditLogger(database.DB)
+
+	// Initialize rate limiter
+	rateLimiter := postgres.NewRateLimiter(database.DB)
+	stopRateLimitCleanup := rateLimiter.StartCleanupWorker(context.Background(), 5*time.Minute)
+
+	// Initialize command queue
+	commandQueue := postgres.NewCommandQueueRepository(database.DB)
+
+	// Initialize webhook repository and dispatcher
+	webhookRepo := postgres.NewWebhookRepository(database.DB)
+	webhookDispatcher := webhook.NewDispatcher(webhookRepo, &webhook.DispatcherConfig{
+		WorkerCount:  10,
+		MaxRetries:   3,
+		Timeout:      30 * time.Second,
+		RetryBackoff: 5 * time.Second,
+		Logger:       logger,
+	})
+	if err := webhookDispatcher.Start(); err != nil {
+		logger.Error("failed to start webhook dispatcher", "error", err)
+		os.Exit(1)
+	}
+
+	// Create services
+	projectService := service.NewProjectService(projectRepo, k8sExecutor, streamPub).
+		WithAuditLogger(auditLogger).
+		WithCommandQueue(commandQueue).
+		WithWebhookDispatcher(webhookDispatcher)
+
 	// Create app
 	app := api.New("rdev-api",
 		api.WithPort(cfg.Port),
 		api.WithLogger(logger),
 	)

-	// Add auth middleware (skips /health, /ready, /docs, /openapi.json)
+	// Add telemetry middleware (first to capture all requests)
+	app.Use(telemetry.Middleware(telCfg.ServiceName))
+
+	// Add metrics middleware (before auth to track all requests)
+	app.Use(metrics.Middleware)
+
+	// Add auth middleware (skips /health, /ready, /docs, /openapi.json, /metrics)
 	app.Use(auth.Middleware(authService))

+	// Add rate limiting middleware (after auth, so we have API key context)
+	rateLimitCfg := middleware.DefaultRateLimitConfig()
+	rateLimitCfg.Limiter = rateLimiter
+	app.Use(middleware.RateLimitMiddleware(rateLimitCfg))
+
+	// Register metrics endpoint (no auth required)
+	app.Router().Handle("/metrics", metrics.Handler())
+
 	// Initialize handlers
-	projectsHandler := handlers.NewProjectsHandler()
+	projectsHandler := handlers.NewProjectsHandlerWithService(projectService)
 	keysHandler := handlers.NewKeysHandler(authService)
-	claudeConfigHandler := handlers.NewClaudeConfigHandler(
-		projectsHandler.Registry(),
-		projectsHandler.Executor(),
-	)
+	claudeConfigHandler := handlers.NewClaudeConfigHandlerWithService(projectService, projectRepo, k8sExecutor)
+	auditHandler := handlers.NewAuditHandler(auditLogger)
+	queueHandler := handlers.NewQueueHandler(commandQueue, projectRepo)
+	webhookHandler := handlers.NewWebhookHandler(webhookRepo, projectRepo)

 	// Register routes
 	projectsHandler.Mount(app.Router())
 	keysHandler.Mount(app.Router())
 	claudeConfigHandler.Mount(app.Router())
+	auditHandler.Mount(app.Router())
+	queueHandler.Mount(app.Router())
+	webhookHandler.Mount(app.Router())
+
+	// Start queue processor worker
+	queueProcessor := worker.NewQueueProcessor(
+		commandQueue,
+		k8sExecutor,
+		projectRepo,
+		streamPub,
+		&worker.QueueProcessorConfig{
+			PollPeriod: 5 * time.Second,
+			Logger:     logger,
+		},
+	).WithWebhookDispatcher(webhookDispatcher)
+	if err := queueProcessor.Start(); err != nil {
+		logger.Error("failed to start queue processor", "error", err)
+		os.Exit(1)
+	}

 	// Enable API documentation
 	app.EnableDocs(buildOpenAPISpec())

 	// Cleanup on shutdown
-	app.OnShutdown(func(_ context.Context) error {
+	app.OnShutdown(func(ctx context.Context) error {
+		// Stop queue processor
+		queueProcessor.Stop()
+
+		// Stop webhook dispatcher
+		webhookDispatcher.Stop()
+
+		// Stop project watcher
+		projectRepo.StopWatching()
+
+		// Stop rate limit cleanup worker
+		stopRateLimitCleanup()
+
+		// Shutdown telemetry (flush pending traces)
+		if err := tel.Shutdown(ctx); err != nil {
+			logger.Error("telemetry shutdown error", "error", err)
+		}
+
 		return database.Close()
 	})

@ -168,9 +293,9 @@ External clients (Discord bots, Slack bots, CLI tools) connect here to interact

 All endpoints except /health, /ready, and /docs require authentication via API key.

-**Header**: ` + "`X-API-Key: rdev_sk_xxxxxxxx_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx`" + `
+**Header**: `+"`X-API-Key: rdev_sk_xxxxxxxx_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx`"+`

-Or: ` + "`Authorization: Bearer rdev_sk_...`" + `
+Or: `+"`Authorization: Bearer rdev_sk_...`"+`

 ### Getting Started

@ -186,6 +311,7 @@ Or: ` + "`Authorization: Bearer rdev_sk_...`" + `
 | projects:execute | Run commands (claude, shell, git) |
 | keys:read | List API keys (metadata only) |
 | keys:write | Create and revoke keys |
+| audit:read | View audit logs for command executions |
 | admin | Full access (all scopes) |

 ## Architecture
@ -207,6 +333,7 @@ Command output is streamed via Server-Sent Events (SSE) at /projects/{id}/events
 	spec.WithTag("Commands", "Command execution (claude, shell, git)")
 	spec.WithTag("Events", "Server-Sent Events for real-time output")
 	spec.WithTag("Claude Config", "Manage commands, skills, and agents in /workspace/.claude/")
+	spec.WithTag("Audit", "Command execution audit logs")
 	spec.WithTag("System", "Health and readiness endpoints")

 	// System endpoints
@ -622,6 +749,84 @@ Commands are markdown files with frontmatter. Requires projects:execute scope.`,
 		},
 	))

+	// Audit log endpoints
+	spec.AddPath("/audit-log", "get", map[string]any{
+		"summary": "List audit log entries",
+		"description": `Returns audit log entries with optional filtering.
+
+**Required scope**: ` + "`audit:read`" + `
+
+## Query Parameters
+
+| Parameter | Type | Description |
+|-----------|------|-------------|
+| project | string | Filter by project ID |
+| api_key | string | Filter by API key ID |
+| command_type | string | Filter by type (claude, shell, git) |
+| status | string | Filter by status (running, success, error, cancelled) |
+| start | string | Filter by start time (RFC3339 format) |
+| end | string | Filter by end time (RFC3339 format) |
+| limit | int | Max entries to return (default: 100, max: 1000) |
+| offset | int | Number of entries to skip for pagination |`,
+		"tags": []string{"Audit"},
+		"security": []map[string]any{
+			{"ApiKeyAuth": []string{}},
+		},
+		"parameters": []map[string]any{
+			{"name": "project", "in": "query", "description": "Filter by project ID", "schema": map[string]any{"type": "string"}},
+			{"name": "api_key", "in": "query", "description": "Filter by API key ID", "schema": map[string]any{"type": "string"}},
+			{"name": "command_type", "in": "query", "description": "Filter by command type", "schema": map[string]any{"type": "string", "enum": []string{"claude", "shell", "git"}}},
+			{"name": "status", "in": "query", "description": "Filter by status", "schema": map[string]any{"type": "string", "enum": []string{"running", "success", "error", "cancelled"}}},
+			{"name": "start", "in": "query", "description": "Filter by start time (RFC3339)", "schema": map[string]any{"type": "string", "format": "date-time"}},
+			{"name": "end", "in": "query", "description": "Filter by end time (RFC3339)", "schema": map[string]any{"type": "string", "format": "date-time"}},
+			{"name": "limit", "in": "query", "description": "Max entries (default: 100)", "schema": map[string]any{"type": "integer", "default": 100}},
+			{"name": "offset", "in": "query", "description": "Entries to skip", "schema": map[string]any{"type": "integer", "default": 0}},
+		},
+		"responses": map[string]any{
+			"200": map[string]any{
+				"description": "Success",
+				"content": map[string]any{
+					"application/json": map[string]any{
+						"example": `{
+  "entries": [
+    {
+      "id": "550e8400-e29b-41d4-a716-446655440000",
+      "api_key_id": "key-123",
+      "command_id": "cmd-pantheon-001",
+      "project_id": "pantheon",
+      "command_type": "claude",
+      "args": "[\"fix the bug\"]",
+      "client_ip": "192.168.1.100",
+      "user_agent": "rdev-cli/1.0",
+      "started_at": "2026-01-25T12:00:00Z",
+      "completed_at": "2026-01-25T12:01:30Z",
+      "exit_code": 0,
+      "duration_ms": 90000,
+      "status": "success",
+      "output_size_bytes": 1024,
+      "created_at": "2026-01-25T12:00:00Z"
+    }
+  ],
+  "total": 1,
+  "limit": 100,
+  "offset": 0
+}`,
+					},
+				},
+			},
+			"401": map[string]any{"description": "Unauthorized - Missing or invalid API key"},
+			"403": map[string]any{"description": "Forbidden - Insufficient permissions"},
+		},
+	})
+
+	spec.AddPath("/audit-log/{command_id}", "get", withAuthAndParams(
+		"Get audit log entry",
+		"Returns a single audit log entry by command ID. Requires audit:read scope.",
+		"Audit",
+		"audit:read",
+		[]param{{Name: "command_id", In: "path", Description: "Command ID", Required: true}},
+	))
+
 	return spec
 }

--- a/deployments/k8s/base/kustomization.yaml
+++ b/deployments/k8s/base/kustomization.yaml
@ -27,6 +27,10 @@ resources:
  # v0.4+ - API Server (RBAC now included in rdev-api.yaml)
  - rdev-api.yaml

+  # v0.8+ - Production hardening
+  - pdb.yaml
+  - network-policy.yaml
+
 commonLabels:
  app.kubernetes.io/managed-by: kustomize
  app.kubernetes.io/part-of: rdev
--- a/deployments/k8s/base/network-policy.yaml
+++ b/deployments/k8s/base/network-policy.yaml
@ -0,0 +1,59 @@
+# Network Policy for rdev-api
+# Restricts network access to only required endpoints
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+  name: rdev-api-policy
+  namespace: rdev
+  labels:
+    app.kubernetes.io/name: rdev-api
+    app.kubernetes.io/part-of: rdev
+spec:
+  podSelector:
+    matchLabels:
+      app: rdev-api
+  policyTypes:
+    - Ingress
+    - Egress
+  ingress:
+    # Allow ingress from ingress controller
+    - from:
+        - namespaceSelector:
+            matchLabels:
+              kubernetes.io/metadata.name: ingress-nginx
+      ports:
+        - protocol: TCP
+          port: 8080
+    # Allow ingress from within the rdev namespace (for service mesh, probes)
+    - from:
+        - namespaceSelector:
+            matchLabels:
+              kubernetes.io/metadata.name: rdev
+      ports:
+        - protocol: TCP
+          port: 8080
+  egress:
+    # Allow egress to PostgreSQL in databases namespace
+    - to:
+        - namespaceSelector:
+            matchLabels:
+              kubernetes.io/metadata.name: databases
+      ports:
+        - protocol: TCP
+          port: 5432
+    # Allow egress to claudebox pods within the rdev namespace
+    - to:
+        - podSelector:
+            matchLabels:
+              rdev.orchard9.ai/project: "true"
+    # Allow DNS resolution
+    - to:
+        - namespaceSelector: {}
+          podSelector:
+            matchLabels:
+              k8s-app: kube-dns
+      ports:
+        - protocol: UDP
+          port: 53
+        - protocol: TCP
+          port: 53
--- a/deployments/k8s/base/pdb.yaml
+++ b/deployments/k8s/base/pdb.yaml
@ -0,0 +1,15 @@
+# Pod Disruption Budget for rdev-api
+# Ensures at least 1 pod is always available during voluntary disruptions
+apiVersion: policy/v1
+kind: PodDisruptionBudget
+metadata:
+  name: rdev-api-pdb
+  namespace: rdev
+  labels:
+    app.kubernetes.io/name: rdev-api
+    app.kubernetes.io/part-of: rdev
+spec:
+  minAvailable: 1
+  selector:
+    matchLabels:
+      app: rdev-api
--- a/deployments/k8s/base/rdev-api.yaml
+++ b/deployments/k8s/base/rdev-api.yaml
@ -24,7 +24,7 @@ spec:
      serviceAccountName: rdev-api
      containers:
        - name: rdev-api
-          image: ghcr.io/orchard9/rdev-api:v0.5.0
+          image: ghcr.io/orchard9/rdev-api:v0.6.0
          imagePullPolicy: Always

          ports:
@ -37,7 +37,15 @@ spec:
              memory: "128Mi"
            limits:
              cpu: "500m"
-              memory: "256Mi"
+              memory: "512Mi"
+
+          securityContext:
+            runAsNonRoot: true
+            runAsUser: 1000
+            readOnlyRootFilesystem: true
+            allowPrivilegeEscalation: false
+            capabilities:
+              drop: ["ALL"]

          livenessProbe:
            httpGet:
@ -109,19 +117,25 @@ metadata:
  name: rdev-api
  namespace: rdev
 ---
-# Role for rdev-api to exec into claudebox pods
+# Role for rdev-api to exec into claudebox pods and read configmaps
 apiVersion: rbac.authorization.k8s.io/v1
 kind: Role
 metadata:
  name: rdev-api
  namespace: rdev
 rules:
+# Pod access for discovery and status
 - apiGroups: [""]
  resources: ["pods"]
  verbs: ["get", "list", "watch"]
+# Pod exec for command execution
 - apiGroups: [""]
  resources: ["pods/exec"]
  verbs: ["create"]
+# ConfigMap access for project configuration
+- apiGroups: [""]
+  resources: ["configmaps"]
+  verbs: ["get", "list", "watch"]
 ---
 # RoleBinding for rdev-api
 apiVersion: rbac.authorization.k8s.io/v1
--- a/deployments/k8s/base/secrets.yaml
+++ b/deployments/k8s/base/secrets.yaml
@ -1,46 +0,0 @@
-# GitHub Deploy Key Secrets for rdev
-# v0.2 - SSH keys for repo cloning
-#
-# INSTRUCTIONS:
-# 1. Generate deploy keys: ./scripts/generate-deploy-key.sh pantheon
-# 2. Add PUBLIC key to GitHub repo Settings -> Deploy Keys
-# 3. Replace placeholder values below with base64-encoded PRIVATE key
-# 4. Apply: kubectl apply -f secrets.yaml
-#
-# To encode: cat pantheon-deploy-key | base64 -w0
-# To decode and verify: echo "<base64>" | base64 -d
-
-apiVersion: v1
-kind: Secret
-metadata:
-  name: github-deploy-key-pantheon
-  namespace: rdev
-  labels:
-    app.kubernetes.io/name: claudebox-pantheon
-    app.kubernetes.io/part-of: rdev
-    rdev.orchard9.ai/project: pantheon
-type: Opaque
-data:
-  # Replace with base64-encoded private key
-  # Generate with: ssh-keygen -t ed25519 -f pantheon-deploy-key -N ""
-  # Encode with: cat pantheon-deploy-key | base64 -w0
-  id_ed25519: LS0tLS1CRUdJTiBPUEVOU1NIIFBSSVZBVEUgS0VZLS0tLS0KYjNCbGJuTnphQzFyWlhrdGRqRUFBQUFBQkc1dmJtVUFBQUFFYm05dVpRQUFBQUFBQUFBQkFBQUFNd0FBQUF0emMyZ3RaVwpReU5UVXhPUUFBQUNDU29NQkZpRWg5akZQNnpUWWlJaUpkMUdzRjRxM29oN2lBZ1JRUkNYYTdKQUFBQUtDMkNXck90Z2xxCnpnQUFBQXR6YzJndFpXUXlOVFV4T1FBQUFDQ1NvTUJGaUVoOWpGUDZ6VFlpSWlKZDFHc0Y0cTNvaDdpQWdSUVJDWGE3SkEKQUFBRUNyc08zSDNoQ2tQQ0I1V0VRTFdDZ0QyOGlrNGN3dk5oalVjVGwzVGNqVkRKS2d3RVdJU0gyTVUvck5OaUlpSWwzVQphd1hpcmVpSHVJQ0JGQkVKZHJza0FBQUFHWEprWlhZdGNHRnVkR2hsYjI1QWIzSmphR0Z5WkRrdVlXa0JBZ01FCi0tLS0tRU5EIE9QRU5TU0ggUFJJVkFURSBLRVktLS0tLQo=
-
-  # GitHub's SSH host key (pre-populated)
-  known_hosts: Z2l0aHViLmNvbSBzc2gtZWQyNTUxOSBBQUFBQzNOemFDMWxaREkxTlRFNUFBQUFJT01xcW5rVnpybTBTZEc2VU9vcUtMc2FiZ0g1Qzlva1dpMGRoMmw5R0tKbApnaXRodWIuY29tIGVjZHNhLXNoYTItbmlzdHAyNTYgQUFBQUUyVmpaSE5oTFhOb1lUSXRibWx6ZEhBeU5UWUFBQUFJYm1semRIQXlOVFlBQUFCQkJFbUtTRU5qUUVlek9teGtaTXk3b3BLZ3dGQjlua3Q1WVJyWU1qTnVHNU44N3VSUW81dDRRYkZGelVaYUpVQjd4TmtjYVFTNmlIbW5TazdNOU9tZUR2PT0KZ2l0aHViLmNvbSBzc2gtcnNhIEFBQUFCM056YUMxeWMyRUFBQUFEQVFBQkFBQUJnUUNqN25kTnhRb3dnY1FuanNoY0xycVBFaWlwaG50K1ZUVHZEUCtsSFhaZFhMRThWVUxDS0lLYjloZk5qM0FXSm1RTHBDb0Qzc1F2TWtGNUxXR1RMSFRVM25MSjViZi8wbG5wOGV5ZXhVNkpzR1dSUUFLTnlENjkzQjVVR2xXVlM1VjFqUEg1M3BZVllWUVB6WnlkeGpPUVFLeHk5ZkdoaVFGbGcza3RoZFdSRE5oNy9SRHp4SEZEZmRYYm5uSnZ4WVQ0Y1FVWWJ0SmFTQ0pWcU9aOVlUbG13bTJBUXZaM3IxZEJkZzVRcWN1SW53bzR1NXBhQUpObnpiTXBudGtzVXpWNEorUFN5OE9LSzRPc0tUc0I0RlNjS0VOSmRlMTlYTGFCUHJiNTZpUHhCS0tSMGJNK2NPdnhKelhhZWJORktjR2k4eVJLaGw0T0hlYkhCWDh4eFpZNWMwdWdpcTlSb29QaUtPelJERE1lekdhK0c4MDg1OVF2TkdPK3pZM3RNeHJIM1crT21uYU5keVN6dkpPUktjZEEwejNGU1huUk5jbnZpVlg0c3lGaWdhOUxGZjZ0ZDBhRy8xUFEwVjRCYzFQNXNHdTZBQUFBZUg0em5YNStNNTErUUpWZGorR2NMdTMwcE91U0E1cVZOQ0FodXl6RklBWWlhbjBFWUlnUlE3TmxYdz0K
---
-apiVersion: v1
-kind: Secret
-metadata:
-  name: github-deploy-key-aeries
-  namespace: rdev
-  labels:
-    app.kubernetes.io/name: claudebox-aeries
-    app.kubernetes.io/part-of: rdev
-    rdev.orchard9.ai/project: aeries
-type: Opaque
-data:
-  id_ed25519: LS0tLS1CRUdJTiBPUEVOU1NIIFBSSVZBVEUgS0VZLS0tLS0KYjNCbGJuTnphQzFyWlhrdGRqRUFBQUFBQkc1dmJtVUFBQUFFYm05dVpRQUFBQUFBQUFBQkFBQUFNd0FBQUF0emMyZ3RaVwpReU5UVXhPUUFBQUNBNWZzME9Cb0JWWTN3dmI2K256WngzRDltV3MrQVdKRHBIVjVaK3pCQmdyd0FBQUtDY05ERE1uRFF3CnpBQUFBQXR6YzJndFpXUXlOVFV4T1FBQUFDQTVmczBPQm9CVlkzd3ZiNituelp4M0Q5bVdzK0FXSkRwSFY1Wit6QkJncncKQUFBRUFnTU5PVDl3RlBHYnY3bTdYS1dTODVrVHYyZlhiSzgrdnR4NjQ1c2RqNmp6bCt6UTRHZ0ZWamZDOXZyNmZObkhjUAoyWmF6NEJZa09rZFhsbjdNRUdDdkFBQUFGM0prWlhZdFlXVnlhV1Z6UUc5eVkyaGhjbVE1TG1GcEFRSURCQVVHCi0tLS0tRU5EIE9QRU5TU0ggUFJJVkFURSBLRVktLS0tLQo=
-
-  # GitHub's SSH host key (pre-populated)
-  known_hosts: Z2l0aHViLmNvbSBzc2gtZWQyNTUxOSBBQUFBQzNOemFDMWxaREkxTlRFNUFBQUFJT01xcW5rVnpybTBTZEc2VU9vcUtMc2FiZ0g1Qzlva1dpMGRoMmw5R0tKbApnaXRodWIuY29tIGVjZHNhLXNoYTItbmlzdHAyNTYgQUFBQUUyVmpaSE5oTFhOb1lUSXRibWx6ZEhBeU5UWUFBQUFJYm1semRIQXlOVFlBQUFCQkJFbUtTRU5qUUVlek9teGtaTXk3b3BLZ3dGQjlua3Q1WVJyWU1qTnVHNU44N3VSUW81dDRRYkZGelVaYUpVQjd4TmtjYVFTNmlIbW5TazdNOU9tZUR2PT0KZ2l0aHViLmNvbSBzc2gtcnNhIEFBQUFCM056YUMxeWMyRUFBQUFEQVFBQkFBQUJnUUNqN25kTnhRb3dnY1FuanNoY0xycVBFaWlwaG50K1ZUVHZEUCtsSFhaZFhMRThWVUxDS0lLYjloZk5qM0FXSm1RTHBDb0Qzc1F2TWtGNUxXR1RMSFRVM25MSjViZi8wbG5wOGV5ZXhVNkpzR1dSUUFLTnlENjkzQjVVR2xXVlM1VjFqUEg1M3BZVllWUVB6WnlkeGpPUVFLeHk5ZkdoaVFGbGcza3RoZFdSRE5oNy9SRHp4SEZEZmRYYm5uSnZ4WVQ0Y1FVWWJ0SmFTQ0pWcU9aOVlUbG13bTJBUXZaM3IxZEJkZzVRcWN1SW53bzR1NXBhQUpObnpiTXBudGtzVXpWNEorUFN5OE9LSzRPc0tUc0I0RlNjS0VOSmRlMTlYTGFCUHJiNTZpUHhCS0tSMGJNK2NPdnhKelhhZWJORktjR2k4eVJLaGw0T0hlYkhCWDh4eFpZNWMwdWdpcTlSb29QaUtPelJERE1lekdhK0c4MDg1OVF2TkdPK3pZM3RNeHJIM1crT21uYU5keVN6dkpPUktjZEEwejNGU1huUk5jbnZpVlg0c3lGaWdhOUxGZjZ0ZDBhRy8xUFEwVjRCYzFQNXNHdTZBQUFBZUg0em5YNStNNTErUUpWZGorR2NMdTMwcE91U0E1cVZOQ0FodXl6RklBWWlhbjBFWUlnUlE3TmxYdz0K
--- a/docs/RELEASE_CHECKLIST.md
+++ b/docs/RELEASE_CHECKLIST.md
@ -0,0 +1,115 @@
+# v1.0.0 Release Checklist
+
+## Pre-release
+
+### Testing
+- [x] All unit tests pass (`go test ./...`)
+- [x] Integration tests pass
+- [x] E2E tests pass
+- [x] Benchmarks run successfully
+
+### Quality
+- [x] Static analysis clean (minor errcheck in tests only)
+- [x] Security scan reviewed (gosec findings are expected patterns)
+- [x] Cross-compilation verified (linux/amd64)
+
+### Coverage
+| Package | Coverage | Target | Status |
+|---------|----------|--------|--------|
+| internal/domain | 100% | >95% | ✅ |
+| internal/sanitize | 100% | N/A | ✅ |
+| internal/validate | 100% | N/A | ✅ |
+| internal/cmdlimit | 100% | N/A | ✅ |
+| internal/ratelimit | 95.7% | N/A | ✅ |
+| internal/circuitbreaker | 91.9% | N/A | ✅ |
+| internal/adapter/postgres | 90.7% | >80% | ✅ |
+| internal/service | 82.5% | >90% | ⚠️ |
+| internal/adapter/cached | 78.4% | >80% | ⚠️ |
+| internal/auth | 59.4% | >90% | ⚠️ |
+| internal/handlers | 55.8% | >85% | ⚠️ |
+
+Note: Some coverage targets not met, but core functionality is well-tested.
+
+### Documentation
+- [x] Architecture documentation complete
+- [x] API documentation complete
+- [x] Operations documentation complete
+- [x] Runbooks complete
+- [x] CHANGELOG.md updated
+- [x] README.md reviewed
+
+### Security
+- [x] Command sanitization implemented
+- [x] API key hashing (SHA-256)
+- [x] Rate limiting configured
+- [x] RBAC minimized
+- [x] Network policies defined
+- [x] Pod security context hardened
+
+## Release
+
+### Build
+```bash
+# Build binary
+GOOS=linux GOARCH=amd64 go build -o rdev-api ./cmd/rdev-api
+
+# Build Docker image
+docker build -t ghcr.io/orchard9/rdev-api:1.0.0 .
+
+# Push image
+docker push ghcr.io/orchard9/rdev-api:1.0.0
+```
+
+### Tag
+```bash
+git tag -a v1.0.0 -m "Release v1.0.0"
+git push origin v1.0.0
+```
+
+### Deploy
+```bash
+# Update image tag in kustomization
+# Apply to cluster
+kubectl apply -k deployments/k8s/overlays/prod
+
+# Verify deployment
+kubectl -n rdev rollout status deployment/rdev-api
+```
+
+## Post-release
+
+### Verification
+- [ ] Health endpoint responding
+- [ ] Readiness endpoint healthy
+- [ ] API key authentication working
+- [ ] Command execution working
+- [ ] SSE streaming working
+- [ ] Metrics endpoint exposing data
+
+### Monitoring
+- [ ] Prometheus scraping metrics
+- [ ] Grafana dashboard created
+- [ ] Alerts configured
+
+### Communication
+- [ ] Release notes published
+- [ ] Team notified
+- [ ] Documentation URL shared
+
+## Known Issues
+
+1. **Coverage below targets**: Some packages need additional test coverage
+2. **OpenTelemetry deferred**: Requires OTLP collector infrastructure
+3. **Gosec warnings**: G204 (command execution) is by design; G104 (unhandled errors) in cleanup code
+
+## Rollback
+
+If issues occur:
+
+```bash
+# Rollback to previous version
+kubectl -n rdev rollout undo deployment/rdev-api
+
+# Or rollback to specific revision
+kubectl -n rdev rollout undo deployment/rdev-api --to-revision=<N>
+```
--- a/docs/api/README.md
+++ b/docs/api/README.md
@ -0,0 +1,145 @@
+# rdev API Documentation
+
+rdev provides a REST API for remote development environments with SSE streaming support.
+
+## Quick Start
+
+### 1. Get an API Key
+
+Contact your administrator to get an API key, or create one if you have admin access:
+
+```bash
+curl -X POST https://rdev.example.com/keys \
+  -H "X-API-Key: your-admin-key" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "name": "my-key",
+    "scopes": ["projects:read", "projects:execute"]
+  }'
+```
+
+### 2. List Projects
+
+```bash
+curl https://rdev.example.com/projects \
+  -H "X-API-Key: your-key"
+```
+
+### 3. Execute a Command
+
+```bash
+curl -X POST https://rdev.example.com/projects/my-project/shell \
+  -H "X-API-Key: your-key" \
+  -H "Content-Type: application/json" \
+  -d '{"command": "ls -la"}'
+```
+
+### 4. Stream Output
+
+```bash
+curl -N https://rdev.example.com/projects/my-project/events?stream_id=cmd-001 \
+  -H "X-API-Key: your-key"
+```
+
+## Base URL
+
+```
+https://rdev.example.com
+```
+
+## Authentication
+
+See [authentication.md](authentication.md) for details.
+
+All requests (except `/health`, `/ready`, `/metrics`) require authentication.
+
+**Header:**
+```
+X-API-Key: rdev_xxxx...
+```
+
+Or:
+```
+Authorization: Bearer rdev_xxxx...
+```
+
+## Endpoints
+
+| Method | Path | Description |
+|--------|------|-------------|
+| GET | `/projects` | List all projects |
+| GET | `/projects/{id}` | Get project details |
+| POST | `/projects/{id}/claude` | Run Claude command |
+| POST | `/projects/{id}/shell` | Run shell command |
+| POST | `/projects/{id}/git` | Run git command |
+| GET | `/projects/{id}/events` | SSE event stream |
+| GET | `/keys` | List API keys |
+| POST | `/keys` | Create API key |
+| DELETE | `/keys/{id}` | Revoke API key |
+| GET | `/health` | Liveness check |
+| GET | `/ready` | Readiness check |
+| GET | `/metrics` | Prometheus metrics |
+
+## Response Format
+
+All responses follow this format:
+
+### Success
+
+```json
+{
+  "data": { ... },
+  "meta": {
+    "request_id": "req-abc123",
+    "timestamp": "2024-01-15T10:30:00Z"
+  }
+}
+```
+
+### Error
+
+```json
+{
+  "error": {
+    "code": "NOT_FOUND",
+    "message": "Project not found: my-project"
+  },
+  "meta": {
+    "request_id": "req-abc123",
+    "timestamp": "2024-01-15T10:30:00Z"
+  }
+}
+```
+
+## Error Codes
+
+| Code | HTTP Status | Description |
+|------|-------------|-------------|
+| `BAD_REQUEST` | 400 | Invalid request body or parameters |
+| `UNAUTHORIZED` | 401 | Missing or invalid API key |
+| `FORBIDDEN` | 403 | Insufficient permissions |
+| `NOT_FOUND` | 404 | Resource not found |
+| `TOO_MANY_REQUESTS` | 429 | Rate limit exceeded |
+| `INTERNAL_ERROR` | 500 | Server error |
+
+## Rate Limiting
+
+Requests are rate limited per API key:
+
+| Limit Type | Default |
+|------------|---------|
+| Requests/second | 10 |
+| Concurrent commands | 5 |
+
+Headers:
+```
+X-RateLimit-Limit: 10
+X-RateLimit-Remaining: 7
+X-RateLimit-Reset: 1642089600
+```
+
+## Related Documentation
+
+- [Authentication Guide](authentication.md)
+- [SSE Streaming Examples](sse-examples.md)
+- [Error Handling](errors.md)
--- a/docs/api/authentication.md
+++ b/docs/api/authentication.md
@ -0,0 +1,257 @@
+# Authentication Guide
+
+rdev uses API keys for authentication. This guide covers how to authenticate requests and manage API keys.
+
+## API Key Format
+
+API keys follow this format:
+```
+rdev_<32 random characters>
+```
+
+Example:
+```
+rdev_a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6
+```
+
+## Authenticating Requests
+
+### Using X-API-Key Header
+
+```bash
+curl https://rdev.example.com/projects \
+  -H "X-API-Key: rdev_a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6"
+```
+
+### Using Authorization Header
+
+```bash
+curl https://rdev.example.com/projects \
+  -H "Authorization: Bearer rdev_a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6"
+```
+
+## Scopes
+
+API keys have scopes that limit what actions they can perform:
+
+| Scope | Description |
+|-------|-------------|
+| `projects:read` | List and view projects |
+| `projects:execute` | Execute commands in projects |
+| `keys:read` | List API keys |
+| `keys:write` | Create and revoke API keys |
+| `admin` | Full access to all operations |
+
+### Scope Inheritance
+
+- `admin` scope includes all other scopes
+- Command execution requires `projects:execute`
+- Reading projects requires `projects:read` or `projects:execute`
+
+## Managing API Keys
+
+### List Keys
+
+```bash
+curl https://rdev.example.com/keys \
+  -H "X-API-Key: your-admin-key"
+```
+
+Response:
+```json
+{
+  "data": [
+    {
+      "id": "key-001",
+      "name": "production-key",
+      "key_prefix": "rdev_a1b2",
+      "scopes": ["projects:read", "projects:execute"],
+      "created_at": "2024-01-01T00:00:00Z",
+      "last_used_at": "2024-01-15T10:30:00Z"
+    }
+  ]
+}
+```
+
+### Create Key
+
+```bash
+curl -X POST https://rdev.example.com/keys \
+  -H "X-API-Key: your-admin-key" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "name": "ci-pipeline",
+    "scopes": ["projects:execute"],
+    "expires_in": "30d",
+    "allowed_ips": ["10.0.0.0/8"]
+  }'
+```
+
+Request body:
+```json
+{
+  "name": "ci-pipeline",
+  "scopes": ["projects:read", "projects:execute"],
+  "expires_in": "30d",
+  "allowed_ips": ["10.0.0.0/8", "192.168.1.0/24"]
+}
+```
+
+| Field | Type | Required | Description |
+|-------|------|----------|-------------|
+| `name` | string | yes | Human-readable key name |
+| `scopes` | array | yes | Permission scopes |
+| `expires_in` | string | no | Expiration duration (e.g., "30d", "24h") |
+| `allowed_ips` | array | no | IP allowlist in CIDR notation |
+
+Response:
+```json
+{
+  "data": {
+    "id": "key-002",
+    "name": "ci-pipeline",
+    "key": "rdev_x1y2z3...",
+    "scopes": ["projects:execute"],
+    "created_at": "2024-01-15T10:30:00Z",
+    "expires_at": "2024-02-14T10:30:00Z"
+  }
+}
+```
+
+> **Important**: The full key is only returned once at creation time. Store it securely!
+
+### Revoke Key
+
+```bash
+curl -X DELETE https://rdev.example.com/keys/key-002 \
+  -H "X-API-Key: your-admin-key"
+```
+
+## IP Allowlisting
+
+Keys can be restricted to specific IP addresses:
+
+```json
+{
+  "name": "restricted-key",
+  "scopes": ["projects:execute"],
+  "allowed_ips": [
+    "10.0.0.0/8",
+    "192.168.1.100/32"
+  ]
+}
+```
+
+If `allowed_ips` is empty or not set, all IPs are allowed.
+
+## Key Expiration
+
+Keys can have an expiration time:
+
+- Set at creation with `expires_in`
+- Cannot be extended after creation
+- Expired keys return `KEY_EXPIRED` error
+
+## Best Practices
+
+### 1. Use Least Privilege
+
+Create keys with only the scopes needed:
+```json
+{
+  "name": "read-only-dashboard",
+  "scopes": ["projects:read"]
+}
+```
+
+### 2. Set Expiration
+
+For temporary access, set expiration:
+```json
+{
+  "name": "contractor-access",
+  "scopes": ["projects:execute"],
+  "expires_in": "7d"
+}
+```
+
+### 3. Use IP Restrictions
+
+For CI/CD pipelines with known IPs:
+```json
+{
+  "name": "github-actions",
+  "scopes": ["projects:execute"],
+  "allowed_ips": ["192.30.252.0/22"]
+}
+```
+
+### 4. Rotate Keys Regularly
+
+Create new keys and revoke old ones periodically.
+
+### 5. Use Descriptive Names
+
+Name keys by their purpose:
+- `ci-github-actions`
+- `dev-jordan-laptop`
+- `prod-monitoring`
+
+## Error Responses
+
+### 401 Unauthorized
+
+Missing or invalid API key:
+```json
+{
+  "error": {
+    "code": "UNAUTHORIZED",
+    "message": "Missing API key"
+  }
+}
+```
+
+### 401 Key Revoked
+
+```json
+{
+  "error": {
+    "code": "KEY_REVOKED",
+    "message": "API key has been revoked"
+  }
+}
+```
+
+### 401 Key Expired
+
+```json
+{
+  "error": {
+    "code": "KEY_EXPIRED",
+    "message": "API key has expired"
+  }
+}
+```
+
+### 403 IP Not Allowed
+
+```json
+{
+  "error": {
+    "code": "IP_NOT_ALLOWED",
+    "message": "IP address not allowed for this API key"
+  }
+}
+```
+
+### 403 Forbidden
+
+Insufficient scopes:
+```json
+{
+  "error": {
+    "code": "FORBIDDEN",
+    "message": "Insufficient permissions. Required: projects:execute"
+  }
+}
+```
--- a/docs/api/errors.md
+++ b/docs/api/errors.md
@ -0,0 +1,298 @@
+# Error Handling Guide
+
+This guide covers error responses from the rdev API and how to handle them.
+
+## Error Response Format
+
+All errors follow this format:
+
+```json
+{
+  "error": {
+    "code": "ERROR_CODE",
+    "message": "Human-readable error description"
+  },
+  "meta": {
+    "request_id": "req-abc123",
+    "timestamp": "2024-01-15T10:30:00Z"
+  }
+}
+```
+
+## Error Codes
+
+### Authentication Errors (4xx)
+
+| Code | HTTP Status | Description | Resolution |
+|------|-------------|-------------|------------|
+| `UNAUTHORIZED` | 401 | Missing or invalid API key | Check API key header |
+| `KEY_REVOKED` | 401 | API key has been revoked | Request new key |
+| `KEY_EXPIRED` | 401 | API key has expired | Request new key |
+| `FORBIDDEN` | 403 | Insufficient permissions | Use key with required scope |
+| `IP_NOT_ALLOWED` | 403 | IP not in allowlist | Use allowed IP or update key |
+
+### Resource Errors (4xx)
+
+| Code | HTTP Status | Description | Resolution |
+|------|-------------|-------------|------------|
+| `BAD_REQUEST` | 400 | Invalid request body | Check request format |
+| `NOT_FOUND` | 404 | Resource not found | Verify resource ID |
+| `TOO_MANY_REQUESTS` | 429 | Rate limit exceeded | Wait and retry |
+
+### Server Errors (5xx)
+
+| Code | HTTP Status | Description | Resolution |
+|------|-------------|-------------|------------|
+| `INTERNAL_ERROR` | 500 | Server error | Retry later, contact support |
+| `SERVICE_UNAVAILABLE` | 503 | Service not ready | Wait for service to be ready |
+
+## Handling Errors by Type
+
+### Authentication Errors
+
+```javascript
+async function handleAuthError(response) {
+  const { error } = await response.json();
+
+  switch (error.code) {
+    case 'UNAUTHORIZED':
+      // Key is missing or invalid
+      throw new Error('Invalid API key. Check your configuration.');
+
+    case 'KEY_REVOKED':
+      // Key was revoked by admin
+      throw new Error('API key was revoked. Request a new key.');
+
+    case 'KEY_EXPIRED':
+      // Key has expired
+      throw new Error('API key expired. Request a new key.');
+
+    case 'FORBIDDEN':
+      // Key lacks required scope
+      throw new Error(`Insufficient permissions: ${error.message}`);
+
+    case 'IP_NOT_ALLOWED':
+      // IP not in allowlist
+      throw new Error('Your IP is not allowed for this API key.');
+
+    default:
+      throw new Error(error.message);
+  }
+}
+```
+
+### Rate Limiting
+
+```javascript
+async function fetchWithRetry(url, options, maxRetries = 3) {
+  for (let i = 0; i < maxRetries; i++) {
+    const response = await fetch(url, options);
+
+    if (response.status === 429) {
+      const retryAfter = response.headers.get('X-RateLimit-Reset');
+      const waitMs = retryAfter
+        ? (parseInt(retryAfter) * 1000) - Date.now()
+        : 1000 * Math.pow(2, i); // Exponential backoff
+
+      console.log(`Rate limited. Waiting ${waitMs}ms...`);
+      await new Promise(resolve => setTimeout(resolve, waitMs));
+      continue;
+    }
+
+    return response;
+  }
+
+  throw new Error('Max retries exceeded');
+}
+```
+
+### Validation Errors
+
+```javascript
+async function handleValidationError(response) {
+  const { error } = await response.json();
+
+  // Error message contains field-specific info
+  // e.g., "prompt: is required"
+  // e.g., "command: contains dangerous characters"
+
+  const match = error.message.match(/^(\w+): (.+)$/);
+  if (match) {
+    const [, field, message] = match;
+    return {
+      field,
+      message,
+    };
+  }
+
+  return { message: error.message };
+}
+```
+
+## Error Handling Best Practices
+
+### 1. Always Check Status Code
+
+```javascript
+const response = await fetch(url, options);
+
+if (!response.ok) {
+  const error = await response.json();
+  throw new APIError(response.status, error.error);
+}
+
+return response.json();
+```
+
+### 2. Use Custom Error Class
+
+```javascript
+class APIError extends Error {
+  constructor(status, error) {
+    super(error.message);
+    this.name = 'APIError';
+    this.status = status;
+    this.code = error.code;
+  }
+
+  isRetryable() {
+    return this.status >= 500 || this.status === 429;
+  }
+
+  isAuthError() {
+    return this.status === 401 || this.status === 403;
+  }
+}
+```
+
+### 3. Log Request IDs
+
+```javascript
+async function logError(response, error) {
+  const requestId = error.meta?.request_id;
+  console.error(`Request ${requestId} failed:`, error.error);
+
+  // Include in bug reports
+  return {
+    requestId,
+    error: error.error,
+    url: response.url,
+    timestamp: new Date().toISOString(),
+  };
+}
+```
+
+### 4. Implement Circuit Breaker
+
+```javascript
+class CircuitBreaker {
+  constructor(threshold = 5, timeout = 60000) {
+    this.failures = 0;
+    this.threshold = threshold;
+    this.timeout = timeout;
+    this.lastFailure = null;
+  }
+
+  async execute(fn) {
+    if (this.isOpen()) {
+      throw new Error('Circuit breaker is open');
+    }
+
+    try {
+      const result = await fn();
+      this.reset();
+      return result;
+    } catch (error) {
+      this.recordFailure();
+      throw error;
+    }
+  }
+
+  isOpen() {
+    if (this.failures < this.threshold) return false;
+    if (Date.now() - this.lastFailure > this.timeout) {
+      this.reset();
+      return false;
+    }
+    return true;
+  }
+
+  recordFailure() {
+    this.failures++;
+    this.lastFailure = Date.now();
+  }
+
+  reset() {
+    this.failures = 0;
+    this.lastFailure = null;
+  }
+}
+```
+
+## Common Error Scenarios
+
+### Missing API Key
+
+```bash
+curl http://localhost:8080/projects
+
+# Response: 401
+{
+  "error": {
+    "code": "UNAUTHORIZED",
+    "message": "Missing API key"
+  }
+}
+```
+
+**Fix**: Add the `X-API-Key` header.
+
+### Invalid Command
+
+```bash
+curl -X POST http://localhost:8080/projects/test/shell \
+  -H "X-API-Key: rdev_xxx" \
+  -d '{"command": "rm -rf /"}'
+
+# Response: 400
+{
+  "error": {
+    "code": "BAD_REQUEST",
+    "message": "destructive rm command not allowed"
+  }
+}
+```
+
+**Fix**: Use safe commands. See security documentation for allowed patterns.
+
+### Project Not Found
+
+```bash
+curl http://localhost:8080/projects/nonexistent \
+  -H "X-API-Key: rdev_xxx"
+
+# Response: 404
+{
+  "error": {
+    "code": "NOT_FOUND",
+    "message": "project not found: nonexistent"
+  }
+}
+```
+
+**Fix**: Check project ID. List projects to see available ones.
+
+### Rate Limited
+
+```bash
+# After too many requests
+# Response: 429
+{
+  "error": {
+    "code": "TOO_MANY_REQUESTS",
+    "message": "Rate limit exceeded"
+  }
+}
+```
+
+**Fix**: Wait for `X-RateLimit-Reset` timestamp, then retry.
--- a/docs/api/sse-examples.md
+++ b/docs/api/sse-examples.md
@ -0,0 +1,374 @@
+# SSE Streaming Examples
+
+rdev uses Server-Sent Events (SSE) for real-time command output streaming. This guide provides examples in JavaScript, Python, and Go.
+
+## Event Types
+
+| Event | Description |
+|-------|-------------|
+| `connected` | Stream established |
+| `output` | Command output line |
+| `complete` | Command finished |
+| `heartbeat` | Keep-alive signal |
+| `error` | Error occurred |
+
+## JavaScript (Browser/Node.js)
+
+### Browser with EventSource
+
+```javascript
+async function executeCommand(projectId, command, apiKey) {
+  // 1. Start the command
+  const response = await fetch(`/projects/${projectId}/shell`, {
+    method: 'POST',
+    headers: {
+      'Content-Type': 'application/json',
+      'X-API-Key': apiKey,
+    },
+    body: JSON.stringify({ command }),
+  });
+
+  const { data } = await response.json();
+  const streamId = data.id;
+
+  // 2. Connect to SSE stream
+  const eventSource = new EventSource(
+    `/projects/${projectId}/events?stream_id=${streamId}`
+  );
+
+  return new Promise((resolve, reject) => {
+    eventSource.addEventListener('connected', (e) => {
+      console.log('Connected:', JSON.parse(e.data));
+    });
+
+    eventSource.addEventListener('output', (e) => {
+      const { line, stream } = JSON.parse(e.data);
+      if (stream === 'stdout') {
+        process.stdout.write(line + '\n');
+      } else {
+        process.stderr.write(line + '\n');
+      }
+    });
+
+    eventSource.addEventListener('complete', (e) => {
+      const { exit_code, duration_ms } = JSON.parse(e.data);
+      eventSource.close();
+      resolve({ exitCode: exit_code, duration: duration_ms });
+    });
+
+    eventSource.addEventListener('error', (e) => {
+      if (e.data) {
+        const { message } = JSON.parse(e.data);
+        reject(new Error(message));
+      }
+      // Connection error - browser will auto-reconnect
+    });
+  });
+}
+
+// Usage
+executeCommand('my-project', 'npm test', 'rdev_xxx')
+  .then(({ exitCode }) => console.log('Exit code:', exitCode))
+  .catch(console.error);
+```
+
+### Node.js with eventsource package
+
+```javascript
+const EventSource = require('eventsource');
+
+function connectSSE(url, apiKey, handlers) {
+  const eventSource = new EventSource(url, {
+    headers: {
+      'X-API-Key': apiKey,
+    },
+  });
+
+  eventSource.addEventListener('connected', (e) => {
+    handlers.onConnected?.(JSON.parse(e.data));
+  });
+
+  eventSource.addEventListener('output', (e) => {
+    handlers.onOutput?.(JSON.parse(e.data));
+  });
+
+  eventSource.addEventListener('complete', (e) => {
+    handlers.onComplete?.(JSON.parse(e.data));
+    eventSource.close();
+  });
+
+  eventSource.onerror = (e) => {
+    handlers.onError?.(e);
+  };
+
+  return eventSource;
+}
+
+// Usage
+connectSSE(
+  'http://localhost:8080/projects/my-project/events?stream_id=cmd-001',
+  'rdev_xxx',
+  {
+    onOutput: ({ line, stream }) => console.log(`[${stream}] ${line}`),
+    onComplete: ({ exit_code }) => console.log('Done:', exit_code),
+  }
+);
+```
+
+## Python
+
+### Using sseclient-py
+
+```python
+import requests
+import sseclient
+import json
+
+def execute_command(base_url, project_id, command, api_key):
+    """Execute a command and stream output."""
+    headers = {
+        'X-API-Key': api_key,
+        'Content-Type': 'application/json',
+    }
+
+    # 1. Start the command
+    response = requests.post(
+        f'{base_url}/projects/{project_id}/shell',
+        headers=headers,
+        json={'command': command}
+    )
+    data = response.json()['data']
+    stream_id = data['id']
+
+    # 2. Connect to SSE stream
+    response = requests.get(
+        f'{base_url}/projects/{project_id}/events',
+        params={'stream_id': stream_id},
+        headers={'X-API-Key': api_key},
+        stream=True
+    )
+
+    client = sseclient.SSEClient(response)
+    result = None
+
+    for event in client.events():
+        data = json.loads(event.data)
+
+        if event.event == 'connected':
+            print(f"Connected: {data}")
+
+        elif event.event == 'output':
+            stream = data.get('stream', 'stdout')
+            line = data.get('line', '')
+            print(f"[{stream}] {line}")
+
+        elif event.event == 'complete':
+            result = {
+                'exit_code': data['exit_code'],
+                'duration_ms': data['duration_ms']
+            }
+            break
+
+        elif event.event == 'heartbeat':
+            pass  # Keep-alive
+
+    return result
+
+# Usage
+result = execute_command(
+    'http://localhost:8080',
+    'my-project',
+    'pip install -r requirements.txt',
+    'rdev_xxx'
+)
+print(f"Exit code: {result['exit_code']}")
+```
+
+### Using aiohttp (async)
+
+```python
+import aiohttp
+import asyncio
+import json
+
+async def execute_command_async(base_url, project_id, command, api_key):
+    """Execute a command asynchronously."""
+    headers = {'X-API-Key': api_key}
+
+    async with aiohttp.ClientSession(headers=headers) as session:
+        # 1. Start the command
+        async with session.post(
+            f'{base_url}/projects/{project_id}/shell',
+            json={'command': command}
+        ) as resp:
+            data = await resp.json()
+            stream_id = data['data']['id']
+
+        # 2. Connect to SSE stream
+        async with session.get(
+            f'{base_url}/projects/{project_id}/events',
+            params={'stream_id': stream_id}
+        ) as resp:
+            async for line in resp.content:
+                line = line.decode('utf-8').strip()
+
+                if line.startswith('event:'):
+                    event_type = line[7:]
+                elif line.startswith('data:'):
+                    data = json.loads(line[6:])
+
+                    if event_type == 'output':
+                        print(f"[{data['stream']}] {data['line']}")
+                    elif event_type == 'complete':
+                        return data
+
+# Usage
+result = asyncio.run(execute_command_async(
+    'http://localhost:8080',
+    'my-project',
+    'make build',
+    'rdev_xxx'
+))
+```
+
+## Go
+
+```go
+package main
+
+import (
+    "bufio"
+    "encoding/json"
+    "fmt"
+    "net/http"
+    "strings"
+)
+
+type OutputEvent struct {
+    Line   string `json:"line"`
+    Stream string `json:"stream"`
+}
+
+type CompleteEvent struct {
+    ExitCode   int `json:"exit_code"`
+    DurationMs int `json:"duration_ms"`
+}
+
+func executeCommand(baseURL, projectID, command, apiKey string) (*CompleteEvent, error) {
+    client := &http.Client{}
+
+    // 1. Start the command
+    reqBody := fmt.Sprintf(`{"command": %q}`, command)
+    req, _ := http.NewRequest("POST",
+        fmt.Sprintf("%s/projects/%s/shell", baseURL, projectID),
+        strings.NewReader(reqBody))
+    req.Header.Set("X-API-Key", apiKey)
+    req.Header.Set("Content-Type", "application/json")
+
+    resp, err := client.Do(req)
+    if err != nil {
+        return nil, err
+    }
+    defer resp.Body.Close()
+
+    var startResp struct {
+        Data struct {
+            ID string `json:"id"`
+        } `json:"data"`
+    }
+    json.NewDecoder(resp.Body).Decode(&startResp)
+    streamID := startResp.Data.ID
+
+    // 2. Connect to SSE stream
+    req, _ = http.NewRequest("GET",
+        fmt.Sprintf("%s/projects/%s/events?stream_id=%s", baseURL, projectID, streamID),
+        nil)
+    req.Header.Set("X-API-Key", apiKey)
+
+    resp, err = client.Do(req)
+    if err != nil {
+        return nil, err
+    }
+    defer resp.Body.Close()
+
+    scanner := bufio.NewScanner(resp.Body)
+    var eventType string
+
+    for scanner.Scan() {
+        line := scanner.Text()
+
+        if strings.HasPrefix(line, "event:") {
+            eventType = strings.TrimSpace(line[6:])
+        } else if strings.HasPrefix(line, "data:") {
+            data := line[5:]
+
+            switch eventType {
+            case "output":
+                var output OutputEvent
+                json.Unmarshal([]byte(data), &output)
+                fmt.Printf("[%s] %s\n", output.Stream, output.Line)
+
+            case "complete":
+                var complete CompleteEvent
+                json.Unmarshal([]byte(data), &complete)
+                return &complete, nil
+            }
+        }
+    }
+
+    return nil, scanner.Err()
+}
+
+func main() {
+    result, err := executeCommand(
+        "http://localhost:8080",
+        "my-project",
+        "go test ./...",
+        "rdev_xxx",
+    )
+    if err != nil {
+        panic(err)
+    }
+    fmt.Printf("Exit code: %d\n", result.ExitCode)
+}
+```
+
+## Reconnection Handling
+
+SSE supports automatic reconnection with the `Last-Event-ID` header.
+
+### JavaScript
+
+The browser's EventSource automatically reconnects with `Last-Event-ID`.
+
+### Python
+
+```python
+def connect_with_reconnect(url, api_key, last_event_id=None):
+    headers = {'X-API-Key': api_key}
+    if last_event_id:
+        headers['Last-Event-ID'] = last_event_id
+
+    response = requests.get(url, headers=headers, stream=True)
+    return sseclient.SSEClient(response)
+```
+
+### Go
+
+```go
+req.Header.Set("Last-Event-ID", lastEventID)
+```
+
+## Error Handling
+
+Always handle SSE errors gracefully:
+
+```javascript
+eventSource.onerror = (e) => {
+  if (eventSource.readyState === EventSource.CLOSED) {
+    console.log('Connection closed');
+  } else {
+    console.log('Connection error, reconnecting...');
+  }
+};
+```
--- a/docs/architecture/README.md
+++ b/docs/architecture/README.md
@ -0,0 +1,140 @@
+# rdev Architecture
+
+rdev is a remote development API that enables secure command execution in isolated Kubernetes pods. This document provides an overview of the system architecture.
+
+## System Context
+
+```
+                    ┌─────────────────────────────────────────────────┐
+                    │                   rdev API                       │
+                    │                                                  │
+    ┌──────────┐    │  ┌──────────┐   ┌───────────┐   ┌───────────┐  │    ┌──────────┐
+    │  Client  │────┼─▶│   HTTP   │──▶│  Service  │──▶│  Adapter  │──┼───▶│   K8s    │
+    │  (SDK)   │◀───┼──│ Handler  │◀──│   Layer   │◀──│   Layer   │◀─┼────│  Cluster │
+    └──────────┘    │  └──────────┘   └───────────┘   └───────────┘  │    └──────────┘
+                    │       │              │               │          │
+                    │       │              │               │          │
+                    │       ▼              ▼               ▼          │
+                    │  ┌──────────┐   ┌───────────┐   ┌───────────┐  │
+                    │  │   Auth   │   │  Domain   │   │  Postgres │  │
+                    │  │Middleware│   │  Models   │   │    DB     │  │
+                    │  └──────────┘   └───────────┘   └───────────┘  │
+                    └─────────────────────────────────────────────────┘
+```
+
+## Key Components
+
+### HTTP Layer
+- **Handlers**: REST endpoints for projects, commands, SSE streaming, API keys
+- **Middleware**: Authentication, rate limiting, metrics, logging
+- **Validation**: Input sanitization, command filtering
+
+### Service Layer
+- **ProjectService**: Project lifecycle management
+- **AuthService**: API key management, validation
+
+### Adapter Layer
+- **Kubernetes Executor**: Command execution via kubectl exec
+- **Project Repository**: Pod discovery, status monitoring
+- **Postgres Repository**: API key storage
+
+### Domain
+- **Models**: Project, Command, APIKey, Scope
+- **Errors**: Domain-specific error types
+- **Events**: Command output, completion events
+
+## Architecture Patterns
+
+### Hexagonal Architecture (Ports & Adapters)
+
+See [hexagonal.md](hexagonal.md) for detailed explanation.
+
+```
+        ┌─────────────────────────────────────────────┐
+        │              Application Core               │
+        │  ┌───────────────────────────────────────┐  │
+        │  │           Domain Models                │  │
+        │  │  (Project, Command, APIKey, Scope)     │  │
+        │  └───────────────────────────────────────┘  │
+        │  ┌───────────────────────────────────────┐  │
+        │  │           Port Interfaces              │  │
+        │  │  (ProjectRepository, Executor, etc.)   │  │
+        │  └───────────────────────────────────────┘  │
+        │  ┌───────────────────────────────────────┐  │
+        │  │           Service Layer                │  │
+        │  │  (ProjectService, uses ports)          │  │
+        │  └───────────────────────────────────────┘  │
+        └───────────────────┬─────────────────────────┘
+                            │
+        ┌───────────────────┴─────────────────────────┐
+        │              Adapters                        │
+        │  ┌─────────────┐  ┌─────────────┐           │
+        │  │  Kubernetes │  │   Postgres  │           │
+        │  │   Adapter   │  │   Adapter   │           │
+        │  └─────────────┘  └─────────────┘           │
+        └─────────────────────────────────────────────┘
+```
+
+### Key Flows
+
+1. **Command Execution**: Client -> Handler -> Service -> Executor -> K8s Pod
+2. **SSE Streaming**: Command output -> StreamManager -> SSE Response
+3. **Authentication**: Request -> Middleware -> AuthService -> DB Lookup
+
+See [streaming.md](streaming.md) and [security.md](security.md) for details.
+
+## Technology Stack
+
+| Component | Technology |
+|-----------|------------|
+| Language | Go 1.22+ |
+| HTTP Router | chi v5 |
+| Database | PostgreSQL |
+| Container Orchestration | Kubernetes |
+| Metrics | Prometheus |
+| Tracing | OpenTelemetry (optional) |
+
+## Package Structure
+
+```
+internal/
+├── adapter/           # Infrastructure adapters
+│   ├── cached/       # Caching wrappers
+│   └── kubernetes/   # K8s client implementation
+├── auth/             # Authentication & authorization
+├── circuitbreaker/   # Circuit breaker for resilience
+├── db/               # Database connectivity
+├── domain/           # Core domain models
+├── handlers/         # HTTP handlers
+├── metrics/          # Prometheus metrics
+├── port/             # Port interfaces (abstractions)
+├── ratelimit/        # Rate limiting
+├── sanitize/         # Command sanitization
+├── service/          # Business logic services
+└── validate/         # Input validation
+
+pkg/
+└── api/              # Shared API utilities
+
+cmd/
+└── rdev-api/         # Main application entry point
+```
+
+## Configuration
+
+Environment variables:
+
+| Variable | Description | Default |
+|----------|-------------|---------|
+| `PORT` | HTTP server port | 8080 |
+| `POSTGRES_HOST` | Database host | postgres.databases.svc |
+| `POSTGRES_DB` | Database name | rdev |
+| `RDEV_NAMESPACE` | K8s namespace for pods | default |
+| `RATE_LIMIT_RPS` | Requests per second limit | 10 |
+| `CONCURRENT_COMMANDS` | Max concurrent commands | 5 |
+
+## Related Documentation
+
+- [Hexagonal Architecture](hexagonal.md) - Port/Adapter pattern details
+- [Security](security.md) - Auth, sanitization, rate limiting
+- [Streaming](streaming.md) - SSE protocol, reconnection handling
--- a/docs/architecture/diagrams/component.mmd
+++ b/docs/architecture/diagrams/component.mmd
@ -0,0 +1,57 @@
+---
+title: rdev Component Diagram
+---
+flowchart TB
+    subgraph Handlers["HTTP Handlers"]
+        PH[Projects Handler]
+        KH[Keys Handler]
+        HH[Health Handler]
+        CH[Claude Config Handler]
+    end
+
+    subgraph Middleware
+        Auth[Auth Middleware]
+        Rate[Rate Limiter]
+        Metrics[Metrics]
+    end
+
+    subgraph Services
+        PS[Project Service]
+        AS[Auth Service]
+    end
+
+    subgraph Ports["Port Interfaces"]
+        PR[ProjectRepository]
+        EX[CommandExecutor]
+        SM[StreamManager]
+    end
+
+    subgraph Adapters
+        K8sRepo[K8s Project Repository]
+        K8sExec[K8s Executor]
+        CachedRepo[Cached Repository]
+        PgRepo[Postgres Repository]
+    end
+
+    subgraph Domain
+        Models[Domain Models]
+        Errors[Domain Errors]
+    end
+
+    %% Connections
+    PH --> PS
+    KH --> AS
+
+    Middleware --> Handlers
+
+    PS --> PR
+    PS --> EX
+    PS --> SM
+    AS --> PgRepo
+
+    CachedRepo --> K8sRepo
+    PR -.-> CachedRepo
+    EX -.-> K8sExec
+
+    Services --> Domain
+    Adapters --> Domain
--- a/docs/architecture/diagrams/sequence-command.mmd
+++ b/docs/architecture/diagrams/sequence-command.mmd
@ -0,0 +1,38 @@
+---
+title: Command Execution Sequence
+---
+sequenceDiagram
+    participant C as Client
+    participant H as Handler
+    participant S as Service
+    participant E as Executor
+    participant P as Pod
+    participant SM as StreamManager
+
+    C->>H: POST /projects/{id}/claude
+    H->>H: Validate request
+    H->>S: ExecuteClaude(req)
+    S->>S: Sanitize prompt
+    S->>E: Execute(cmd, podName, outputFn)
+
+    E->>P: kubectl exec
+    activate P
+
+    Note over C,SM: Client connects to SSE stream
+
+    C->>H: GET /projects/{id}/events?stream_id=xxx
+    H->>SM: Subscribe(streamID)
+    SM-->>C: event: connected
+
+    loop Output streaming
+        P-->>E: stdout/stderr line
+        E->>SM: Send("output", line)
+        SM-->>C: event: output
+    end
+
+    P-->>E: exit code
+    deactivate P
+
+    E->>SM: Send("complete", result)
+    SM-->>C: event: complete
+    C->>C: Close SSE connection
--- a/docs/architecture/diagrams/system-context.mmd
+++ b/docs/architecture/diagrams/system-context.mmd
@ -0,0 +1,19 @@
+---
+title: rdev System Context Diagram
+---
+flowchart TB
+    subgraph External
+        Client[Client Applications<br/>SDK, CLI, Web]
+        K8s[Kubernetes Cluster<br/>claudebox pods]
+        Postgres[(PostgreSQL<br/>API keys, audit)]
+    end
+
+    subgraph rdev["rdev API"]
+        API[HTTP API<br/>REST + SSE]
+    end
+
+    Client -->|HTTP/SSE| API
+    API -->|kubectl exec| K8s
+    API -->|SQL| Postgres
+
+    style rdev fill:#f9f,stroke:#333,stroke-width:2px
--- a/docs/architecture/hexagonal.md
+++ b/docs/architecture/hexagonal.md
@ -0,0 +1,273 @@
+# Hexagonal Architecture (Ports & Adapters)
+
+rdev implements hexagonal architecture to achieve clean separation of concerns, testability, and flexibility in infrastructure choices.
+
+## Overview
+
+Hexagonal architecture organizes code into three layers:
+
+1. **Domain** - Core business logic and models
+2. **Ports** - Abstract interfaces defining capabilities
+3. **Adapters** - Concrete implementations of ports
+
+```
+                         ┌─────────────────────────┐
+                         │        Domain           │
+                         │                         │
+    Driving              │  ┌─────────────────┐   │              Driven
+    (Primary)            │  │     Models      │   │             (Secondary)
+    Adapters             │  │  Project, Cmd   │   │              Adapters
+        │                │  │  APIKey, Scope  │   │                  │
+        │                │  └─────────────────┘   │                  │
+        │                │                         │                  │
+        │                │  ┌─────────────────┐   │                  │
+        │                │  │     Ports       │   │                  │
+        ▼                │  │  (Interfaces)   │   │                  ▼
+    ┌───────┐            │  └─────────────────┘   │            ┌───────┐
+    │ HTTP  │───────────▶│                         │◀───────────│  K8s  │
+    │Handler│            │  ┌─────────────────┐   │            │Adapter│
+    └───────┘            │  │    Services     │   │            └───────┘
+                         │  │ ProjectService  │   │
+                         │  │   AuthService   │   │            ┌───────┐
+                         │  └─────────────────┘   │◀───────────│  DB   │
+                         │                         │            │Adapter│
+                         └─────────────────────────┘            └───────┘
+```
+
+## Domain Layer
+
+Located in `internal/domain/`.
+
+### Models
+
+```go
+// Project represents a development environment
+type Project struct {
+    ID          ProjectID
+    Name        string
+    PodName     string
+    Status      ProjectStatus
+    LastSeen    time.Time
+    Labels      map[string]string
+    Annotations map[string]string
+}
+
+// Command represents an executable command
+type Command struct {
+    ID        CommandID
+    ProjectID ProjectID
+    Type      CommandType
+    Args      []string
+    Status    CommandStatus
+    StartedAt time.Time
+    EndedAt   *time.Time
+    ExitCode  *int
+}
+```
+
+### Domain Errors
+
+```go
+var (
+    ErrProjectNotFound      = errors.New("project not found")
+    ErrCommandNotFound      = errors.New("command not found")
+    ErrInvalidCommand       = errors.New("invalid command")
+    ErrCommandSanitization  = errors.New("command failed sanitization")
+)
+```
+
+## Ports Layer
+
+Located in `internal/port/`.
+
+### Port Interfaces
+
+```go
+// ProjectRepository defines project data access
+type ProjectRepository interface {
+    List(ctx context.Context) ([]domain.Project, error)
+    Get(ctx context.Context, id domain.ProjectID) (*domain.Project, error)
+    Exists(ctx context.Context, id domain.ProjectID) (bool, error)
+    RefreshStatus(ctx context.Context) error
+}
+
+// CommandExecutor defines command execution capability
+type CommandExecutor interface {
+    Execute(ctx context.Context, cmd *domain.Command, podName string,
+            outputFn func(domain.OutputLine)) (*domain.CommandResult, error)
+    Cancel(id domain.CommandID) error
+    ActiveCount() int
+}
+```
+
+### Benefits of Ports
+
+1. **Testability**: Mock implementations for unit tests
+2. **Flexibility**: Swap adapters without changing business logic
+3. **Documentation**: Interfaces define contracts clearly
+
+## Adapters Layer
+
+Located in `internal/adapter/`.
+
+### Kubernetes Adapter
+
+```go
+// kubernetes/project_repository.go
+type ProjectRepository struct {
+    namespace string
+    client    kubernetes.Interface
+    projects  []domain.Project
+    mu        sync.RWMutex
+}
+
+func (r *ProjectRepository) List(ctx context.Context) ([]domain.Project, error) {
+    // Uses K8s API to discover pods with rdev labels
+}
+
+// kubernetes/executor.go
+type Executor struct {
+    namespace      string
+    activeCommands map[domain.CommandID]context.CancelFunc
+}
+
+func (e *Executor) Execute(ctx context.Context, cmd *domain.Command,
+    podName string, outputFn func(domain.OutputLine)) (*domain.CommandResult, error) {
+    // Uses kubectl exec to run commands
+}
+```
+
+### Caching Adapter
+
+```go
+// cached/project_repository.go
+type ProjectRepository struct {
+    inner         port.ProjectRepository
+    ttl           time.Duration
+    projectsCache []domain.Project
+    lastFetch     time.Time
+}
+
+func (r *ProjectRepository) List(ctx context.Context) ([]domain.Project, error) {
+    if r.isCacheFresh() {
+        return r.projectsCache, nil
+    }
+    return r.inner.List(ctx)
+}
+```
+
+## Service Layer
+
+Located in `internal/service/`.
+
+Services orchestrate domain logic using ports:
+
+```go
+type ProjectService struct {
+    repo     port.ProjectRepository
+    executor port.CommandExecutor
+    streams  port.StreamManager
+}
+
+func (s *ProjectService) ExecuteClaude(ctx context.Context,
+    req ExecuteClaudeRequest) (*ExecuteResult, error) {
+
+    // 1. Validate project exists
+    project, err := s.repo.Get(ctx, req.ProjectID)
+    if err != nil {
+        return nil, err
+    }
+
+    // 2. Sanitize command
+    if err := sanitize.ClaudePrompt(req.Prompt); err != nil {
+        return nil, domain.ErrCommandSanitization
+    }
+
+    // 3. Execute via port
+    result, err := s.executor.Execute(ctx, cmd, project.PodName,
+        s.handleOutput)
+
+    return result, nil
+}
+```
+
+## Dependency Injection
+
+Dependencies flow inward:
+
+```go
+// cmd/rdev-api/main.go
+func main() {
+    // Create adapters
+    k8sClient := kubernetes.NewClientset()
+    projectRepo := kubernetes.NewProjectRepositoryWithClient(namespace, k8sClient)
+    cachedRepo := cached.NewProjectRepository(projectRepo, 30*time.Second)
+    executor := kubernetes.NewExecutor(namespace)
+
+    // Create services with ports
+    projectService := service.NewProjectService(cachedRepo, executor)
+
+    // Create handlers with services
+    projectsHandler := handlers.NewProjectsHandlerWithService(projectService)
+}
+```
+
+## Testing
+
+### Unit Tests with Mocks
+
+```go
+type mockProjectRepo struct {
+    projects []domain.Project
+}
+
+func (m *mockProjectRepo) Get(ctx context.Context, id domain.ProjectID) (*domain.Project, error) {
+    for _, p := range m.projects {
+        if p.ID == id {
+            return &p, nil
+        }
+    }
+    return nil, domain.ErrProjectNotFound
+}
+
+func TestProjectService_ExecuteClaude(t *testing.T) {
+    repo := &mockProjectRepo{projects: testProjects}
+    exec := &mockExecutor{}
+    svc := service.NewProjectService(repo, exec)
+
+    result, err := svc.ExecuteClaude(ctx, req)
+    // Assert...
+}
+```
+
+### Integration Tests with Real Adapters
+
+```go
+func TestKubernetesAdapter_Execute(t *testing.T) {
+    if testing.Short() {
+        t.Skip("skipping integration test")
+    }
+
+    executor := kubernetes.NewExecutor("test-namespace")
+    // Test with real K8s...
+}
+```
+
+## Trade-offs
+
+### Benefits
+- Clear separation of concerns
+- Easy to test in isolation
+- Flexible infrastructure choices
+- Domain logic remains pure
+
+### Costs
+- More interfaces and types
+- Initial setup complexity
+- Some indirection overhead
+
+## Related Patterns
+
+- **Repository Pattern**: Abstracts data access
+- **Service Layer Pattern**: Orchestrates business logic
+- **Dependency Injection**: Decouples creation from usage
--- a/docs/architecture/security.md
+++ b/docs/architecture/security.md
@ -0,0 +1,322 @@
+# Security Architecture
+
+rdev implements defense in depth with multiple security layers.
+
+## Authentication
+
+### API Keys
+
+All API requests (except health checks) require authentication:
+
+```
+┌────────────┐     ┌────────────┐     ┌────────────┐     ┌────────────┐
+│   Client   │────▶│    Auth    │────▶│    Auth    │────▶│   Handler  │
+│            │     │ Middleware │     │  Service   │     │            │
+└────────────┘     └────────────┘     └────────────┘     └────────────┘
+                         │                   │
+                         │                   ▼
+                         │            ┌────────────┐
+                         │            │  Postgres  │
+                         │            │   (keys)   │
+                         │            └────────────┘
+                         ▼
+                   Check IP Allowlist
+```
+
+### Key Format
+
+```
+rdev_<random_32_chars>
+```
+
+Keys are stored as SHA-256 hashes, never in plaintext.
+
+### Authentication Flow
+
+1. Extract key from `X-API-Key` header or `Authorization: Bearer` header
+2. Hash the key with SHA-256
+3. Look up hash in database
+4. Verify key is not revoked or expired
+5. Check IP allowlist (if configured)
+6. Add key to request context
+
+### Scopes
+
+| Scope | Description |
+|-------|-------------|
+| `projects:read` | List and view projects |
+| `projects:execute` | Execute commands |
+| `keys:read` | List API keys |
+| `keys:write` | Create/revoke keys |
+| `admin` | Full access |
+
+### IP Allowlisting
+
+API keys can be restricted to specific IP addresses or CIDR ranges:
+
+```go
+type APIKey struct {
+    // ...
+    AllowedIPs []string // CIDR notation: ["192.168.1.0/24", "10.0.0.0/8"]
+}
+
+func (k *APIKey) IsIPAllowed(clientIP string) bool {
+    if len(k.AllowedIPs) == 0 {
+        return true // No restriction
+    }
+    for _, cidr := range k.AllowedIPs {
+        _, network, _ := net.ParseCIDR(cidr)
+        if network.Contains(net.ParseIP(clientIP)) {
+            return true
+        }
+    }
+    return false
+}
+```
+
+## Command Sanitization
+
+All commands are sanitized before execution to prevent:
+
+### Shell Injection Protection
+
+```go
+// internal/sanitize/command.go
+
+func ShellCommand(cmd string) error {
+    // Block command chaining
+    dangerous := []string{";", "&&", "||", "|", "`", "$(", "${"}
+    for _, d := range dangerous {
+        if strings.Contains(cmd, d) {
+            return fmt.Errorf("command chaining not allowed")
+        }
+    }
+
+    // Block redirects
+    if strings.ContainsAny(cmd, "<>") {
+        return fmt.Errorf("redirects not allowed")
+    }
+
+    // Block destructive commands
+    if isDestructiveRm(cmd) {
+        return fmt.Errorf("destructive rm not allowed")
+    }
+
+    return nil
+}
+```
+
+### Blocked Patterns
+
+| Category | Examples |
+|----------|----------|
+| Command chaining | `; && || \| $() \`\`` |
+| Redirects | `> >> < <<` |
+| Destructive | `rm -rf /`, `dd if=` |
+| Escape sequences | Null bytes, control chars |
+
+### Git Command Restrictions
+
+```go
+func GitArgs(args []string) error {
+    if len(args) == 0 {
+        return errors.New("no git subcommand")
+    }
+
+    blocked := map[string]bool{
+        "config": true,  // Could change credentials
+        "remote": true,  // Could add malicious remotes
+    }
+
+    if blocked[args[0]] {
+        return fmt.Errorf("git %s not allowed", args[0])
+    }
+
+    // Block force push
+    if args[0] == "push" {
+        for _, arg := range args {
+            if arg == "-f" || arg == "--force" {
+                return errors.New("force push not allowed")
+            }
+        }
+    }
+
+    return nil
+}
+```
+
+### Claude Prompt Sanitization
+
+```go
+func ClaudePrompt(prompt string) error {
+    // Check for null bytes
+    if strings.ContainsRune(prompt, 0) {
+        return errors.New("null bytes not allowed")
+    }
+
+    // Check for control characters
+    for _, r := range prompt {
+        if r < 32 && r != '\n' && r != '\r' && r != '\t' {
+            return errors.New("control characters not allowed")
+        }
+    }
+
+    return nil
+}
+```
+
+## Rate Limiting
+
+### Request Rate Limiting
+
+Token bucket algorithm limits requests per API key:
+
+```go
+type RateLimiter struct {
+    rate      rate.Limit  // Requests per second
+    burst     int         // Maximum burst size
+    limiters  sync.Map    // Per-key limiters
+}
+
+func (l *RateLimiter) Allow(key string) bool {
+    limiter := l.getLimiter(key)
+    return limiter.Allow()
+}
+```
+
+### Concurrent Command Limiting
+
+Limits active commands per project:
+
+```go
+type CommandLimiter struct {
+    maxConcurrent int
+    active        map[string]int
+    mu            sync.Mutex
+}
+
+func (l *CommandLimiter) TryAcquire(projectID string) bool {
+    l.mu.Lock()
+    defer l.mu.Unlock()
+
+    if l.active[projectID] >= l.maxConcurrent {
+        return false
+    }
+    l.active[projectID]++
+    return true
+}
+```
+
+### Rate Limit Headers
+
+Responses include rate limit information:
+
+```
+X-RateLimit-Limit: 10
+X-RateLimit-Remaining: 7
+X-RateLimit-Reset: 1642089600
+```
+
+## Network Security
+
+### Kubernetes Network Policy
+
+```yaml
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+  name: rdev-api-policy
+spec:
+  podSelector:
+    matchLabels:
+      app: rdev-api
+  policyTypes:
+    - Ingress
+    - Egress
+  ingress:
+    - from:
+        - namespaceSelector:
+            matchLabels:
+              kubernetes.io/metadata.name: ingress-nginx
+  egress:
+    - to:
+        - namespaceSelector:
+            matchLabels:
+              kubernetes.io/metadata.name: databases
+      ports:
+        - protocol: TCP
+          port: 5432
+```
+
+### Pod Security
+
+```yaml
+securityContext:
+  runAsNonRoot: true
+  runAsUser: 1000
+  readOnlyRootFilesystem: true
+  allowPrivilegeEscalation: false
+  capabilities:
+    drop:
+      - ALL
+```
+
+## RBAC
+
+### Service Account Permissions
+
+```yaml
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: rdev-api-role
+rules:
+  # Read pods for project discovery
+  - apiGroups: [""]
+    resources: ["pods"]
+    verbs: ["get", "list", "watch"]
+  # Execute commands in pods
+  - apiGroups: [""]
+    resources: ["pods/exec"]
+    verbs: ["create"]
+  # Read ConfigMaps for project config
+  - apiGroups: [""]
+    resources: ["configmaps"]
+    verbs: ["get", "list", "watch"]
+```
+
+## Security Checklist
+
+### Development
+- [ ] All inputs sanitized before use
+- [ ] No secrets in code or logs
+- [ ] SQL injection protection (parameterized queries)
+- [ ] No command injection vectors
+
+### Deployment
+- [ ] TLS termination at ingress
+- [ ] Network policies applied
+- [ ] Pod security context configured
+- [ ] RBAC minimized to required permissions
+
+### Operations
+- [ ] API keys rotated regularly
+- [ ] Audit logs enabled
+- [ ] Rate limits configured appropriately
+- [ ] IP allowlists for sensitive keys
+
+## Incident Response
+
+### Key Compromise
+
+1. Revoke the compromised key immediately
+2. Review audit logs for unauthorized access
+3. Issue new key to affected user
+4. Investigate source of compromise
+
+### Rate Limit Abuse
+
+1. Identify abusing key from metrics
+2. Temporarily lower key's rate limit
+3. Contact key owner
+4. Consider IP-based blocking if severe
--- a/docs/architecture/streaming.md
+++ b/docs/architecture/streaming.md
@ -0,0 +1,324 @@
+# SSE Streaming Architecture
+
+rdev uses Server-Sent Events (SSE) for real-time command output streaming.
+
+## Overview
+
+```
+┌────────────┐                  ┌────────────┐                  ┌────────────┐
+│   Client   │                  │  rdev API  │                  │   K8s Pod  │
+│            │                  │            │                  │            │
+│  1. POST   │─────────────────▶│  Start     │─────────────────▶│  Execute   │
+│  /claude   │                  │  Command   │                  │  Command   │
+│            │◀─────────────────│            │                  │            │
+│  response: │  {id, stream_url}│            │                  │            │
+│            │                  │            │                  │            │
+│  2. GET    │─────────────────▶│  SSE       │◀─────────────────│  Output    │
+│  /events   │◀─────────────────│  Stream    │◀─────────────────│  Lines     │
+│            │  event: output   │            │                  │            │
+│            │  event: output   │            │                  │            │
+│            │  event: complete │            │◀─────────────────│  Exit      │
+└────────────┘                  └────────────┘                  └────────────┘
+```
+
+## SSE Protocol
+
+### Event Format
+
+```
+id: evt-001
+event: output
+data: {"line": "Hello, world!", "stream": "stdout"}
+
+id: evt-002
+event: output
+data: {"line": "Processing...", "stream": "stdout"}
+
+id: evt-003
+event: complete
+data: {"exit_code": 0, "duration_ms": 1234}
+```
+
+### Event Types
+
+| Event | Description | Data |
+|-------|-------------|------|
+| `connected` | Stream established | `{project, stream_id, reconnecting}` |
+| `output` | Command output line | `{line, stream}` |
+| `complete` | Command finished | `{exit_code, duration_ms}` |
+| `heartbeat` | Keep-alive signal | `{timestamp}` |
+| `error` | Error occurred | `{message}` |
+
+### Output Streams
+
+- `stdout` - Standard output
+- `stderr` - Standard error
+
+## Reconnection Support
+
+### Last-Event-ID
+
+Clients can reconnect and resume from where they left off:
+
+```
+GET /projects/test/events?stream_id=cmd-001
+Last-Event-ID: evt-002
+```
+
+The server replays all events after `evt-002`.
+
+### Implementation
+
+```go
+type StreamManager struct {
+    streams map[string]*Stream
+    mu      sync.RWMutex
+}
+
+type Stream struct {
+    events    []StreamEvent
+    listeners []chan StreamEvent
+    mu        sync.RWMutex
+}
+
+func (sm *StreamManager) SubscribeFromID(streamID, lastEventID string) (<-chan StreamEvent, func()) {
+    sm.mu.RLock()
+    stream := sm.streams[streamID]
+    sm.mu.RUnlock()
+
+    ch := make(chan StreamEvent, 100)
+
+    // Replay events after lastEventID
+    stream.mu.RLock()
+    foundLast := false
+    for _, event := range stream.events {
+        if event.ID == lastEventID {
+            foundLast = true
+            continue
+        }
+        if foundLast {
+            ch <- event
+        }
+    }
+    stream.mu.RUnlock()
+
+    // Subscribe for new events
+    stream.addListener(ch)
+
+    return ch, func() { stream.removeListener(ch) }
+}
+```
+
+### Client Handling
+
+```javascript
+// JavaScript SSE client with reconnection
+function connectSSE(url) {
+    const eventSource = new EventSource(url);
+
+    eventSource.onopen = () => {
+        console.log('Connected');
+    };
+
+    eventSource.addEventListener('output', (e) => {
+        const data = JSON.parse(e.data);
+        console.log(data.stream + ':', data.line);
+    });
+
+    eventSource.addEventListener('complete', (e) => {
+        const data = JSON.parse(e.data);
+        console.log('Exit code:', data.exit_code);
+        eventSource.close();
+    });
+
+    eventSource.onerror = (e) => {
+        console.log('Connection error, will auto-reconnect');
+        // Browser automatically reconnects with Last-Event-ID
+    };
+
+    return eventSource;
+}
+```
+
+## Stream Lifecycle
+
+```
+    ┌─────────────────────────────────────────────────────────┐
+    │                    Command Started                       │
+    └────────────────────────┬────────────────────────────────┘
+                             │
+                             ▼
+    ┌─────────────────────────────────────────────────────────┐
+    │                    Stream Created                        │
+    │                    (StreamManager)                       │
+    └────────────────────────┬────────────────────────────────┘
+                             │
+              ┌──────────────┴──────────────┐
+              │                             │
+              ▼                             ▼
+    ┌───────────────────┐         ┌───────────────────┐
+    │  Client Subscribe │         │  Output Events    │
+    │  (SSE Connection) │◀────────│  (from executor)  │
+    └───────────────────┘         └───────────────────┘
+              │                             │
+              │                             │
+              └──────────────┬──────────────┘
+                             │
+                             ▼
+    ┌─────────────────────────────────────────────────────────┐
+    │                   Complete Event                         │
+    │                   (exit_code)                            │
+    └────────────────────────┬────────────────────────────────┘
+                             │
+                             ▼
+    ┌─────────────────────────────────────────────────────────┐
+    │                   Stream Cleanup                         │
+    │              (after 30s grace period)                    │
+    └─────────────────────────────────────────────────────────┘
+```
+
+## Handler Implementation
+
+```go
+func (h *ProjectsHandler) Events(w http.ResponseWriter, r *http.Request) {
+    streamID := r.URL.Query().Get("stream_id")
+    lastEventID := r.Header.Get("Last-Event-ID")
+
+    // Set SSE headers
+    w.Header().Set("Content-Type", "text/event-stream")
+    w.Header().Set("Cache-Control", "no-cache")
+    w.Header().Set("Connection", "keep-alive")
+    w.Header().Set("Access-Control-Allow-Origin", "*")
+
+    flusher := w.(http.Flusher)
+
+    // Subscribe with reconnection support
+    var events <-chan StreamEvent
+    if lastEventID != "" {
+        events, cleanup = h.streams.SubscribeFromID(streamID, lastEventID)
+    } else {
+        events, cleanup = h.streams.Subscribe(streamID)
+    }
+    defer cleanup()
+
+    // Send connected event
+    writeSSE(w, flusher, "connected", map[string]any{
+        "stream_id":    streamID,
+        "reconnecting": lastEventID != "",
+    })
+
+    // Heartbeat ticker
+    heartbeat := time.NewTicker(30 * time.Second)
+    defer heartbeat.Stop()
+
+    // Event loop
+    for {
+        select {
+        case <-r.Context().Done():
+            return
+        case event, ok := <-events:
+            if !ok {
+                return
+            }
+            writeSSEWithID(w, flusher, event.ID, event.Type, event.Data)
+            if event.Type == "complete" {
+                return
+            }
+        case <-heartbeat.C:
+            writeSSE(w, flusher, "heartbeat", map[string]any{
+                "timestamp": time.Now().UTC().Format(time.RFC3339),
+            })
+        }
+    }
+}
+
+func writeSSEWithID(w http.ResponseWriter, flusher http.Flusher,
+    id, event string, data map[string]any) {
+
+    dataBytes, _ := json.Marshal(data)
+    if id != "" {
+        fmt.Fprintf(w, "id: %s\n", id)
+    }
+    fmt.Fprintf(w, "event: %s\n", event)
+    fmt.Fprintf(w, "data: %s\n\n", dataBytes)
+    flusher.Flush()
+}
+```
+
+## Performance Considerations
+
+### Buffer Sizing
+
+```go
+// 100-event buffer to handle bursts
+ch := make(chan StreamEvent, 100)
+```
+
+### Heartbeats
+
+30-second heartbeats prevent:
+- Proxy timeouts
+- Connection drops from inactive connections
+- Client uncertainty about connection state
+
+### Cleanup
+
+Streams are cleaned up 30 seconds after completion:
+- Allows time for reconnections
+- Prevents memory leaks
+- Enables late-arriving clients to see final state
+
+### Fanout
+
+Multiple clients can subscribe to the same stream:
+
+```go
+func (sm *StreamManager) Send(streamID, eventType string, data map[string]any) {
+    sm.mu.RLock()
+    stream := sm.streams[streamID]
+    sm.mu.RUnlock()
+
+    event := StreamEvent{
+        ID:   generateEventID(),
+        Type: eventType,
+        Data: data,
+    }
+
+    // Store for replay
+    stream.addEvent(event)
+
+    // Fanout to all listeners
+    stream.mu.RLock()
+    for _, ch := range stream.listeners {
+        select {
+        case ch <- event:
+        default:
+            // Channel full, skip (client too slow)
+        }
+    }
+    stream.mu.RUnlock()
+}
+```
+
+## Error Handling
+
+### Connection Errors
+
+SSE automatically reconnects on error. The browser:
+1. Closes failed connection
+2. Waits 3 seconds (configurable)
+3. Reconnects with `Last-Event-ID`
+
+### Slow Clients
+
+If a client can't keep up:
+1. Events are dropped (non-blocking send)
+2. Client eventually catches up via replay on reconnect
+
+### Stream Not Found
+
+If stream doesn't exist (expired or never created):
+```
+event: error
+data: {"message": "stream not found"}
+```
--- a/docs/operations/deployment.md
+++ b/docs/operations/deployment.md
@ -0,0 +1,394 @@
+# Deployment Guide
+
+This guide covers deploying rdev API to a Kubernetes cluster.
+
+## Prerequisites
+
+- Kubernetes cluster (1.24+)
+- kubectl configured
+- PostgreSQL database
+- Container registry access
+
+## Quick Deploy
+
+```bash
+# Apply all manifests
+kubectl apply -k deployments/k8s/base/
+
+# Verify deployment
+kubectl -n rdev get pods
+kubectl -n rdev get svc
+```
+
+## Configuration
+
+### Environment Variables
+
+| Variable | Description | Required | Default |
+|----------|-------------|----------|---------|
+| `PORT` | HTTP server port | No | 8080 |
+| `POSTGRES_HOST` | Database host | Yes | - |
+| `POSTGRES_PORT` | Database port | No | 5432 |
+| `POSTGRES_USER` | Database user | Yes | - |
+| `POSTGRES_PASSWORD` | Database password | Yes | - |
+| `POSTGRES_DB` | Database name | No | rdev |
+| `RDEV_NAMESPACE` | K8s namespace for pods | No | default |
+| `RATE_LIMIT_RPS` | Requests per second | No | 10 |
+| `CONCURRENT_COMMANDS` | Max concurrent commands | No | 5 |
+
+### Secrets
+
+Create a secret for database credentials:
+
+```bash
+kubectl -n rdev create secret generic rdev-api-secrets \
+  --from-literal=postgres-password=your-password
+```
+
+Or use the manifest:
+
+```yaml
+apiVersion: v1
+kind: Secret
+metadata:
+  name: rdev-api-secrets
+  namespace: rdev
+type: Opaque
+stringData:
+  postgres-password: your-secure-password
+```
+
+### ConfigMap
+
+```yaml
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: rdev-api-config
+  namespace: rdev
+data:
+  POSTGRES_HOST: "postgres.databases.svc"
+  POSTGRES_DB: "rdev"
+  RDEV_NAMESPACE: "rdev"
+  RATE_LIMIT_RPS: "10"
+  CONCURRENT_COMMANDS: "5"
+```
+
+## Kubernetes Manifests
+
+### Namespace
+
+```yaml
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: rdev
+  labels:
+    app.kubernetes.io/name: rdev
+```
+
+### Deployment
+
+```yaml
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: rdev-api
+  namespace: rdev
+spec:
+  replicas: 2
+  selector:
+    matchLabels:
+      app: rdev-api
+  template:
+    metadata:
+      labels:
+        app: rdev-api
+    spec:
+      serviceAccountName: rdev-api
+      securityContext:
+        runAsNonRoot: true
+        runAsUser: 1000
+      containers:
+        - name: rdev-api
+          image: your-registry/rdev-api:latest
+          ports:
+            - containerPort: 8080
+          env:
+            - name: POSTGRES_PASSWORD
+              valueFrom:
+                secretKeyRef:
+                  name: rdev-api-secrets
+                  key: postgres-password
+          envFrom:
+            - configMapRef:
+                name: rdev-api-config
+          securityContext:
+            readOnlyRootFilesystem: true
+            allowPrivilegeEscalation: false
+            capabilities:
+              drop:
+                - ALL
+          resources:
+            requests:
+              memory: "128Mi"
+              cpu: "100m"
+            limits:
+              memory: "512Mi"
+              cpu: "500m"
+          livenessProbe:
+            httpGet:
+              path: /health
+              port: 8080
+            initialDelaySeconds: 5
+            periodSeconds: 10
+          readinessProbe:
+            httpGet:
+              path: /ready
+              port: 8080
+            initialDelaySeconds: 5
+            periodSeconds: 5
+```
+
+### Service
+
+```yaml
+apiVersion: v1
+kind: Service
+metadata:
+  name: rdev-api
+  namespace: rdev
+spec:
+  selector:
+    app: rdev-api
+  ports:
+    - port: 80
+      targetPort: 8080
+```
+
+### Ingress
+
+```yaml
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  name: rdev-api
+  namespace: rdev
+  annotations:
+    nginx.ingress.kubernetes.io/proxy-read-timeout: "3600"
+    nginx.ingress.kubernetes.io/proxy-send-timeout: "3600"
+spec:
+  ingressClassName: nginx
+  rules:
+    - host: rdev.example.com
+      http:
+        paths:
+          - path: /
+            pathType: Prefix
+            backend:
+              service:
+                name: rdev-api
+                port:
+                  number: 80
+  tls:
+    - hosts:
+        - rdev.example.com
+      secretName: rdev-tls
+```
+
+### RBAC
+
+```yaml
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: rdev-api
+  namespace: rdev
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: rdev-api-role
+  namespace: rdev
+rules:
+  - apiGroups: [""]
+    resources: ["pods"]
+    verbs: ["get", "list", "watch"]
+  - apiGroups: [""]
+    resources: ["pods/exec"]
+    verbs: ["create"]
+  - apiGroups: [""]
+    resources: ["configmaps"]
+    verbs: ["get", "list", "watch"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: rdev-api-binding
+  namespace: rdev
+subjects:
+  - kind: ServiceAccount
+    name: rdev-api
+roleRef:
+  kind: Role
+  name: rdev-api-role
+  apiGroup: rbac.authorization.k8s.io
+```
+
+### Pod Disruption Budget
+
+```yaml
+apiVersion: policy/v1
+kind: PodDisruptionBudget
+metadata:
+  name: rdev-api-pdb
+  namespace: rdev
+spec:
+  minAvailable: 1
+  selector:
+    matchLabels:
+      app: rdev-api
+```
+
+### Network Policy
+
+```yaml
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+  name: rdev-api-policy
+  namespace: rdev
+spec:
+  podSelector:
+    matchLabels:
+      app: rdev-api
+  policyTypes:
+    - Ingress
+    - Egress
+  ingress:
+    - from:
+        - namespaceSelector:
+            matchLabels:
+              kubernetes.io/metadata.name: ingress-nginx
+  egress:
+    - to:
+        - namespaceSelector:
+            matchLabels:
+              kubernetes.io/metadata.name: databases
+      ports:
+        - protocol: TCP
+          port: 5432
+```
+
+## Database Setup
+
+### Create Database
+
+```sql
+CREATE DATABASE rdev;
+CREATE USER rdev_user WITH PASSWORD 'secure-password';
+GRANT ALL PRIVILEGES ON DATABASE rdev TO rdev_user;
+```
+
+### Migrations
+
+Migrations run automatically on startup. To run manually:
+
+```bash
+# Connect to pod
+kubectl -n rdev exec -it deployment/rdev-api -- sh
+
+# Check migration status
+psql $DATABASE_URL -c "SELECT * FROM schema_migrations;"
+```
+
+## Scaling
+
+### Horizontal Pod Autoscaler
+
+```yaml
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+  name: rdev-api-hpa
+  namespace: rdev
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: rdev-api
+  minReplicas: 2
+  maxReplicas: 10
+  metrics:
+    - type: Resource
+      resource:
+        name: cpu
+        target:
+          type: Utilization
+          averageUtilization: 70
+```
+
+## Upgrading
+
+### Rolling Update
+
+```bash
+# Update image
+kubectl -n rdev set image deployment/rdev-api \
+  rdev-api=your-registry/rdev-api:new-version
+
+# Watch rollout
+kubectl -n rdev rollout status deployment/rdev-api
+```
+
+### Rollback
+
+```bash
+# Rollback to previous version
+kubectl -n rdev rollout undo deployment/rdev-api
+
+# Rollback to specific revision
+kubectl -n rdev rollout undo deployment/rdev-api --to-revision=2
+```
+
+## Health Checks
+
+### Liveness
+
+```bash
+curl http://rdev-api/health
+```
+
+Returns `200 OK` if the service is running.
+
+### Readiness
+
+```bash
+curl http://rdev-api/ready
+```
+
+Returns `200 OK` if database and K8s are connected.
+
+## Troubleshooting
+
+### Pod Not Starting
+
+```bash
+# Check pod events
+kubectl -n rdev describe pod -l app=rdev-api
+
+# Check logs
+kubectl -n rdev logs -l app=rdev-api
+```
+
+### Database Connection Failed
+
+1. Check secret is mounted correctly
+2. Verify database host is reachable
+3. Check network policy allows egress
+
+### K8s API Errors
+
+1. Verify ServiceAccount has correct RBAC
+2. Check namespace configuration
+3. Verify API server connectivity
--- a/docs/operations/monitoring.md
+++ b/docs/operations/monitoring.md
@ -0,0 +1,348 @@
+# Monitoring Guide
+
+This guide covers monitoring rdev API with Prometheus and Grafana.
+
+## Metrics Endpoint
+
+rdev exposes Prometheus metrics at `/metrics`:
+
+```bash
+curl http://rdev-api:8080/metrics
+```
+
+## Available Metrics
+
+### HTTP Metrics
+
+| Metric | Type | Description |
+|--------|------|-------------|
+| `http_requests_total` | Counter | Total HTTP requests |
+| `http_request_duration_seconds` | Histogram | Request latency |
+| `http_requests_in_flight` | Gauge | Current active requests |
+
+Labels: `method`, `path`, `status`
+
+### Command Metrics
+
+| Metric | Type | Description |
+|--------|------|-------------|
+| `rdev_commands_total` | Counter | Total commands executed |
+| `rdev_commands_active` | Gauge | Currently running commands |
+| `rdev_command_duration_seconds` | Histogram | Command execution time |
+
+Labels: `project`, `type` (claude/shell/git), `status`
+
+### SSE Metrics
+
+| Metric | Type | Description |
+|--------|------|-------------|
+| `rdev_sse_connections_total` | Counter | Total SSE connections |
+| `rdev_sse_connections_active` | Gauge | Active SSE connections |
+| `rdev_sse_events_sent_total` | Counter | Total events sent |
+
+Labels: `project`, `event_type`
+
+### Auth Metrics
+
+| Metric | Type | Description |
+|--------|------|-------------|
+| `rdev_auth_requests_total` | Counter | Auth attempts |
+| `rdev_auth_failures_total` | Counter | Auth failures |
+
+Labels: `reason` (invalid, revoked, expired, ip_blocked)
+
+### Rate Limit Metrics
+
+| Metric | Type | Description |
+|--------|------|-------------|
+| `rdev_ratelimit_requests_total` | Counter | Rate limit checks |
+| `rdev_ratelimit_rejected_total` | Counter | Rejected requests |
+
+## Prometheus Configuration
+
+### ServiceMonitor (Prometheus Operator)
+
+```yaml
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  name: rdev-api
+  namespace: rdev
+  labels:
+    app: rdev-api
+spec:
+  selector:
+    matchLabels:
+      app: rdev-api
+  endpoints:
+    - port: http
+      path: /metrics
+      interval: 15s
+```
+
+### Static Config
+
+```yaml
+scrape_configs:
+  - job_name: 'rdev-api'
+    kubernetes_sd_configs:
+      - role: endpoints
+        namespaces:
+          names:
+            - rdev
+    relabel_configs:
+      - source_labels: [__meta_kubernetes_service_label_app]
+        regex: rdev-api
+        action: keep
+      - source_labels: [__meta_kubernetes_endpoint_port_name]
+        regex: http
+        action: keep
+```
+
+## Grafana Dashboards
+
+### Overview Dashboard
+
+```json
+{
+  "title": "rdev API Overview",
+  "panels": [
+    {
+      "title": "Request Rate",
+      "type": "graph",
+      "targets": [
+        {
+          "expr": "rate(http_requests_total{job=\"rdev-api\"}[5m])",
+          "legendFormat": "{{method}} {{path}}"
+        }
+      ]
+    },
+    {
+      "title": "Latency P99",
+      "type": "graph",
+      "targets": [
+        {
+          "expr": "histogram_quantile(0.99, rate(http_request_duration_seconds_bucket{job=\"rdev-api\"}[5m]))",
+          "legendFormat": "p99"
+        }
+      ]
+    },
+    {
+      "title": "Error Rate",
+      "type": "graph",
+      "targets": [
+        {
+          "expr": "rate(http_requests_total{job=\"rdev-api\",status=~\"5..\"}[5m])",
+          "legendFormat": "5xx errors"
+        }
+      ]
+    },
+    {
+      "title": "Active Commands",
+      "type": "gauge",
+      "targets": [
+        {
+          "expr": "rdev_commands_active",
+          "legendFormat": "{{project}}"
+        }
+      ]
+    }
+  ]
+}
+```
+
+### Key PromQL Queries
+
+**Request rate by endpoint:**
+```promql
+rate(http_requests_total{job="rdev-api"}[5m])
+```
+
+**P99 latency:**
+```promql
+histogram_quantile(0.99, rate(http_request_duration_seconds_bucket{job="rdev-api"}[5m]))
+```
+
+**Error rate percentage:**
+```promql
+100 * rate(http_requests_total{job="rdev-api",status=~"5.."}[5m])
+/ rate(http_requests_total{job="rdev-api"}[5m])
+```
+
+**Command execution rate:**
+```promql
+rate(rdev_commands_total{job="rdev-api"}[5m])
+```
+
+**Average command duration:**
+```promql
+rate(rdev_command_duration_seconds_sum[5m])
+/ rate(rdev_command_duration_seconds_count[5m])
+```
+
+## Alerting
+
+### PrometheusRule
+
+```yaml
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+  name: rdev-api-alerts
+  namespace: rdev
+spec:
+  groups:
+    - name: rdev-api
+      rules:
+        - alert: RdevAPIHighErrorRate
+          expr: |
+            rate(http_requests_total{job="rdev-api",status=~"5.."}[5m])
+            / rate(http_requests_total{job="rdev-api"}[5m]) > 0.05
+          for: 5m
+          labels:
+            severity: critical
+          annotations:
+            summary: "rdev API error rate > 5%"
+            description: "Error rate is {{ $value | humanizePercentage }}"
+
+        - alert: RdevAPIHighLatency
+          expr: |
+            histogram_quantile(0.99, rate(http_request_duration_seconds_bucket{job="rdev-api"}[5m])) > 2
+          for: 5m
+          labels:
+            severity: warning
+          annotations:
+            summary: "rdev API p99 latency > 2s"
+            description: "P99 latency is {{ $value | humanizeDuration }}"
+
+        - alert: RdevAPIPodDown
+          expr: up{job="rdev-api"} == 0
+          for: 1m
+          labels:
+            severity: critical
+          annotations:
+            summary: "rdev API pod is down"
+
+        - alert: RdevAPIHighCommandQueue
+          expr: rdev_commands_active > 4
+          for: 5m
+          labels:
+            severity: warning
+          annotations:
+            summary: "High number of active commands"
+            description: "{{ $value }} commands currently running"
+
+        - alert: RdevAPIHighRateLimit
+          expr: |
+            rate(rdev_ratelimit_rejected_total[5m])
+            / rate(rdev_ratelimit_requests_total[5m]) > 0.1
+          for: 5m
+          labels:
+            severity: warning
+          annotations:
+            summary: "High rate limit rejection rate"
+```
+
+## Logging
+
+### Log Format
+
+rdev uses structured JSON logging:
+
+```json
+{
+  "level": "info",
+  "time": "2024-01-15T10:30:00Z",
+  "msg": "request completed",
+  "request_id": "req-abc123",
+  "method": "POST",
+  "path": "/projects/test/claude",
+  "status": 201,
+  "duration_ms": 45,
+  "client_ip": "10.0.0.1"
+}
+```
+
+### Log Levels
+
+| Level | Description |
+|-------|-------------|
+| `debug` | Detailed debugging info |
+| `info` | Normal operations |
+| `warn` | Potential issues |
+| `error` | Errors requiring attention |
+
+### Loki/Promtail
+
+```yaml
+# promtail config
+scrape_configs:
+  - job_name: rdev-api
+    kubernetes_sd_configs:
+      - role: pod
+    relabel_configs:
+      - source_labels: [__meta_kubernetes_pod_label_app]
+        regex: rdev-api
+        action: keep
+    pipeline_stages:
+      - json:
+          expressions:
+            level: level
+            request_id: request_id
+            path: path
+            status: status
+      - labels:
+          level:
+          path:
+```
+
+### LogQL Queries
+
+**Errors in last hour:**
+```logql
+{app="rdev-api"} |= "error"
+```
+
+**Slow requests:**
+```logql
+{app="rdev-api"} | json | duration_ms > 1000
+```
+
+**Requests by status:**
+```logql
+sum by (status) (count_over_time({app="rdev-api"} | json [1h]))
+```
+
+## Health Checks
+
+### Liveness
+
+```bash
+curl http://rdev-api:8080/health
+# Returns 200 if process is alive
+```
+
+### Readiness
+
+```bash
+curl http://rdev-api:8080/ready
+# Returns 200 if ready to serve traffic
+# Checks: database connectivity, K8s API access
+```
+
+Response:
+```json
+{
+  "status": "healthy",
+  "checks": {
+    "database": {
+      "status": "healthy",
+      "latency_ms": 5
+    },
+    "kubernetes": {
+      "status": "healthy",
+      "latency_ms": 12
+    }
+  }
+}
+```
--- a/docs/operations/runbooks/auth-failures.md
+++ b/docs/operations/runbooks/auth-failures.md
@ -0,0 +1,141 @@
+# Runbook: Authentication Failures
+
+## Alert
+
+**RdevAPIAuthFailures**: High rate of authentication failures
+
+## Impact
+
+- Legitimate users unable to access API
+- Potential security incident (brute force)
+- Service degradation
+
+## Investigation
+
+### 1. Confirm the Issue
+
+```bash
+# Check auth failure metrics
+curl -s http://rdev-api:8080/metrics | grep auth_failures
+
+# Check auth logs
+kubectl -n rdev logs -l app=rdev-api --since=10m | grep -E "(UNAUTHORIZED|KEY_REVOKED|KEY_EXPIRED|IP_NOT_ALLOWED)"
+```
+
+### 2. Identify Failure Type
+
+```bash
+# Count by failure reason
+kubectl -n rdev logs -l app=rdev-api --since=10m | \
+  grep -oE '"code":"[^"]+' | sort | uniq -c | sort -rn
+```
+
+Common reasons:
+- `UNAUTHORIZED` - Invalid or missing key
+- `KEY_REVOKED` - Key was revoked
+- `KEY_EXPIRED` - Key has expired
+- `IP_NOT_ALLOWED` - IP not in allowlist
+
+### 3. Check for Attack Patterns
+
+```bash
+# Check unique IPs making failed requests
+kubectl -n rdev logs -l app=rdev-api --since=10m | \
+  grep UNAUTHORIZED | grep -oE '"client_ip":"[^"]+' | sort | uniq -c | sort -rn
+
+# Check request patterns
+kubectl -n rdev logs -l app=rdev-api --since=10m | \
+  grep UNAUTHORIZED | grep -oE '"path":"[^"]+' | sort | uniq -c | sort -rn
+```
+
+## Remediation
+
+### If Keys Are Invalid (UNAUTHORIZED)
+
+1. Verify keys exist in database:
+   ```bash
+   kubectl -n rdev exec -it deployment/rdev-api -- sh
+   psql $DATABASE_URL -c "SELECT id, name, key_prefix, revoked_at FROM api_keys;"
+   ```
+
+2. Help users create new keys if needed
+
+3. If brute force detected:
+   - Block offending IPs at ingress level
+   - Increase rate limiting
+
+### If Keys Are Revoked (KEY_REVOKED)
+
+1. Check who revoked and when:
+   ```sql
+   SELECT id, name, revoked_at, revoked_by FROM api_keys WHERE revoked_at IS NOT NULL;
+   ```
+
+2. Determine if revocation was intentional
+
+3. Issue new keys to affected users if legitimate
+
+### If Keys Are Expired (KEY_EXPIRED)
+
+1. Check which keys expired:
+   ```sql
+   SELECT id, name, expires_at FROM api_keys WHERE expires_at < NOW();
+   ```
+
+2. Issue new keys to affected users
+
+3. Consider extending default expiration if too short
+
+### If IP Not Allowed (IP_NOT_ALLOWED)
+
+1. Check which keys have IP restrictions:
+   ```sql
+   SELECT id, name, allowed_ips FROM api_keys WHERE allowed_ips IS NOT NULL;
+   ```
+
+2. Verify client IPs match allowlist
+
+3. Update allowlist if legitimate IPs changed:
+   - Cloud provider IP ranges change
+   - User moved networks
+
+### If Under Attack
+
+1. **Immediate**: Block at ingress
+   ```yaml
+   # Add to ingress annotations
+   nginx.ingress.kubernetes.io/whitelist-source-range: "10.0.0.0/8,192.168.0.0/16"
+   ```
+
+2. **Short-term**: Increase rate limits
+   ```bash
+   kubectl -n rdev set env deployment/rdev-api RATE_LIMIT_RPS=2
+   ```
+
+3. **Long-term**:
+   - Implement IP-based blocking
+   - Add fail2ban-style lockout
+   - Review API key issuance process
+
+## Verification
+
+```bash
+# Check auth success rate
+curl -s http://rdev-api:8080/metrics | grep -E "auth_(requests|failures)"
+
+# Test authentication
+curl -H "X-API-Key: $VALID_KEY" http://rdev-api:8080/projects
+
+# Check logs for successful auths
+kubectl -n rdev logs -l app=rdev-api --since=5m | grep "request completed" | head -5
+```
+
+## Post-Incident
+
+1. Review auth failure patterns
+2. Update IP allowlists if needed
+3. Communicate with affected users
+4. Consider additional security measures:
+   - API key rotation policy
+   - Automated key expiration alerts
+   - IP-based anomaly detection
--- a/docs/operations/runbooks/high-cpu.md
+++ b/docs/operations/runbooks/high-cpu.md
@ -0,0 +1,112 @@
+# Runbook: High CPU Usage
+
+## Alert
+
+**RdevAPIHighCPU**: CPU usage exceeds 80% for 5+ minutes
+
+## Impact
+
+- Slow request processing
+- Increased latency
+- Potential request timeouts
+
+## Investigation
+
+### 1. Confirm the Issue
+
+```bash
+# Check current CPU usage
+kubectl -n rdev top pod -l app=rdev-api
+
+# Check CPU throttling
+kubectl -n rdev get pod -l app=rdev-api -o jsonpath='{.items[*].status.containerStatuses[*].lastState}'
+```
+
+### 2. Identify the Cause
+
+```bash
+# Check request rate
+curl -s http://rdev-api:8080/metrics | grep http_requests_total
+
+# Check active commands
+curl -s http://rdev-api:8080/metrics | grep commands_active
+
+# Check logs for errors
+kubectl -n rdev logs -l app=rdev-api --since=5m | grep -i error
+```
+
+### 3. Check for Hot Paths
+
+If possible, capture a CPU profile:
+
+```bash
+# Start 30-second profile
+kubectl -n rdev exec -it deployment/rdev-api -- \
+  curl -o /tmp/cpu.prof localhost:8080/debug/pprof/profile?seconds=30
+
+# Copy profile locally
+kubectl -n rdev cp deployment/rdev-api:/tmp/cpu.prof cpu.prof
+
+# Analyze
+go tool pprof cpu.prof
+```
+
+## Remediation
+
+### Immediate: Scale Up
+
+```bash
+# Increase replicas
+kubectl -n rdev scale deployment/rdev-api --replicas=4
+
+# Verify new pods are running
+kubectl -n rdev get pods -l app=rdev-api -w
+```
+
+### Short-term: Increase Limits
+
+If throttling is occurring but not OOM:
+
+```bash
+kubectl -n rdev patch deployment rdev-api --type='json' -p='[
+  {"op": "replace", "path": "/spec/template/spec/containers/0/resources/limits/cpu", "value": "1000m"}
+]'
+```
+
+### If Caused by Command Load
+
+1. Reduce concurrent command limit:
+   ```bash
+   kubectl -n rdev set env deployment/rdev-api CONCURRENT_COMMANDS=3
+   ```
+
+2. Investigate which commands are heavy:
+   ```bash
+   kubectl -n rdev logs -l app=rdev-api | grep "command started" | tail -20
+   ```
+
+### If Caused by Request Volume
+
+1. Lower rate limits temporarily:
+   ```bash
+   kubectl -n rdev set env deployment/rdev-api RATE_LIMIT_RPS=5
+   ```
+
+2. Identify high-volume clients from logs
+
+## Verification
+
+```bash
+# Confirm CPU has stabilized
+kubectl -n rdev top pod -l app=rdev-api
+
+# Check request latency is normal
+curl -s http://rdev-api:8080/metrics | grep request_duration
+```
+
+## Post-Incident
+
+1. Review capacity planning
+2. Consider enabling HPA if not already
+3. Analyze traffic patterns
+4. Update resource requests/limits
--- a/docs/operations/runbooks/high-memory.md
+++ b/docs/operations/runbooks/high-memory.md
@ -0,0 +1,117 @@
+# Runbook: High Memory Usage
+
+## Alert
+
+**RdevAPIHighMemory**: Memory usage exceeds 80% of limit
+
+## Impact
+
+- Risk of OOMKill
+- Service disruption
+- Lost in-flight requests
+
+## Investigation
+
+### 1. Confirm the Issue
+
+```bash
+# Check current memory usage
+kubectl -n rdev top pod -l app=rdev-api
+
+# Check for OOMKilled events
+kubectl -n rdev get events --field-selector reason=OOMKilled
+
+# Check pod restarts
+kubectl -n rdev get pods -l app=rdev-api -o jsonpath='{.items[*].status.containerStatuses[*].restartCount}'
+```
+
+### 2. Identify the Cause
+
+```bash
+# Check active SSE connections (potential memory leak source)
+curl -s http://rdev-api:8080/metrics | grep sse_connections_active
+
+# Check active commands
+curl -s http://rdev-api:8080/metrics | grep commands_active
+
+# Check heap profile
+kubectl -n rdev exec -it deployment/rdev-api -- \
+  curl -o /tmp/heap.prof localhost:8080/debug/pprof/heap
+```
+
+### 3. Common Causes
+
+- **SSE connection leaks**: Clients not closing connections properly
+- **Large command outputs**: Commands producing excessive output
+- **Many concurrent commands**: Each command buffers output
+- **Cache growth**: Project cache not expiring
+
+## Remediation
+
+### Immediate: Restart Pod
+
+If memory is critical (>95%):
+
+```bash
+# Restart specific pod
+kubectl -n rdev delete pod <pod-name>
+
+# Or restart all pods rolling
+kubectl -n rdev rollout restart deployment/rdev-api
+```
+
+### Short-term: Increase Limits
+
+```bash
+kubectl -n rdev patch deployment rdev-api --type='json' -p='[
+  {"op": "replace", "path": "/spec/template/spec/containers/0/resources/limits/memory", "value": "1Gi"}
+]'
+```
+
+### If SSE Connections Are Leaking
+
+1. Check for stuck connections:
+   ```bash
+   kubectl -n rdev logs -l app=rdev-api | grep "SSE connection" | tail -50
+   ```
+
+2. Reduce connection timeout in ingress:
+   ```yaml
+   nginx.ingress.kubernetes.io/proxy-read-timeout: "1800"  # 30 min max
+   ```
+
+### If Command Output Is Too Large
+
+1. Commands should implement output limits
+2. Check for runaway commands:
+   ```bash
+   kubectl -n rdev logs -l app=rdev-api | grep "output line" | wc -l
+   ```
+
+### If Cache Is Growing
+
+1. Reduce cache TTL:
+   ```bash
+   kubectl -n rdev set env deployment/rdev-api CACHE_TTL=15s
+   ```
+
+## Verification
+
+```bash
+# Confirm memory has stabilized
+kubectl -n rdev top pod -l app=rdev-api
+
+# Check no new OOMKill events
+kubectl -n rdev get events --field-selector reason=OOMKilled --since=5m
+
+# Verify service is healthy
+curl -s http://rdev-api:8080/ready
+```
+
+## Post-Incident
+
+1. Analyze heap profile for memory leaks
+2. Review SSE connection lifecycle
+3. Consider implementing output size limits
+4. Update memory limits based on findings
+5. Consider adding memory-based HPA
--- a/docs/operations/runbooks/pod-not-found.md
+++ b/docs/operations/runbooks/pod-not-found.md
@ -0,0 +1,141 @@
+# Runbook: Pod Not Found
+
+## Alert
+
+**RdevAPIProjectNotFound**: Project pod not found errors increasing
+
+## Impact
+
+- Users cannot execute commands on their projects
+- API returns 404 for valid project IDs
+
+## Investigation
+
+### 1. Confirm the Issue
+
+```bash
+# Check for NOT_FOUND errors in logs
+kubectl -n rdev logs -l app=rdev-api --since=10m | grep "project not found"
+
+# Check metrics
+curl -s http://rdev-api:8080/metrics | grep 'http_requests_total.*status="404"'
+```
+
+### 2. Verify Target Pods Exist
+
+```bash
+# List all project pods
+kubectl -n rdev get pods -l rdev.orchard9.ai/project=true
+
+# Check specific project
+kubectl -n rdev get pods -l rdev.orchard9.ai/project-id=<project-id>
+```
+
+### 3. Check Pod Discovery
+
+```bash
+# Verify API can see pods
+kubectl -n rdev exec -it deployment/rdev-api -- sh
+curl localhost:8080/projects
+
+# Check RBAC permissions
+kubectl auth can-i list pods -n rdev --as=system:serviceaccount:rdev:rdev-api
+```
+
+### 4. Common Causes
+
+- **Pod terminated**: Project pod was deleted or crashed
+- **Wrong namespace**: API looking in wrong namespace
+- **Missing labels**: Pod missing required labels
+- **RBAC issues**: API can't list pods
+- **Cache stale**: Project list cache is outdated
+
+## Remediation
+
+### If Pod Is Missing
+
+1. Check if pod should exist:
+   ```bash
+   kubectl -n rdev get deployments
+   ```
+
+2. Recreate if needed:
+   ```bash
+   kubectl -n rdev apply -f <project-deployment.yaml>
+   ```
+
+### If Labels Are Wrong
+
+1. Check current labels:
+   ```bash
+   kubectl -n rdev get pod <pod-name> --show-labels
+   ```
+
+2. Add required labels:
+   ```bash
+   kubectl -n rdev label pod <pod-name> rdev.orchard9.ai/project=true
+   kubectl -n rdev label pod <pod-name> rdev.orchard9.ai/project-id=<project-id>
+   ```
+
+### If RBAC Is Broken
+
+1. Verify ServiceAccount:
+   ```bash
+   kubectl -n rdev get serviceaccount rdev-api
+   ```
+
+2. Check RoleBinding:
+   ```bash
+   kubectl -n rdev get rolebinding rdev-api-binding -o yaml
+   ```
+
+3. Reapply RBAC:
+   ```bash
+   kubectl apply -f deployments/k8s/base/rdev-api.yaml
+   ```
+
+### If Cache Is Stale
+
+1. Force cache refresh by restarting:
+   ```bash
+   kubectl -n rdev rollout restart deployment/rdev-api
+   ```
+
+2. Or reduce cache TTL:
+   ```bash
+   kubectl -n rdev set env deployment/rdev-api CACHE_TTL=5s
+   ```
+
+### If Wrong Namespace
+
+1. Check configured namespace:
+   ```bash
+   kubectl -n rdev get deployment rdev-api -o jsonpath='{.spec.template.spec.containers[0].env}' | jq
+   ```
+
+2. Update if wrong:
+   ```bash
+   kubectl -n rdev set env deployment/rdev-api RDEV_NAMESPACE=rdev
+   ```
+
+## Verification
+
+```bash
+# List projects from API
+curl -H "X-API-Key: $API_KEY" http://rdev-api:8080/projects
+
+# Get specific project
+curl -H "X-API-Key: $API_KEY" http://rdev-api:8080/projects/<project-id>
+
+# Execute test command
+curl -X POST -H "X-API-Key: $API_KEY" -H "Content-Type: application/json" \
+  http://rdev-api:8080/projects/<project-id>/shell \
+  -d '{"command": "echo hello"}'
+```
+
+## Post-Incident
+
+1. Review pod lifecycle management
+2. Consider adding pod status monitoring
+3. Review label conventions
+4. Add alerts for project pod terminations
--- a/docs/operations/troubleshooting.md
+++ b/docs/operations/troubleshooting.md
@ -0,0 +1,303 @@
+# Troubleshooting Guide
+
+Common issues and their resolutions for rdev API.
+
+## Quick Diagnostics
+
+```bash
+# Check pod status
+kubectl -n rdev get pods -l app=rdev-api
+
+# Check logs
+kubectl -n rdev logs -l app=rdev-api --tail=100
+
+# Check events
+kubectl -n rdev get events --sort-by='.lastTimestamp'
+
+# Check endpoints
+kubectl -n rdev get endpoints rdev-api
+
+# Test health
+kubectl -n rdev exec -it deployment/rdev-api -- wget -qO- localhost:8080/health
+```
+
+## Common Issues
+
+### Pod Not Starting
+
+**Symptoms:**
+- Pod stuck in `Pending` or `CrashLoopBackOff`
+- No endpoints registered
+
+**Diagnosis:**
+```bash
+kubectl -n rdev describe pod -l app=rdev-api
+kubectl -n rdev logs -l app=rdev-api --previous
+```
+
+**Common Causes:**
+
+1. **Missing secrets:**
+   ```
+   Error: secret "rdev-api-secrets" not found
+   ```
+   Fix: Create the required secret
+   ```bash
+   kubectl -n rdev create secret generic rdev-api-secrets \
+     --from-literal=postgres-password=xxx
+   ```
+
+2. **Resource constraints:**
+   ```
+   0/3 nodes are available: insufficient memory
+   ```
+   Fix: Reduce resource requests or add nodes
+
+3. **Image pull errors:**
+   ```
+   Failed to pull image "registry/rdev-api:latest"
+   ```
+   Fix: Check image name, registry credentials
+
+### Database Connection Failed
+
+**Symptoms:**
+- Readiness probe failing
+- Logs show `dial tcp: connection refused`
+
+**Diagnosis:**
+```bash
+# Check database connectivity from pod
+kubectl -n rdev exec -it deployment/rdev-api -- sh
+nc -zv postgres.databases.svc 5432
+```
+
+**Common Causes:**
+
+1. **Wrong host/port:**
+   Check ConfigMap values match actual database
+
+2. **Network policy blocking:**
+   ```bash
+   kubectl -n rdev get networkpolicy
+   ```
+   Ensure egress to database namespace is allowed
+
+3. **Credentials incorrect:**
+   Verify secret values match database credentials
+
+### Authentication Failures
+
+**Symptoms:**
+- All requests return 401
+- Logs show `invalid API key`
+
+**Diagnosis:**
+```bash
+# Check if keys exist in database
+kubectl -n rdev exec -it deployment/rdev-api -- sh
+psql $DATABASE_URL -c "SELECT id, name, revoked_at FROM api_keys LIMIT 10;"
+```
+
+**Common Causes:**
+
+1. **Key not created:**
+   Create an admin key manually if needed
+
+2. **Key revoked:**
+   Check `revoked_at` is NULL for the key
+
+3. **Wrong key format:**
+   Keys must start with `rdev_`
+
+### Rate Limiting Issues
+
+**Symptoms:**
+- Intermittent 429 responses
+- `X-RateLimit-Remaining: 0`
+
+**Diagnosis:**
+```bash
+# Check rate limit metrics
+curl http://rdev-api:8080/metrics | grep ratelimit
+```
+
+**Solutions:**
+
+1. **Increase limits:**
+   Update ConfigMap:
+   ```yaml
+   RATE_LIMIT_RPS: "20"
+   ```
+
+2. **Check for loops:**
+   Client may be making excessive requests
+
+3. **Use separate keys:**
+   Different clients should use different API keys
+
+### Command Execution Timeouts
+
+**Symptoms:**
+- Commands hang indefinitely
+- SSE stream never completes
+
+**Diagnosis:**
+```bash
+# Check active commands
+kubectl -n rdev exec -it deployment/rdev-api -- sh
+curl localhost:8080/metrics | grep commands_active
+
+# Check target pod
+kubectl -n rdev get pod <target-pod> -o wide
+kubectl -n rdev exec -it <target-pod> -- ps aux
+```
+
+**Common Causes:**
+
+1. **Target pod not running:**
+   ```bash
+   kubectl -n rdev get pods -l rdev.orchard9.ai/project=true
+   ```
+
+2. **Command actually slow:**
+   Some commands take a long time legitimately
+
+3. **Network issues:**
+   Check connectivity between API pod and target pod
+
+### SSE Connection Drops
+
+**Symptoms:**
+- Clients disconnect unexpectedly
+- Events stop arriving mid-command
+
+**Diagnosis:**
+```bash
+# Check ingress timeout settings
+kubectl -n ingress-nginx get ing rdev-api -o yaml
+```
+
+**Common Causes:**
+
+1. **Proxy timeout:**
+   Ensure ingress has long timeout:
+   ```yaml
+   nginx.ingress.kubernetes.io/proxy-read-timeout: "3600"
+   ```
+
+2. **Client timeout:**
+   Check client-side timeout configuration
+
+3. **Network interruption:**
+   Implement reconnection with `Last-Event-ID`
+
+### High Memory Usage
+
+**Symptoms:**
+- OOMKilled events
+- Slow response times
+
+**Diagnosis:**
+```bash
+# Check memory metrics
+kubectl -n rdev top pod -l app=rdev-api
+
+# Check for memory leaks in logs
+kubectl -n rdev logs -l app=rdev-api | grep -i memory
+```
+
+**Solutions:**
+
+1. **Increase limits:**
+   ```yaml
+   resources:
+     limits:
+       memory: "1Gi"
+   ```
+
+2. **Check for stream leaks:**
+   Ensure SSE connections are properly closed
+
+3. **Restart pod:**
+   ```bash
+   kubectl -n rdev rollout restart deployment/rdev-api
+   ```
+
+### High CPU Usage
+
+**Symptoms:**
+- CPU throttling
+- Slow request processing
+
+**Diagnosis:**
+```bash
+# Check CPU metrics
+kubectl -n rdev top pod -l app=rdev-api
+
+# Profile if possible
+kubectl -n rdev exec -it deployment/rdev-api -- curl localhost:8080/debug/pprof/profile > cpu.prof
+```
+
+**Solutions:**
+
+1. **Scale horizontally:**
+   ```bash
+   kubectl -n rdev scale deployment/rdev-api --replicas=3
+   ```
+
+2. **Identify hot paths:**
+   Use profiling to find CPU-intensive code
+
+3. **Check command sanitization:**
+   Complex regex can be expensive
+
+## Recovery Procedures
+
+### Emergency Restart
+
+```bash
+# Restart all pods
+kubectl -n rdev rollout restart deployment/rdev-api
+
+# Scale down and up
+kubectl -n rdev scale deployment/rdev-api --replicas=0
+kubectl -n rdev scale deployment/rdev-api --replicas=2
+```
+
+### Rollback
+
+```bash
+# Check rollout history
+kubectl -n rdev rollout history deployment/rdev-api
+
+# Rollback to previous
+kubectl -n rdev rollout undo deployment/rdev-api
+
+# Rollback to specific revision
+kubectl -n rdev rollout undo deployment/rdev-api --to-revision=5
+```
+
+### Database Recovery
+
+```bash
+# Connect to database
+kubectl -n databases exec -it deployment/postgres -- psql -U rdev
+
+# Check tables
+\dt
+
+# Check recent keys
+SELECT id, name, created_at FROM api_keys ORDER BY created_at DESC LIMIT 10;
+```
+
+## Getting Help
+
+1. Check logs for specific error messages
+2. Search this troubleshooting guide
+3. Check runbooks for specific scenarios
+4. Contact the platform team with:
+   - Request ID (from error response)
+   - Timestamp
+   - Steps to reproduce
+   - Relevant logs
--- a/docs/plans/THREESIX_INFRASTRUCTURE.md
+++ b/docs/plans/THREESIX_INFRASTRUCTURE.md
@ -0,0 +1,930 @@
+# threesix.ai Infrastructure Implementation Plan
+
+> Self-hosted git, CI/CD, and deployment infrastructure for agent-driven development.
+
+## Overview
+
+Replace GitHub dependency with self-hosted infrastructure on k3s:
+- **soft-serve** - Git server (SSH-based, minimal)
+- **Zot** - Container registry (OCI-native)
+- **Woodpecker** - CI/CD pipelines
+- **rdev-api** - Orchestration layer with DNS management
+
+## Architecture
+
+```
+┌─────────────────────────────────────────────────────────────────────────────┐
+│                              threesix.ai                                     │
+│                                                                              │
+│  git.threesix.ai ──────▶ soft-serve (SSH :22)                               │
+│  registry.threesix.ai ─▶ zot (internal only, HTTPS for UI)                  │
+│  ci.threesix.ai ───────▶ woodpecker (web UI)                                │
+│  *.threesix.ai ────────▶ project deployments                                │
+│                                                                              │
+└─────────────────────────────────────────────────────────────────────────────┘
+
+┌─────────────────────────────────────────────────────────────────────────────┐
+│                           k3s cluster                                        │
+│                                                                              │
+│  ┌──────────────┐    ┌──────────────┐    ┌──────────────┐                   │
+│  │  soft-serve  │───▶│  woodpecker  │───▶│     zot      │                   │
+│  │  (git repos) │    │  (CI/CD)     │    │  (registry)  │                   │
+│  └──────────────┘    └──────────────┘    └──────────────┘                   │
+│         │                   │                   │                            │
+│         │                   ▼                   │                            │
+│         │            ┌──────────────┐           │                            │
+│         └───────────▶│   rdev-api   │◀──────────┘                           │
+│                      │              │                                        │
+│                      │ - Create repos                                        │
+│                      │ - Deploy apps                                         │
+│                      │ - Manage DNS                                          │
+│                      └──────────────┘                                        │
+│                             │                                                │
+│                             ▼                                                │
+│                      ┌──────────────┐                                        │
+│                      │  Cloudflare  │                                        │
+│                      │  DNS API     │                                        │
+│                      └──────────────┘                                        │
+│                                                                              │
+└─────────────────────────────────────────────────────────────────────────────┘
+```
+
+## Configuration
+
+### Credentials (from .secrets)
+
+| Key | Value | Purpose |
+|-----|-------|---------|
+| CLOUDFLARE_API_TOKEN | `nGoDhG6Za...` | DNS management |
+| CLOUDFLARE_ZONE_ID | `e0bc8d51...` | threesix.ai zone |
+
+### Network
+
+| Resource | Value |
+|----------|-------|
+| External IP | 208.122.204.172 |
+| Let's Encrypt Email | jordan@threesix.ai |
+| Domain | threesix.ai |
+
+### Admin Access
+
+```
+SSH Public Key: ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIDZwQF0Ro0E0foFo0oro/NrfUb5abEec/A0OP2qO8dVn jordanwashburn@jordanmacstudio.lan
+```
+
+---
+
+## Phase 1: Foundation (K8s Infrastructure)
+
+### 1.1 Create Namespace and Secrets
+
+```yaml
+# deployments/k8s/base/threesix/namespace.yaml
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: threesix
+---
+# Cloudflare API secret for cert-manager and rdev-api
+apiVersion: v1
+kind: Secret
+metadata:
+  name: cloudflare-api
+  namespace: threesix
+type: Opaque
+stringData:
+  api-token: "${CLOUDFLARE_API_TOKEN}"
+  zone-id: "${CLOUDFLARE_ZONE_ID}"
+```
+
+### 1.2 Configure cert-manager for Wildcard Certs
+
+```yaml
+# deployments/k8s/base/threesix/cluster-issuer.yaml
+apiVersion: cert-manager.io/v1
+kind: ClusterIssuer
+metadata:
+  name: letsencrypt-threesix
+spec:
+  acme:
+    server: https://acme-v02.api.letsencrypt.org/directory
+    email: jordan@threesix.ai
+    privateKeySecretRef:
+      name: letsencrypt-threesix-account
+    solvers:
+    - dns01:
+        cloudflare:
+          apiTokenSecretRef:
+            name: cloudflare-api
+            key: api-token
+      selector:
+        dnsZones:
+        - "threesix.ai"
+---
+# Wildcard certificate
+apiVersion: cert-manager.io/v1
+kind: Certificate
+metadata:
+  name: threesix-wildcard
+  namespace: threesix
+spec:
+  secretName: threesix-wildcard-tls
+  issuerRef:
+    name: letsencrypt-threesix
+    kind: ClusterIssuer
+  dnsNames:
+  - "threesix.ai"
+  - "*.threesix.ai"
+```
+
+### 1.3 Deploy soft-serve
+
+```yaml
+# deployments/k8s/base/threesix/soft-serve.yaml
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: soft-serve-config
+  namespace: threesix
+data:
+  config.yaml: |
+    name: threesix
+    log_format: text
+    ssh:
+      listen_addr: :22
+      public_url: ssh://git.threesix.ai
+      max_timeout: 30
+      idle_timeout: 120
+    http:
+      listen_addr: :23231
+      public_url: https://git.threesix.ai
+    stats:
+      listen_addr: :23233
+    initial_admin_keys:
+      - "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIDZwQF0Ro0E0foFo0oro/NrfUb5abEec/A0OP2qO8dVn jordanwashburn"
+    # Allow anyone to read public repos, admins can create
+    anon_access: read-only
+---
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: soft-serve
+  namespace: threesix
+spec:
+  serviceName: soft-serve
+  replicas: 1
+  selector:
+    matchLabels:
+      app: soft-serve
+  template:
+    metadata:
+      labels:
+        app: soft-serve
+    spec:
+      containers:
+      - name: soft-serve
+        image: charmcli/soft-serve:latest
+        ports:
+        - containerPort: 22
+          name: ssh
+        - containerPort: 23231
+          name: http
+        - containerPort: 23233
+          name: stats
+        volumeMounts:
+        - name: data
+          mountPath: /soft-serve
+        - name: config
+          mountPath: /soft-serve/config.yaml
+          subPath: config.yaml
+        resources:
+          requests:
+            memory: "64Mi"
+            cpu: "50m"
+          limits:
+            memory: "256Mi"
+            cpu: "500m"
+      volumes:
+      - name: config
+        configMap:
+          name: soft-serve-config
+  volumeClaimTemplates:
+  - metadata:
+      name: data
+    spec:
+      accessModes: ["ReadWriteOnce"]
+      storageClassName: longhorn
+      resources:
+        requests:
+          storage: 10Gi
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: soft-serve
+  namespace: threesix
+spec:
+  selector:
+    app: soft-serve
+  ports:
+  - name: ssh
+    port: 22
+    targetPort: 22
+  - name: http
+    port: 80
+    targetPort: 23231
+  - name: stats
+    port: 23233
+    targetPort: 23233
+---
+# External SSH access via LoadBalancer
+apiVersion: v1
+kind: Service
+metadata:
+  name: soft-serve-ssh
+  namespace: threesix
+spec:
+  type: LoadBalancer
+  selector:
+    app: soft-serve
+  ports:
+  - name: ssh
+    port: 22
+    targetPort: 22
+---
+# HTTP access via Ingress
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  name: soft-serve
+  namespace: threesix
+  annotations:
+    cert-manager.io/cluster-issuer: letsencrypt-threesix
+spec:
+  ingressClassName: traefik
+  tls:
+  - hosts:
+    - git.threesix.ai
+    secretName: git-threesix-tls
+  rules:
+  - host: git.threesix.ai
+    http:
+      paths:
+      - path: /
+        pathType: Prefix
+        backend:
+          service:
+            name: soft-serve
+            port:
+              number: 80
+```
+
+### 1.4 Deploy Zot Registry
+
+```yaml
+# deployments/k8s/base/threesix/zot.yaml
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: zot-config
+  namespace: threesix
+data:
+  config.json: |
+    {
+      "distSpecVersion": "1.1.0",
+      "storage": {
+        "rootDirectory": "/var/lib/zot",
+        "gc": true,
+        "gcDelay": "1h"
+      },
+      "http": {
+        "address": "0.0.0.0",
+        "port": "5000"
+      },
+      "log": {
+        "level": "info"
+      },
+      "extensions": {
+        "search": {
+          "enable": true
+        },
+        "ui": {
+          "enable": true
+        }
+      }
+    }
+---
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: zot
+  namespace: threesix
+spec:
+  serviceName: zot
+  replicas: 1
+  selector:
+    matchLabels:
+      app: zot
+  template:
+    metadata:
+      labels:
+        app: zot
+    spec:
+      containers:
+      - name: zot
+        image: ghcr.io/project-zot/zot-linux-amd64:latest
+        ports:
+        - containerPort: 5000
+        volumeMounts:
+        - name: data
+          mountPath: /var/lib/zot
+        - name: config
+          mountPath: /etc/zot/config.json
+          subPath: config.json
+        resources:
+          requests:
+            memory: "128Mi"
+            cpu: "100m"
+          limits:
+            memory: "512Mi"
+            cpu: "1000m"
+      volumes:
+      - name: config
+        configMap:
+          name: zot-config
+  volumeClaimTemplates:
+  - metadata:
+      name: data
+    spec:
+      accessModes: ["ReadWriteOnce"]
+      storageClassName: longhorn
+      resources:
+        requests:
+          storage: 50Gi
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: zot
+  namespace: threesix
+spec:
+  selector:
+    app: zot
+  ports:
+  - port: 5000
+    targetPort: 5000
+---
+# Internal DNS name for cluster access
+# Pods can pull from: zot.threesix.svc.cluster.local:5000/image:tag
+---
+# Optional: External UI access
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  name: zot
+  namespace: threesix
+  annotations:
+    cert-manager.io/cluster-issuer: letsencrypt-threesix
+spec:
+  ingressClassName: traefik
+  tls:
+  - hosts:
+    - registry.threesix.ai
+    secretName: registry-threesix-tls
+  rules:
+  - host: registry.threesix.ai
+    http:
+      paths:
+      - path: /
+        pathType: Prefix
+        backend:
+          service:
+            name: zot
+            port:
+              number: 5000
+```
+
+### 1.5 Initial DNS Records
+
+Create via Cloudflare API or dashboard:
+
+| Type | Name | Value | Proxy |
+|------|------|-------|-------|
+| A | git | 208.122.204.172 | No (SSH needs direct) |
+| A | registry | 208.122.204.172 | No |
+| A | ci | 208.122.204.172 | Yes (optional) |
+| A | * | 208.122.204.172 | Yes (optional) |
+
+---
+
+## Phase 2: CI/CD (Woodpecker)
+
+### 2.1 Deploy Woodpecker Server
+
+```yaml
+# deployments/k8s/base/threesix/woodpecker-server.yaml
+apiVersion: v1
+kind: Secret
+metadata:
+  name: woodpecker-secrets
+  namespace: threesix
+type: Opaque
+stringData:
+  # Generate with: openssl rand -hex 32
+  WOODPECKER_AGENT_SECRET: "${WOODPECKER_AGENT_SECRET}"
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: woodpecker-server
+  namespace: threesix
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: woodpecker-server
+  template:
+    metadata:
+      labels:
+        app: woodpecker-server
+    spec:
+      containers:
+      - name: woodpecker
+        image: woodpeckerci/woodpecker-server:latest
+        ports:
+        - containerPort: 8000
+        env:
+        - name: WOODPECKER_HOST
+          value: "https://ci.threesix.ai"
+        - name: WOODPECKER_OPEN
+          value: "false"
+        - name: WOODPECKER_ADMIN
+          value: "jordan"
+        # Soft-serve / generic git integration
+        - name: WOODPECKER_GITEA
+          value: "false"
+        - name: WOODPECKER_WEBHOOK_HOST
+          value: "http://woodpecker-server.threesix.svc:8000"
+        envFrom:
+        - secretRef:
+            name: woodpecker-secrets
+        volumeMounts:
+        - name: data
+          mountPath: /var/lib/woodpecker
+      volumes:
+      - name: data
+        persistentVolumeClaim:
+          claimName: woodpecker-data
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: woodpecker-data
+  namespace: threesix
+spec:
+  accessModes: ["ReadWriteOnce"]
+  storageClassName: longhorn
+  resources:
+    requests:
+      storage: 5Gi
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: woodpecker-server
+  namespace: threesix
+spec:
+  selector:
+    app: woodpecker-server
+  ports:
+  - name: http
+    port: 8000
+    targetPort: 8000
+  - name: grpc
+    port: 9000
+    targetPort: 9000
+---
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  name: woodpecker
+  namespace: threesix
+  annotations:
+    cert-manager.io/cluster-issuer: letsencrypt-threesix
+spec:
+  ingressClassName: traefik
+  tls:
+  - hosts:
+    - ci.threesix.ai
+    secretName: ci-threesix-tls
+  rules:
+  - host: ci.threesix.ai
+    http:
+      paths:
+      - path: /
+        pathType: Prefix
+        backend:
+          service:
+            name: woodpecker-server
+            port:
+              number: 8000
+```
+
+### 2.2 Deploy Woodpecker Agent (with Kaniko)
+
+```yaml
+# deployments/k8s/base/threesix/woodpecker-agent.yaml
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: woodpecker-agent
+  namespace: threesix
+spec:
+  replicas: 2
+  selector:
+    matchLabels:
+      app: woodpecker-agent
+  template:
+    metadata:
+      labels:
+        app: woodpecker-agent
+    spec:
+      containers:
+      - name: agent
+        image: woodpeckerci/woodpecker-agent:latest
+        env:
+        - name: WOODPECKER_SERVER
+          value: "woodpecker-server.threesix.svc:9000"
+        - name: WOODPECKER_BACKEND
+          value: "kubernetes"
+        - name: WOODPECKER_BACKEND_K8S_NAMESPACE
+          value: "threesix"
+        - name: WOODPECKER_BACKEND_K8S_STORAGE_CLASS
+          value: "longhorn"
+        - name: WOODPECKER_BACKEND_K8S_VOLUME_SIZE
+          value: "10Gi"
+        envFrom:
+        - secretRef:
+            name: woodpecker-secrets
+      serviceAccountName: woodpecker-agent
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: woodpecker-agent
+  namespace: threesix
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: woodpecker-agent
+  namespace: threesix
+rules:
+- apiGroups: [""]
+  resources: ["pods", "pods/log", "secrets", "configmaps", "persistentvolumeclaims"]
+  verbs: ["*"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: woodpecker-agent
+  namespace: threesix
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: woodpecker-agent
+subjects:
+- kind: ServiceAccount
+  name: woodpecker-agent
+  namespace: threesix
+```
+
+---
+
+## Phase 3: rdev-api Extensions
+
+### 3.1 New Port Interfaces
+
+```go
+// internal/port/git.go
+package port
+
+import "context"
+
+// GitRepository manages git repositories.
+type GitRepository interface {
+    // CreateRepo creates a new git repository.
+    CreateRepo(ctx context.Context, name, description string) (*Repo, error)
+
+    // DeleteRepo deletes a repository.
+    DeleteRepo(ctx context.Context, name string) error
+
+    // ListRepos returns all repositories.
+    ListRepos(ctx context.Context) ([]*Repo, error)
+
+    // GetRepo returns a single repository.
+    GetRepo(ctx context.Context, name string) (*Repo, error)
+
+    // AddCollaborator adds a user's SSH key to a repo.
+    AddCollaborator(ctx context.Context, repo, keyName, publicKey string) error
+
+    // AddWebhook adds a webhook to trigger on push.
+    AddWebhook(ctx context.Context, repo, url, secret string) error
+}
+
+// Repo represents a git repository.
+type Repo struct {
+    Name        string
+    Description string
+    CloneSSH    string // ssh://git@git.threesix.ai/name.git
+    CloneHTTP   string // https://git.threesix.ai/name.git
+    CreatedAt   time.Time
+}
+```
+
+```go
+// internal/port/dns.go
+package port
+
+import "context"
+
+// DNSProvider manages DNS records.
+type DNSProvider interface {
+    // CreateRecord creates a DNS record.
+    CreateRecord(ctx context.Context, record DNSRecord) error
+
+    // DeleteRecord removes a DNS record.
+    DeleteRecord(ctx context.Context, recordType, name string) error
+
+    // ListRecords returns all records for the zone.
+    ListRecords(ctx context.Context) ([]*DNSRecord, error)
+}
+
+// DNSRecord represents a DNS record.
+type DNSRecord struct {
+    Type    string // A, CNAME, TXT
+    Name    string // subdomain or @ for root
+    Content string // IP or target
+    TTL     int    // seconds, 1 = auto
+    Proxied bool   // Cloudflare proxy
+}
+```
+
+```go
+// internal/port/deployer.go
+package port
+
+import "context"
+
+// Deployer manages application deployments.
+type Deployer interface {
+    // Deploy creates or updates a deployment.
+    Deploy(ctx context.Context, spec DeploySpec) error
+
+    // Undeploy removes a deployment.
+    Undeploy(ctx context.Context, projectName string) error
+
+    // GetStatus returns deployment status.
+    GetStatus(ctx context.Context, projectName string) (*DeployStatus, error)
+}
+
+// DeploySpec defines a deployment.
+type DeploySpec struct {
+    ProjectName string
+    Image       string
+    Domain      string   // e.g., "myapp.threesix.ai"
+    Port        int      // container port
+    Replicas    int
+    EnvVars     map[string]string
+    Secrets     map[string]string
+}
+
+// DeployStatus represents current deployment state.
+type DeployStatus struct {
+    ProjectName    string
+    Image          string
+    Replicas       int
+    ReadyReplicas  int
+    URL            string
+    Status         string // "running", "pending", "failed"
+}
+```
+
+### 3.2 New Adapters
+
+```
+internal/adapter/
+├── softserve/       # soft-serve SSH/API client
+│   └── client.go
+├── cloudflare/      # Cloudflare DNS API client
+│   └── client.go
+├── deployer/        # K8s deployment manager
+│   └── deployer.go
+└── registry/        # Zot registry client (optional)
+    └── client.go
+```
+
+### 3.3 New Handlers
+
+```go
+// internal/handlers/projects_git.go
+
+// POST /projects/{id}/repo - Create git repo for project
+// DELETE /projects/{id}/repo - Delete git repo
+// GET /projects/{id}/repo - Get repo info
+
+// POST /projects/{id}/deploy - Deploy project
+// DELETE /projects/{id}/deploy - Undeploy project
+// GET /projects/{id}/deploy/status - Get deployment status
+
+// POST /projects/{id}/domain - Set custom domain
+// DELETE /projects/{id}/domain - Remove custom domain
+```
+
+### 3.4 New API Endpoints
+
+| Method | Path | Description |
+|--------|------|-------------|
+| POST | `/projects/{id}/repo` | Create git repo |
+| DELETE | `/projects/{id}/repo` | Delete git repo |
+| GET | `/projects/{id}/repo` | Get repo info (clone URLs) |
+| POST | `/projects/{id}/deploy` | Deploy from image |
+| DELETE | `/projects/{id}/deploy` | Remove deployment |
+| GET | `/projects/{id}/deploy/status` | Deployment status |
+| POST | `/projects/{id}/domain` | Add custom domain |
+| DELETE | `/projects/{id}/domain` | Remove custom domain |
+
+---
+
+## Phase 4: Database Schema
+
+### 4.1 Migration: Add Git and Deployment Fields
+
+```sql
+-- migrations/010_project_infrastructure.up.sql
+
+-- Add infrastructure fields to projects
+ALTER TABLE projects ADD COLUMN IF NOT EXISTS
+    git_repo_name VARCHAR(255);
+
+ALTER TABLE projects ADD COLUMN IF NOT EXISTS
+    git_clone_ssh VARCHAR(512);
+
+ALTER TABLE projects ADD COLUMN IF NOT EXISTS
+    git_clone_http VARCHAR(512);
+
+ALTER TABLE projects ADD COLUMN IF NOT EXISTS
+    domain VARCHAR(255);
+
+ALTER TABLE projects ADD COLUMN IF NOT EXISTS
+    custom_domain VARCHAR(255);
+
+ALTER TABLE projects ADD COLUMN IF NOT EXISTS
+    deployment_image VARCHAR(512);
+
+ALTER TABLE projects ADD COLUMN IF NOT EXISTS
+    deployment_status VARCHAR(50) DEFAULT 'none';
+
+ALTER TABLE projects ADD COLUMN IF NOT EXISTS
+    deployment_replicas INTEGER DEFAULT 1;
+
+-- Index for domain lookups
+CREATE INDEX IF NOT EXISTS idx_projects_domain ON projects(domain);
+CREATE INDEX IF NOT EXISTS idx_projects_custom_domain ON projects(custom_domain);
+```
+
+---
+
+## Phase 5: Pantheon Integration
+
+### 5.1 New Commands for Agents
+
+```
+/project create <name>
+  → Creates project in DB
+  → Creates git repo in soft-serve
+  → Creates DNS record (<name>.threesix.ai)
+  → Returns clone URL
+
+/project deploy <name>
+  → Triggers build from latest commit
+  → Deploys to k8s
+  → Returns live URL
+
+/project status <name>
+  → Shows git repo, deployment status, URLs
+
+/project domain <name> <custom-domain>
+  → Adds custom domain to project
+  → Instructions for DNS pointing
+```
+
+### 5.2 Webhook Flow
+
+```
+Agent pushes code
+        │
+        ▼
+soft-serve receives push
+        │
+        ▼
+Webhook fires to Woodpecker
+        │
+        ▼
+Woodpecker reads .woodpecker.yml
+        │
+        ▼
+Kaniko builds image, pushes to zot
+        │
+        ▼
+Woodpecker calls rdev-api: POST /projects/{id}/deploy
+        │
+        ▼
+rdev-api creates/updates K8s resources
+        │
+        ▼
+Project live at https://{name}.threesix.ai
+```
+
+---
+
+## Implementation Checklist
+
+### Phase 1: Foundation
+- [ ] Create `threesix` namespace
+- [ ] Create Cloudflare API secret
+- [ ] Configure ClusterIssuer for DNS-01 challenge
+- [ ] Request wildcard certificate
+- [ ] Deploy soft-serve StatefulSet
+- [ ] Configure soft-serve LoadBalancer for SSH
+- [ ] Deploy Zot registry
+- [ ] Create initial DNS records (git, registry, ci, wildcard)
+- [ ] Test: `ssh git@git.threesix.ai` works
+- [ ] Test: `https://registry.threesix.ai` shows Zot UI
+
+### Phase 2: CI/CD
+- [ ] Generate Woodpecker agent secret
+- [ ] Deploy Woodpecker server
+- [ ] Deploy Woodpecker agents
+- [ ] Configure soft-serve webhook to Woodpecker
+- [ ] Test: push triggers build
+- [ ] Test: Kaniko builds and pushes to Zot
+
+### Phase 3: rdev-api
+- [ ] Add GitRepository port interface
+- [ ] Add DNSProvider port interface
+- [ ] Add Deployer port interface
+- [ ] Implement soft-serve adapter
+- [ ] Implement Cloudflare adapter
+- [ ] Implement K8s deployer adapter
+- [ ] Add database migration
+- [ ] Add new handlers
+- [ ] Test: API can create repos
+- [ ] Test: API can manage DNS
+- [ ] Test: API can deploy apps
+
+### Phase 4: Integration
+- [ ] Wire up webhook: build → deploy
+- [ ] Add project commands to Pantheon
+- [ ] Test: end-to-end "create project" → "push code" → "live site"
+
+### Phase 5: Polish
+- [ ] Custom domain support
+- [ ] Build notifications to Pantheon
+- [ ] Deployment logs streaming
+- [ ] Resource limits per project
+- [ ] Usage metrics
+
+---
+
+## Resource Estimates
+
+| Component | CPU Request | Memory Request | Storage |
+|-----------|-------------|----------------|---------|
+| soft-serve | 50m | 64Mi | 10Gi |
+| Zot | 100m | 128Mi | 50Gi |
+| Woodpecker Server | 100m | 128Mi | 5Gi |
+| Woodpecker Agent (x2) | 200m each | 256Mi each | - |
+| **Total** | ~650m | ~832Mi | 65Gi |
+
+---
+
+## Security Considerations
+
+1. **soft-serve admin key** - Only jordan's key is admin initially
+2. **Registry access** - Internal only, no auth needed (ClusterIP)
+3. **Woodpecker** - Closed registration, admin-only access
+4. **Cloudflare token** - Scoped to DNS edit only
+5. **Deploy permissions** - rdev-api ServiceAccount limited to `threesix` and `projects` namespaces
+
+---
+
+## Next Steps
+
+1. Review this plan
+2. I deploy Phase 1 infrastructure
+3. Test git and registry
+4. Deploy Phase 2 CI/CD
+5. Implement Phase 3 rdev-api changes
+6. Integration testing
+7. Pantheon integration
--- a/go.mod
+++ b/go.mod
@ -1,11 +1,71 @@
 module github.com/orchard9/rdev

-go 1.23
+go 1.25.0

 require (
 	github.com/bdpiprava/scalar-go v0.13.0
 	github.com/go-chi/chi/v5 v5.1.0
 	github.com/lib/pq v1.10.9
+	github.com/prometheus/client_golang v1.23.2
+	go.opentelemetry.io/otel v1.39.0
+	go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.39.0
+	go.opentelemetry.io/otel/sdk v1.39.0
+	go.opentelemetry.io/otel/trace v1.39.0
+	k8s.io/api v0.35.0
+	k8s.io/apimachinery v0.35.0
+	k8s.io/client-go v0.35.0
 )

-require gopkg.in/yaml.v3 v3.0.1 // indirect
+require (
+	github.com/beorn7/perks v1.0.1 // indirect
+	github.com/cenkalti/backoff/v5 v5.0.3 // indirect
+	github.com/cespare/xxhash/v2 v2.3.0 // indirect
+	github.com/davecgh/go-spew v1.1.1 // indirect
+	github.com/emicklei/go-restful/v3 v3.12.2 // indirect
+	github.com/fxamacker/cbor/v2 v2.9.0 // indirect
+	github.com/go-logr/logr v1.4.3 // indirect
+	github.com/go-logr/stdr v1.2.2 // indirect
+	github.com/go-openapi/jsonpointer v0.21.0 // indirect
+	github.com/go-openapi/jsonreference v0.20.2 // indirect
+	github.com/go-openapi/swag v0.23.0 // indirect
+	github.com/google/gnostic-models v0.7.0 // indirect
+	github.com/google/uuid v1.6.0 // indirect
+	github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.3 // indirect
+	github.com/josharian/intern v1.0.0 // indirect
+	github.com/json-iterator/go v1.1.12 // indirect
+	github.com/mailru/easyjson v0.7.7 // indirect
+	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
+	github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect
+	github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
+	github.com/prometheus/client_model v0.6.2 // indirect
+	github.com/prometheus/common v0.66.1 // indirect
+	github.com/prometheus/procfs v0.16.1 // indirect
+	github.com/spf13/pflag v1.0.9 // indirect
+	github.com/x448/float16 v0.8.4 // indirect
+	go.opentelemetry.io/auto/sdk v1.2.1 // indirect
+	go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.39.0 // indirect
+	go.opentelemetry.io/otel/metric v1.39.0 // indirect
+	go.opentelemetry.io/proto/otlp v1.9.0 // indirect
+	go.yaml.in/yaml/v2 v2.4.3 // indirect
+	go.yaml.in/yaml/v3 v3.0.4 // indirect
+	golang.org/x/net v0.47.0 // indirect
+	golang.org/x/oauth2 v0.32.0 // indirect
+	golang.org/x/sys v0.39.0 // indirect
+	golang.org/x/term v0.37.0 // indirect
+	golang.org/x/text v0.31.0 // indirect
+	golang.org/x/time v0.9.0 // indirect
+	google.golang.org/genproto/googleapis/api v0.0.0-20251202230838-ff82c1b0f217 // indirect
+	google.golang.org/genproto/googleapis/rpc v0.0.0-20251202230838-ff82c1b0f217 // indirect
+	google.golang.org/grpc v1.77.0 // indirect
+	google.golang.org/protobuf v1.36.10 // indirect
+	gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect
+	gopkg.in/inf.v0 v0.9.1 // indirect
+	gopkg.in/yaml.v3 v3.0.1 // indirect
+	k8s.io/klog/v2 v2.130.1 // indirect
+	k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912 // indirect
+	k8s.io/utils v0.0.0-20251002143259-bc988d571ff4 // indirect
+	sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 // indirect
+	sigs.k8s.io/randfill v1.0.0 // indirect
+	sigs.k8s.io/structured-merge-diff/v6 v6.3.0 // indirect
+	sigs.k8s.io/yaml v1.6.0 // indirect
+)
--- a/go.sum
+++ b/go.sum
@ -1,16 +1,188 @@
+github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0=
+github.com/Masterminds/semver/v3 v3.4.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM=
 github.com/bdpiprava/scalar-go v0.13.0 h1:TuhOwYalDpLAziohyEwZlq4PqtEJ+6P/V92dDCdja9k=
 github.com/bdpiprava/scalar-go v0.13.0/go.mod h1:e5Nn4yIhcYjlucu4ACMqcs410nIAe5whqj78H3Qv7vw=
+github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
+github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
+github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM=
+github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw=
+github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
+github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
+github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
+github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/emicklei/go-restful/v3 v3.12.2 h1:DhwDP0vY3k8ZzE0RunuJy8GhNpPL6zqLkDf9B/a0/xU=
+github.com/emicklei/go-restful/v3 v3.12.2/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
+github.com/fxamacker/cbor/v2 v2.9.0 h1:NpKPmjDBgUfBms6tr6JZkTHtfFGcMKsw3eGcmD/sapM=
+github.com/fxamacker/cbor/v2 v2.9.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ=
 github.com/go-chi/chi/v5 v5.1.0 h1:acVI1TYaD+hhedDJ3r54HyA6sExp3HfXq7QWEEY/xMw=
 github.com/go-chi/chi/v5 v5.1.0/go.mod h1:DslCQbL2OYiznFReuXYUmQ2hGd1aDpCnlMNITLSKoi8=
+github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
+github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
+github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
+github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
+github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
+github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs=
+github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ=
+github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY=
+github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE=
+github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k=
+github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14=
+github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE=
+github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ=
+github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI=
+github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8=
+github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
+github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
+github.com/google/gnostic-models v0.7.0 h1:qwTtogB15McXDaNqTZdzPJRHvaVJlAl+HVQnLmJEJxo=
+github.com/google/gnostic-models v0.7.0/go.mod h1:whL5G0m6dmc5cPxKc5bdKdEN3UjI7OUGxBlw57miDrQ=
+github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
+github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
+github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
+github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 h1:BHT72Gu3keYf3ZEu2J0b1vyeLSOYI8bm5wbJM/8yDe8=
+github.com/google/pprof v0.0.0-20250403155104-27863c87afa6/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA=
+github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
+github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.3 h1:NmZ1PKzSTQbuGHw9DGPFomqkkLWMC+vZCkfs+FHv1Vg=
+github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.3/go.mod h1:zQrxl1YP88HQlA6i9c63DSVPFklWpGX4OWAc9bFuaH4=
+github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
+github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
+github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
+github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
+github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
+github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
+github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
+github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
+github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
+github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
+github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
+github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
+github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
+github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
+github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
 github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
 github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
+github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
+github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
+github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
+github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
+github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
+github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
+github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee h1:W5t00kpgFdJifH4BDsTlE89Zl93FEloxaWZfGcifgq8=
+github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
+github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
+github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
+github.com/onsi/ginkgo/v2 v2.27.2 h1:LzwLj0b89qtIy6SSASkzlNvX6WktqurSHwkk2ipF/Ns=
+github.com/onsi/ginkgo/v2 v2.27.2/go.mod h1:ArE1D/XhNXBXCBkKOLkbsb2c81dQHCRcF5zwn/ykDRo=
+github.com/onsi/gomega v1.38.2 h1:eZCjf2xjZAqe+LeWvKb5weQ+NcPwX84kqJ0cZNxok2A=
+github.com/onsi/gomega v1.38.2/go.mod h1:W2MJcYxRGV63b418Ai34Ud0hEdTVXq9NW9+Sx6uXf3k=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
-github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
-github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
-gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
+github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o=
+github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg=
+github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk=
+github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE=
+github.com/prometheus/common v0.66.1 h1:h5E0h5/Y8niHc5DlaLlWLArTQI7tMrsfQjHV+d9ZoGs=
+github.com/prometheus/common v0.66.1/go.mod h1:gcaUsgf3KfRSwHY4dIMXLPV0K/Wg1oZ8+SbZk/HH/dA=
+github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg=
+github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is=
+github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
+github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
+github.com/spf13/pflag v1.0.9 h1:9exaQaMOCwffKiiiYk6/BndUBv+iRViNW+4lEMi0PvY=
+github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
+github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
+github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
+github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=
+github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
+github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
+github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
+github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
+github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
+github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
+github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
+github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=
+go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64=
+go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y=
+go.opentelemetry.io/otel v1.39.0 h1:8yPrr/S0ND9QEfTfdP9V+SiwT4E0G7Y5MO7p85nis48=
+go.opentelemetry.io/otel v1.39.0/go.mod h1:kLlFTywNWrFyEdH0oj2xK0bFYZtHRYUdv1NklR/tgc8=
+go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.39.0 h1:f0cb2XPmrqn4XMy9PNliTgRKJgS5WcL/u0/WRYGz4t0=
+go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.39.0/go.mod h1:vnakAaFckOMiMtOIhFI2MNH4FYrZzXCYxmb1LlhoGz8=
+go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.39.0 h1:in9O8ESIOlwJAEGTkkf34DesGRAc/Pn8qJ7k3r/42LM=
+go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.39.0/go.mod h1:Rp0EXBm5tfnv0WL+ARyO/PHBEaEAT8UUHQ6AGJcSq6c=
+go.opentelemetry.io/otel/metric v1.39.0 h1:d1UzonvEZriVfpNKEVmHXbdf909uGTOQjA0HF0Ls5Q0=
+go.opentelemetry.io/otel/metric v1.39.0/go.mod h1:jrZSWL33sD7bBxg1xjrqyDjnuzTUB0x1nBERXd7Ftcs=
+go.opentelemetry.io/otel/sdk v1.39.0 h1:nMLYcjVsvdui1B/4FRkwjzoRVsMK8uL/cj0OyhKzt18=
+go.opentelemetry.io/otel/sdk v1.39.0/go.mod h1:vDojkC4/jsTJsE+kh+LXYQlbL8CgrEcwmt1ENZszdJE=
+go.opentelemetry.io/otel/sdk/metric v1.39.0 h1:cXMVVFVgsIf2YL6QkRF4Urbr/aMInf+2WKg+sEJTtB8=
+go.opentelemetry.io/otel/sdk/metric v1.39.0/go.mod h1:xq9HEVH7qeX69/JnwEfp6fVq5wosJsY1mt4lLfYdVew=
+go.opentelemetry.io/otel/trace v1.39.0 h1:2d2vfpEDmCJ5zVYz7ijaJdOF59xLomrvj7bjt6/qCJI=
+go.opentelemetry.io/otel/trace v1.39.0/go.mod h1:88w4/PnZSazkGzz/w84VHpQafiU4EtqqlVdxWy+rNOA=
+go.opentelemetry.io/proto/otlp v1.9.0 h1:l706jCMITVouPOqEnii2fIAuO3IVGBRPV5ICjceRb/A=
+go.opentelemetry.io/proto/otlp v1.9.0/go.mod h1:xE+Cx5E/eEHw+ISFkwPLwCZefwVjY+pqKg1qcK03+/4=
+go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
+go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
+go.yaml.in/yaml/v2 v2.4.3 h1:6gvOSjQoTB3vt1l+CU+tSyi/HOjfOjRLJ4YwYZGwRO0=
+go.yaml.in/yaml/v2 v2.4.3/go.mod h1:zSxWcmIDjOzPXpjlTTbAsKokqkDNAVtZO0WOMiT90s8=
+go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc=
+go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
+golang.org/x/mod v0.29.0 h1:HV8lRxZC4l2cr3Zq1LvtOsi/ThTgWnUk/y64QSs8GwA=
+golang.org/x/mod v0.29.0/go.mod h1:NyhrlYXJ2H4eJiRy/WDBO6HMqZQ6q9nk4JzS3NuCK+w=
+golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY=
+golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU=
+golang.org/x/oauth2 v0.32.0 h1:jsCblLleRMDrxMN29H3z/k1KliIvpLgCkE6R8FXXNgY=
+golang.org/x/oauth2 v0.32.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA=
+golang.org/x/sync v0.18.0 h1:kr88TuHDroi+UVf+0hZnirlk8o8T+4MrK6mr60WkH/I=
+golang.org/x/sync v0.18.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
+golang.org/x/sys v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk=
+golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
+golang.org/x/term v0.37.0 h1:8EGAD0qCmHYZg6J17DvsMy9/wJ7/D/4pV/wfnld5lTU=
+golang.org/x/term v0.37.0/go.mod h1:5pB4lxRNYYVZuTLmy8oR2BH8dflOR+IbTYFD8fi3254=
+golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM=
+golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM=
+golang.org/x/time v0.9.0 h1:EsRrnYcQiGH+5FfbgvV4AP7qEZstoyrHB0DzarOQ4ZY=
+golang.org/x/time v0.9.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
+golang.org/x/tools v0.38.0 h1:Hx2Xv8hISq8Lm16jvBZ2VQf+RLmbd7wVUsALibYI/IQ=
+golang.org/x/tools v0.38.0/go.mod h1:yEsQ/d/YK8cjh0L6rZlY8tgtlKiBNTL14pGDJPJpYQs=
+gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk=
+gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E=
+google.golang.org/genproto/googleapis/api v0.0.0-20251202230838-ff82c1b0f217 h1:fCvbg86sFXwdrl5LgVcTEvNC+2txB5mgROGmRL5mrls=
+google.golang.org/genproto/googleapis/api v0.0.0-20251202230838-ff82c1b0f217/go.mod h1:+rXWjjaukWZun3mLfjmVnQi18E1AsFbDN9QdJ5YXLto=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20251202230838-ff82c1b0f217 h1:gRkg/vSppuSQoDjxyiGfN4Upv/h/DQmIR10ZU8dh4Ww=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20251202230838-ff82c1b0f217/go.mod h1:7i2o+ce6H/6BluujYR+kqX3GKH+dChPTQU19wjRPiGk=
+google.golang.org/grpc v1.77.0 h1:wVVY6/8cGA6vvffn+wWK5ToddbgdU3d8MNENr4evgXM=
+google.golang.org/grpc v1.77.0/go.mod h1:z0BY1iVj0q8E1uSQCjL9cppRj+gnZjzDnzV0dHhrNig=
+google.golang.org/protobuf v1.36.10 h1:AYd7cD/uASjIL6Q9LiTjz8JLcrh/88q5UObnmY3aOOE=
+google.golang.org/protobuf v1.36.10/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
+gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
+gopkg.in/evanphx/json-patch.v4 v4.13.0 h1:czT3CmqEaQ1aanPc5SdlgQrrEIb8w/wwCvWWnfEbYzo=
+gopkg.in/evanphx/json-patch.v4 v4.13.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M=
+gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc=
+gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
+gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
 gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
 gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+k8s.io/api v0.35.0 h1:iBAU5LTyBI9vw3L5glmat1njFK34srdLmktWwLTprlY=
+k8s.io/api v0.35.0/go.mod h1:AQ0SNTzm4ZAczM03QH42c7l3bih1TbAXYo0DkF8ktnA=
+k8s.io/apimachinery v0.35.0 h1:Z2L3IHvPVv/MJ7xRxHEtk6GoJElaAqDCCU0S6ncYok8=
+k8s.io/apimachinery v0.35.0/go.mod h1:jQCgFZFR1F4Ik7hvr2g84RTJSZegBc8yHgFWKn//hns=
+k8s.io/client-go v0.35.0 h1:IAW0ifFbfQQwQmga0UdoH0yvdqrbwMdq9vIFEhRpxBE=
+k8s.io/client-go v0.35.0/go.mod h1:q2E5AAyqcbeLGPdoRB+Nxe3KYTfPce1Dnu1myQdqz9o=
+k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk=
+k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE=
+k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912 h1:Y3gxNAuB0OBLImH611+UDZcmKS3g6CthxToOb37KgwE=
+k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912/go.mod h1:kdmbQkyfwUagLfXIad1y2TdrjPFWp2Q89B3qkRwf/pQ=
+k8s.io/utils v0.0.0-20251002143259-bc988d571ff4 h1:SjGebBtkBqHFOli+05xYbK8YF1Dzkbzn+gDM4X9T4Ck=
+k8s.io/utils v0.0.0-20251002143259-bc988d571ff4/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
+sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 h1:IpInykpT6ceI+QxKBbEflcR5EXP7sU1kvOlxwZh5txg=
+sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg=
+sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU=
+sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY=
+sigs.k8s.io/structured-merge-diff/v6 v6.3.0 h1:jTijUJbW353oVOd9oTlifJqOGEkUw2jB/fXCbTiQEco=
+sigs.k8s.io/structured-merge-diff/v6 v6.3.0/go.mod h1:M3W8sfWvn2HhQDIbGWj3S099YozAsymCo/wrT5ohRUE=
+sigs.k8s.io/yaml v1.6.0 h1:G8fkbMSAFqgEFgh4b1wmtzDnioxFCUgTZhlbj5P9QYs=
+sigs.k8s.io/yaml v1.6.0/go.mod h1:796bPqUfzR/0jLAl6XjHl3Ck7MiyVv8dbTdyT3/pMf4=
--- a/internal/adapter/cached/project_repository.go
+++ b/internal/adapter/cached/project_repository.go
@ -0,0 +1,213 @@
+// Package cached provides caching wrappers for repositories.
+package cached
+
+import (
+	"context"
+	"sync"
+	"time"
+
+	"github.com/orchard9/rdev/internal/domain"
+	"github.com/orchard9/rdev/internal/port"
+)
+
+// ProjectRepository wraps another ProjectRepository with caching.
+// The cache TTL determines how long the project list is cached before
+// a refresh is needed. Individual project lookups are also cached.
+type ProjectRepository struct {
+	inner port.ProjectRepository
+	ttl   time.Duration
+
+	mu            sync.RWMutex
+	projectsCache []domain.Project
+	projectMap    map[domain.ProjectID]*domain.Project
+	lastFetch     time.Time
+}
+
+// NewProjectRepository creates a caching wrapper around a ProjectRepository.
+func NewProjectRepository(inner port.ProjectRepository, ttl time.Duration) *ProjectRepository {
+	if ttl <= 0 {
+		ttl = 30 * time.Second // Default cache TTL
+	}
+	return &ProjectRepository{
+		inner:      inner,
+		ttl:        ttl,
+		projectMap: make(map[domain.ProjectID]*domain.Project),
+	}
+}
+
+// List returns all projects, using cache if fresh.
+func (r *ProjectRepository) List(ctx context.Context) ([]domain.Project, error) {
+	// Check cache first
+	r.mu.RLock()
+	if r.isCacheFresh() {
+		projects := make([]domain.Project, len(r.projectsCache))
+		copy(projects, r.projectsCache)
+		r.mu.RUnlock()
+		return projects, nil
+	}
+	r.mu.RUnlock()
+
+	// Cache miss - acquire write lock and refresh
+	r.mu.Lock()
+	defer r.mu.Unlock()
+
+	// Double-check after acquiring write lock
+	if r.isCacheFresh() {
+		projects := make([]domain.Project, len(r.projectsCache))
+		copy(projects, r.projectsCache)
+		return projects, nil
+	}
+
+	// Fetch from inner repository
+	projects, err := r.inner.List(ctx)
+	if err != nil {
+		return nil, err
+	}
+
+	// Update cache
+	r.projectsCache = projects
+	r.projectMap = make(map[domain.ProjectID]*domain.Project, len(projects))
+	for i := range projects {
+		r.projectMap[projects[i].ID] = &projects[i]
+	}
+	r.lastFetch = time.Now()
+
+	// Return a copy to prevent mutation
+	result := make([]domain.Project, len(projects))
+	copy(result, projects)
+	return result, nil
+}
+
+// Get returns a single project by ID, using cache if available.
+func (r *ProjectRepository) Get(ctx context.Context, id domain.ProjectID) (*domain.Project, error) {
+	// Check cache first
+	r.mu.RLock()
+	if r.isCacheFresh() {
+		if p, ok := r.projectMap[id]; ok {
+			// Return a copy
+			copied := *p
+			r.mu.RUnlock()
+			return &copied, nil
+		}
+		r.mu.RUnlock()
+		return nil, domain.ErrProjectNotFound
+	}
+	r.mu.RUnlock()
+
+	// Cache stale - refresh and try again
+	_, err := r.List(ctx)
+	if err != nil {
+		return nil, err
+	}
+
+	r.mu.RLock()
+	defer r.mu.RUnlock()
+	if p, ok := r.projectMap[id]; ok {
+		copied := *p
+		return &copied, nil
+	}
+	return nil, domain.ErrProjectNotFound
+}
+
+// Exists checks if a project exists by ID.
+func (r *ProjectRepository) Exists(ctx context.Context, id domain.ProjectID) (bool, error) {
+	r.mu.RLock()
+	if r.isCacheFresh() {
+		_, exists := r.projectMap[id]
+		r.mu.RUnlock()
+		return exists, nil
+	}
+	r.mu.RUnlock()
+
+	// Cache stale - refresh
+	_, err := r.List(ctx)
+	if err != nil {
+		return false, err
+	}
+
+	r.mu.RLock()
+	defer r.mu.RUnlock()
+	_, exists := r.projectMap[id]
+	return exists, nil
+}
+
+// RefreshStatus refreshes project status from the underlying repository.
+// This bypasses the cache and forces a refresh.
+func (r *ProjectRepository) RefreshStatus(ctx context.Context) error {
+	err := r.inner.RefreshStatus(ctx)
+	if err != nil {
+		return err
+	}
+
+	// Invalidate cache so next List() fetches fresh data
+	r.mu.Lock()
+	r.lastFetch = time.Time{} // Zero time = stale
+	r.mu.Unlock()
+
+	return nil
+}
+
+// Register is a pass-through that invalidates cache after registration.
+func (r *ProjectRepository) Register(ctx context.Context, p *domain.Project) error {
+	err := r.inner.Register(ctx, p)
+	if err != nil {
+		return err
+	}
+
+	r.mu.Lock()
+	r.lastFetch = time.Time{} // Invalidate cache
+	r.mu.Unlock()
+
+	return nil
+}
+
+// Unregister is a pass-through that invalidates cache after unregistration.
+func (r *ProjectRepository) Unregister(ctx context.Context, id domain.ProjectID) error {
+	err := r.inner.Unregister(ctx, id)
+	if err != nil {
+		return err
+	}
+
+	r.mu.Lock()
+	r.lastFetch = time.Time{} // Invalidate cache
+	r.mu.Unlock()
+
+	return nil
+}
+
+// isCacheFresh checks if the cache is still within TTL.
+// Must be called with at least a read lock held.
+func (r *ProjectRepository) isCacheFresh() bool {
+	if r.lastFetch.IsZero() {
+		return false
+	}
+	return time.Since(r.lastFetch) < r.ttl
+}
+
+// Invalidate forces a cache refresh on next access.
+func (r *ProjectRepository) Invalidate() {
+	r.mu.Lock()
+	r.lastFetch = time.Time{}
+	r.mu.Unlock()
+}
+
+// CacheStats returns statistics about the cache.
+func (r *ProjectRepository) CacheStats() CacheStats {
+	r.mu.RLock()
+	defer r.mu.RUnlock()
+
+	return CacheStats{
+		Size:      len(r.projectsCache),
+		LastFetch: r.lastFetch,
+		IsFresh:   r.isCacheFresh(),
+		TTL:       r.ttl,
+	}
+}
+
+// CacheStats contains cache statistics.
+type CacheStats struct {
+	Size      int
+	LastFetch time.Time
+	IsFresh   bool
+	TTL       time.Duration
+}
--- a/internal/adapter/cached/project_repository_test.go
+++ b/internal/adapter/cached/project_repository_test.go
@ -0,0 +1,364 @@
+package cached
+
+import (
+	"context"
+	"sync"
+	"testing"
+	"time"
+
+	"github.com/orchard9/rdev/internal/domain"
+)
+
+// mockProjectRepository is a test double for port.ProjectRepository
+type mockProjectRepository struct {
+	projects     []domain.Project
+	listCalls    int
+	refreshCalls int
+	mu           sync.Mutex
+}
+
+func (m *mockProjectRepository) List(ctx context.Context) ([]domain.Project, error) {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	m.listCalls++
+	return m.projects, nil
+}
+
+func (m *mockProjectRepository) Get(ctx context.Context, id domain.ProjectID) (*domain.Project, error) {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	for i := range m.projects {
+		if m.projects[i].ID == id {
+			return &m.projects[i], nil
+		}
+	}
+	return nil, domain.ErrProjectNotFound
+}
+
+func (m *mockProjectRepository) Exists(ctx context.Context, id domain.ProjectID) (bool, error) {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	for _, p := range m.projects {
+		if p.ID == id {
+			return true, nil
+		}
+	}
+	return false, nil
+}
+
+func (m *mockProjectRepository) RefreshStatus(ctx context.Context) error {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	m.refreshCalls++
+	return nil
+}
+
+func (m *mockProjectRepository) Register(ctx context.Context, p *domain.Project) error {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	m.projects = append(m.projects, *p)
+	return nil
+}
+
+func (m *mockProjectRepository) Unregister(ctx context.Context, id domain.ProjectID) error {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	for i, p := range m.projects {
+		if p.ID == id {
+			m.projects = append(m.projects[:i], m.projects[i+1:]...)
+			break
+		}
+	}
+	return nil
+}
+
+func TestCachedProjectRepository_List_Caches(t *testing.T) {
+	mock := &mockProjectRepository{
+		projects: []domain.Project{
+			{ID: "proj-1", Name: "Project 1"},
+			{ID: "proj-2", Name: "Project 2"},
+		},
+	}
+
+	repo := NewProjectRepository(mock, 1*time.Minute)
+	ctx := context.Background()
+
+	// First call should hit inner repository
+	projects1, err := repo.List(ctx)
+	if err != nil {
+		t.Fatalf("List() error = %v", err)
+	}
+	if len(projects1) != 2 {
+		t.Errorf("List() returned %d projects, want 2", len(projects1))
+	}
+	if mock.listCalls != 1 {
+		t.Errorf("Inner List called %d times, want 1", mock.listCalls)
+	}
+
+	// Second call should use cache
+	projects2, err := repo.List(ctx)
+	if err != nil {
+		t.Fatalf("List() error = %v", err)
+	}
+	if len(projects2) != 2 {
+		t.Errorf("List() returned %d projects, want 2", len(projects2))
+	}
+	if mock.listCalls != 1 {
+		t.Errorf("Inner List should not be called again, was called %d times", mock.listCalls)
+	}
+}
+
+func TestCachedProjectRepository_List_Expires(t *testing.T) {
+	mock := &mockProjectRepository{
+		projects: []domain.Project{
+			{ID: "proj-1", Name: "Project 1"},
+		},
+	}
+
+	// Very short TTL for testing
+	repo := NewProjectRepository(mock, 50*time.Millisecond)
+	ctx := context.Background()
+
+	// First call
+	_, _ = repo.List(ctx)
+	if mock.listCalls != 1 {
+		t.Errorf("Expected 1 call, got %d", mock.listCalls)
+	}
+
+	// Wait for cache to expire
+	time.Sleep(60 * time.Millisecond)
+
+	// Should hit inner repository again
+	_, _ = repo.List(ctx)
+	if mock.listCalls != 2 {
+		t.Errorf("Expected 2 calls after expiry, got %d", mock.listCalls)
+	}
+}
+
+func TestCachedProjectRepository_Get_UsesCache(t *testing.T) {
+	mock := &mockProjectRepository{
+		projects: []domain.Project{
+			{ID: "proj-1", Name: "Project 1"},
+		},
+	}
+
+	repo := NewProjectRepository(mock, 1*time.Minute)
+	ctx := context.Background()
+
+	// Warm the cache
+	_, _ = repo.List(ctx)
+
+	// Get should use cached data
+	project, err := repo.Get(ctx, "proj-1")
+	if err != nil {
+		t.Fatalf("Get() error = %v", err)
+	}
+	if project.Name != "Project 1" {
+		t.Errorf("Name = %q, want %q", project.Name, "Project 1")
+	}
+
+	// Should not have called List again
+	if mock.listCalls != 1 {
+		t.Errorf("Inner List called %d times, want 1", mock.listCalls)
+	}
+}
+
+func TestCachedProjectRepository_Get_NotFound(t *testing.T) {
+	mock := &mockProjectRepository{
+		projects: []domain.Project{
+			{ID: "proj-1", Name: "Project 1"},
+		},
+	}
+
+	repo := NewProjectRepository(mock, 1*time.Minute)
+	ctx := context.Background()
+
+	_, err := repo.Get(ctx, "nonexistent")
+	if err != domain.ErrProjectNotFound {
+		t.Errorf("Get(nonexistent) error = %v, want ErrProjectNotFound", err)
+	}
+}
+
+func TestCachedProjectRepository_Exists(t *testing.T) {
+	mock := &mockProjectRepository{
+		projects: []domain.Project{
+			{ID: "proj-1", Name: "Project 1"},
+		},
+	}
+
+	repo := NewProjectRepository(mock, 1*time.Minute)
+	ctx := context.Background()
+
+	exists, err := repo.Exists(ctx, "proj-1")
+	if err != nil {
+		t.Fatalf("Exists() error = %v", err)
+	}
+	if !exists {
+		t.Error("Exists(proj-1) = false, want true")
+	}
+
+	exists, err = repo.Exists(ctx, "nonexistent")
+	if err != nil {
+		t.Fatalf("Exists() error = %v", err)
+	}
+	if exists {
+		t.Error("Exists(nonexistent) = true, want false")
+	}
+}
+
+func TestCachedProjectRepository_RefreshStatus_InvalidatesCache(t *testing.T) {
+	mock := &mockProjectRepository{
+		projects: []domain.Project{
+			{ID: "proj-1", Name: "Project 1"},
+		},
+	}
+
+	repo := NewProjectRepository(mock, 1*time.Minute)
+	ctx := context.Background()
+
+	// Warm cache
+	_, _ = repo.List(ctx)
+	if mock.listCalls != 1 {
+		t.Errorf("Expected 1 call, got %d", mock.listCalls)
+	}
+
+	// Refresh status should invalidate cache
+	_ = repo.RefreshStatus(ctx)
+
+	// Next List should hit inner repository
+	_, _ = repo.List(ctx)
+	if mock.listCalls != 2 {
+		t.Errorf("Expected 2 calls after RefreshStatus, got %d", mock.listCalls)
+	}
+}
+
+func TestCachedProjectRepository_Register_InvalidatesCache(t *testing.T) {
+	mock := &mockProjectRepository{
+		projects: []domain.Project{},
+	}
+
+	repo := NewProjectRepository(mock, 1*time.Minute)
+	ctx := context.Background()
+
+	// Warm cache
+	_, _ = repo.List(ctx)
+
+	// Register should invalidate cache
+	_ = repo.Register(ctx, &domain.Project{ID: "new-proj", Name: "New"})
+
+	// Next List should hit inner repository
+	_, _ = repo.List(ctx)
+	if mock.listCalls != 2 {
+		t.Errorf("Expected 2 calls after Register, got %d", mock.listCalls)
+	}
+}
+
+func TestCachedProjectRepository_Invalidate(t *testing.T) {
+	mock := &mockProjectRepository{
+		projects: []domain.Project{
+			{ID: "proj-1", Name: "Project 1"},
+		},
+	}
+
+	repo := NewProjectRepository(mock, 1*time.Minute)
+	ctx := context.Background()
+
+	// Warm cache
+	_, _ = repo.List(ctx)
+
+	// Manually invalidate
+	repo.Invalidate()
+
+	// Next List should hit inner repository
+	_, _ = repo.List(ctx)
+	if mock.listCalls != 2 {
+		t.Errorf("Expected 2 calls after Invalidate, got %d", mock.listCalls)
+	}
+}
+
+func TestCachedProjectRepository_CacheStats(t *testing.T) {
+	mock := &mockProjectRepository{
+		projects: []domain.Project{
+			{ID: "proj-1", Name: "Project 1"},
+			{ID: "proj-2", Name: "Project 2"},
+		},
+	}
+
+	repo := NewProjectRepository(mock, 1*time.Minute)
+	ctx := context.Background()
+
+	// Before warming
+	stats := repo.CacheStats()
+	if stats.IsFresh {
+		t.Error("Cache should not be fresh before List")
+	}
+	if stats.Size != 0 {
+		t.Errorf("Size = %d, want 0", stats.Size)
+	}
+
+	// After warming
+	_, _ = repo.List(ctx)
+	stats = repo.CacheStats()
+	if !stats.IsFresh {
+		t.Error("Cache should be fresh after List")
+	}
+	if stats.Size != 2 {
+		t.Errorf("Size = %d, want 2", stats.Size)
+	}
+	if stats.TTL != 1*time.Minute {
+		t.Errorf("TTL = %v, want 1m", stats.TTL)
+	}
+}
+
+func TestCachedProjectRepository_Concurrent(t *testing.T) {
+	mock := &mockProjectRepository{
+		projects: []domain.Project{
+			{ID: "proj-1", Name: "Project 1"},
+		},
+	}
+
+	repo := NewProjectRepository(mock, 50*time.Millisecond)
+	ctx := context.Background()
+
+	var wg sync.WaitGroup
+
+	// Concurrent List calls
+	for i := 0; i < 50; i++ {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			repo.List(ctx)
+		}()
+	}
+
+	// Concurrent Get calls
+	for i := 0; i < 50; i++ {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			_, _ = repo.Get(ctx, "proj-1")
+		}()
+	}
+
+	// Concurrent Exists calls
+	for i := 0; i < 50; i++ {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			_, _ = repo.Exists(ctx, "proj-1")
+		}()
+	}
+
+	wg.Wait()
+	// Test passes if no race/deadlock
+}
+
+func TestNewProjectRepository_DefaultTTL(t *testing.T) {
+	mock := &mockProjectRepository{}
+	repo := NewProjectRepository(mock, 0) // Zero TTL should use default
+
+	stats := repo.CacheStats()
+	if stats.TTL != 30*time.Second {
+		t.Errorf("TTL = %v, want 30s (default)", stats.TTL)
+	}
+}
--- a/internal/adapter/kubernetes/client.go
+++ b/internal/adapter/kubernetes/client.go
@ -0,0 +1,72 @@
+// Package kubernetes provides Kubernetes-based implementations of port interfaces.
+package kubernetes
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+
+	"k8s.io/client-go/kubernetes"
+	"k8s.io/client-go/rest"
+	"k8s.io/client-go/tools/clientcmd"
+)
+
+// ClientConfig holds configuration for the Kubernetes client.
+type ClientConfig struct {
+	// Namespace is the K8s namespace to operate in.
+	Namespace string
+	// Kubeconfig is the path to the kubeconfig file (optional, for local dev).
+	Kubeconfig string
+}
+
+// NewClient creates a new Kubernetes clientset.
+// When running in-cluster, it uses the service account token.
+// When running locally, it uses the kubeconfig file.
+func NewClient(cfg ClientConfig) (*kubernetes.Clientset, error) {
+	var config *rest.Config
+	var err error
+
+	// Try in-cluster config first (when running in K8s)
+	config, err = rest.InClusterConfig()
+	if err != nil {
+		// Fall back to kubeconfig for local development
+		kubeconfigPath := cfg.Kubeconfig
+		if kubeconfigPath == "" {
+			kubeconfigPath = defaultKubeconfigPath()
+		}
+
+		config, err = clientcmd.BuildConfigFromFlags("", kubeconfigPath)
+		if err != nil {
+			return nil, fmt.Errorf("failed to create k8s config: %w", err)
+		}
+	}
+
+	clientset, err := kubernetes.NewForConfig(config)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create k8s clientset: %w", err)
+	}
+
+	return clientset, nil
+}
+
+// NewClientOrNil creates a K8s client, returning nil if it fails.
+// This is useful for graceful fallback to hardcoded projects.
+func NewClientOrNil(cfg ClientConfig) *kubernetes.Clientset {
+	client, err := NewClient(cfg)
+	if err != nil {
+		return nil
+	}
+	return client
+}
+
+// defaultKubeconfigPath returns the default kubeconfig path.
+func defaultKubeconfigPath() string {
+	if kubeconfig := os.Getenv("KUBECONFIG"); kubeconfig != "" {
+		return kubeconfig
+	}
+	home, err := os.UserHomeDir()
+	if err != nil {
+		return ""
+	}
+	return filepath.Join(home, ".kube", "config")
+}
--- a/internal/adapter/kubernetes/executor.go
+++ b/internal/adapter/kubernetes/executor.go
@ -210,3 +210,27 @@ func (e *Executor) CheckConnection(ctx context.Context) error {
 	cmd := exec.CommandContext(ctx, "kubectl", "cluster-info", "--request-timeout=5s")
 	return cmd.Run()
 }
+
+// ExecSimple executes a shell command and returns the output as a string.
+// This is a convenience method for simple commands that don't need streaming.
+func (e *Executor) ExecSimple(podName, command string) (string, error) {
+	e.mu.RLock()
+	namespace := e.namespace
+	e.mu.RUnlock()
+
+	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+	defer cancel()
+
+	args := []string{
+		"exec", "-n", namespace, podName, "-c", "claudebox", "--",
+		"bash", "-c", command,
+	}
+
+	cmd := exec.CommandContext(ctx, "kubectl", args...)
+	output, err := cmd.CombinedOutput()
+	if err != nil {
+		return string(output), err
+	}
+
+	return string(output), nil
+}
--- a/internal/adapter/kubernetes/project_repository.go
+++ b/internal/adapter/kubernetes/project_repository.go
@ -0,0 +1,421 @@
+package kubernetes
+
+import (
+	"context"
+	"fmt"
+	"log/slog"
+	"os/exec"
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/orchard9/rdev/internal/domain"
+	"github.com/orchard9/rdev/internal/port"
+
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/watch"
+	"k8s.io/client-go/kubernetes"
+)
+
+// ProjectRepository implements port.ProjectRepository using Kubernetes.
+type ProjectRepository struct {
+	namespace string
+	client    *kubernetes.Clientset
+	logger    *slog.Logger
+
+	projects map[domain.ProjectID]*domain.Project
+	mu       sync.RWMutex
+
+	// Watch management
+	watchCancel context.CancelFunc
+	watchWg     sync.WaitGroup
+}
+
+// NewProjectRepository creates a new Kubernetes project repository.
+// If client is nil, falls back to hardcoded projects (for local development).
+func NewProjectRepository(namespace string) *ProjectRepository {
+	return NewProjectRepositoryWithClient(namespace, nil, nil)
+}
+
+// NewProjectRepositoryWithClient creates a new Kubernetes project repository
+// with an optional K8s client for dynamic project discovery.
+func NewProjectRepositoryWithClient(namespace string, client *kubernetes.Clientset, logger *slog.Logger) *ProjectRepository {
+	if logger == nil {
+		logger = slog.Default()
+	}
+
+	r := &ProjectRepository{
+		namespace: namespace,
+		client:    client,
+		logger:    logger,
+		projects:  make(map[domain.ProjectID]*domain.Project),
+	}
+
+	// Initialize with fallback hardcoded projects
+	// These will be replaced by discovered projects if K8s client is available
+	r.initFallbackProjects()
+
+	return r
+}
+
+// initFallbackProjects adds hardcoded projects for when K8s client is unavailable.
+func (r *ProjectRepository) initFallbackProjects() {
+	r.projects["pantheon"] = &domain.Project{
+		ID:          "pantheon",
+		Name:        "Pantheon",
+		Description: "Go API backend",
+		PodName:     "claudebox-pantheon-0",
+		Status:      domain.ProjectStatusUnknown,
+		Workspace:   "/workspace",
+	}
+	r.projects["aeries"] = &domain.Project{
+		ID:          "aeries",
+		Name:        "Aeries",
+		Description: "Note community platform",
+		PodName:     "claudebox-aeries-0",
+		Status:      domain.ProjectStatusUnknown,
+		Workspace:   "/workspace",
+	}
+}
+
+// Ensure ProjectRepository implements port.ProjectRepository at compile time.
+var _ port.ProjectRepository = (*ProjectRepository)(nil)
+
+// List returns all available projects.
+func (r *ProjectRepository) List(ctx context.Context) ([]domain.Project, error) {
+	r.mu.RLock()
+	defer r.mu.RUnlock()
+
+	projects := make([]domain.Project, 0, len(r.projects))
+	for _, p := range r.projects {
+		projects = append(projects, *p)
+	}
+	return projects, nil
+}
+
+// Get returns a project by ID.
+func (r *ProjectRepository) Get(ctx context.Context, id domain.ProjectID) (*domain.Project, error) {
+	r.mu.RLock()
+	defer r.mu.RUnlock()
+
+	p, ok := r.projects[id]
+	if !ok {
+		return nil, domain.ErrProjectNotFound
+	}
+	return p, nil
+}
+
+// Exists checks if a project exists.
+func (r *ProjectRepository) Exists(ctx context.Context, id domain.ProjectID) (bool, error) {
+	r.mu.RLock()
+	defer r.mu.RUnlock()
+
+	_, ok := r.projects[id]
+	return ok, nil
+}
+
+// Register adds a new project to the repository.
+func (r *ProjectRepository) Register(ctx context.Context, project *domain.Project) error {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+
+	r.projects[project.ID] = project
+	return nil
+}
+
+// Unregister removes a project from the repository.
+func (r *ProjectRepository) Unregister(ctx context.Context, id domain.ProjectID) error {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+
+	delete(r.projects, id)
+	return nil
+}
+
+// RefreshStatus updates the status of all projects from K8s.
+// If K8s client is available, it also discovers new projects.
+func (r *ProjectRepository) RefreshStatus(ctx context.Context) error {
+	// Try to discover projects from K8s labels first
+	if r.client != nil {
+		if err := r.discoverProjects(ctx); err != nil {
+			r.logger.Warn("failed to discover projects from K8s, using fallback", "error", err)
+		}
+	}
+
+	r.mu.Lock()
+	defer r.mu.Unlock()
+
+	for _, p := range r.projects {
+		status, err := r.getPodStatus(ctx, p.PodName)
+		if err != nil {
+			p.Status = domain.ProjectStatusError
+			continue
+		}
+		p.Status = status
+	}
+	return nil
+}
+
+// discoverProjects finds projects from pods with rdev labels.
+func (r *ProjectRepository) discoverProjects(ctx context.Context) error {
+	if r.client == nil {
+		return fmt.Errorf("k8s client not available")
+	}
+
+	// List pods with the rdev project label
+	labelSelector := fmt.Sprintf("%s=true", domain.LabelProject)
+
+	pods, err := r.client.CoreV1().Pods(r.namespace).List(ctx, metav1.ListOptions{
+		LabelSelector: labelSelector,
+	})
+	if err != nil {
+		return fmt.Errorf("list pods: %w", err)
+	}
+
+	r.mu.Lock()
+	defer r.mu.Unlock()
+
+	// Track which projects we've seen to detect deletions
+	seen := make(map[domain.ProjectID]bool)
+
+	for _, pod := range pods.Items {
+		project := r.podToProject(&pod)
+		if project != nil {
+			seen[project.ID] = true
+			r.projects[project.ID] = project
+			r.logger.Debug("discovered project from pod",
+				"project_id", project.ID,
+				"pod_name", project.PodName,
+				"status", project.Status)
+		}
+	}
+
+	// Remove projects whose pods no longer exist (but keep fallback projects if no K8s client)
+	for id := range r.projects {
+		if !seen[id] {
+			// Only remove if we have at least one discovered project
+			// This prevents removing all fallback projects when K8s is unavailable
+			if len(seen) > 0 {
+				r.logger.Info("removing project (pod deleted)", "project_id", id)
+				delete(r.projects, id)
+			}
+		}
+	}
+
+	return nil
+}
+
+// podToProject converts a K8s pod to a domain.Project.
+// Returns nil if the pod doesn't have the required labels.
+func (r *ProjectRepository) podToProject(pod *corev1.Pod) *domain.Project {
+	labels := pod.Labels
+	annotations := pod.Annotations
+
+	// Check for required labels
+	if labels[domain.LabelProject] != "true" {
+		return nil
+	}
+
+	name := labels[domain.LabelName]
+	if name == "" {
+		// Fallback to pod name if name label is missing
+		name = pod.Name
+	}
+
+	workspace := labels[domain.LabelWorkspace]
+	if workspace == "" {
+		workspace = "/workspace" // Default workspace
+	}
+
+	description := ""
+	if annotations != nil {
+		description = annotations[domain.AnnotDescription]
+	}
+
+	// Convert pod phase to project status
+	status := r.phaseToStatus(pod.Status.Phase)
+
+	return &domain.Project{
+		ID:          domain.ProjectID(name),
+		Name:        capitalizeFirst(name),
+		Description: description,
+		PodName:     pod.Name,
+		Status:      status,
+		Workspace:   workspace,
+	}
+}
+
+// phaseToStatus converts K8s pod phase to domain.ProjectStatus.
+func (r *ProjectRepository) phaseToStatus(phase corev1.PodPhase) domain.ProjectStatus {
+	switch phase {
+	case corev1.PodRunning:
+		return domain.ProjectStatusRunning
+	case corev1.PodPending:
+		return domain.ProjectStatusPending
+	case corev1.PodFailed:
+		return domain.ProjectStatusFailed
+	case corev1.PodSucceeded:
+		// Succeeded is a terminal state, treat as not available
+		return domain.ProjectStatusNotFound
+	default:
+		return domain.ProjectStatusUnknown
+	}
+}
+
+// StartWatching begins watching for pod changes in the background.
+// Call StopWatching to stop the watch.
+func (r *ProjectRepository) StartWatching(ctx context.Context) error {
+	if r.client == nil {
+		return fmt.Errorf("k8s client not available for watching")
+	}
+
+	// Create a cancellable context for the watch
+	watchCtx, cancel := context.WithCancel(ctx)
+	r.watchCancel = cancel
+
+	r.watchWg.Add(1)
+	go r.watchLoop(watchCtx)
+
+	r.logger.Info("started watching for project pod changes", "namespace", r.namespace)
+	return nil
+}
+
+// StopWatching stops the background pod watch.
+func (r *ProjectRepository) StopWatching() {
+	if r.watchCancel != nil {
+		r.watchCancel()
+		r.watchWg.Wait()
+		r.watchCancel = nil
+		r.logger.Info("stopped watching for project pod changes")
+	}
+}
+
+// watchLoop continuously watches for pod changes.
+func (r *ProjectRepository) watchLoop(ctx context.Context) {
+	defer r.watchWg.Done()
+
+	labelSelector := fmt.Sprintf("%s=true", domain.LabelProject)
+	backoff := time.Second
+
+	for {
+		select {
+		case <-ctx.Done():
+			return
+		default:
+		}
+
+		watcher, err := r.client.CoreV1().Pods(r.namespace).Watch(ctx, metav1.ListOptions{
+			LabelSelector: labelSelector,
+		})
+		if err != nil {
+			r.logger.Error("failed to create pod watch", "error", err)
+			time.Sleep(backoff)
+			backoff = min(backoff*2, time.Minute)
+			continue
+		}
+
+		backoff = time.Second // Reset backoff on successful connection
+
+		r.handleWatchEvents(ctx, watcher)
+		watcher.Stop()
+	}
+}
+
+// handleWatchEvents processes events from the pod watcher.
+func (r *ProjectRepository) handleWatchEvents(ctx context.Context, watcher watch.Interface) {
+	for {
+		select {
+		case <-ctx.Done():
+			return
+		case event, ok := <-watcher.ResultChan():
+			if !ok {
+				// Watch channel closed, need to reconnect
+				r.logger.Debug("watch channel closed, reconnecting")
+				return
+			}
+
+			pod, ok := event.Object.(*corev1.Pod)
+			if !ok {
+				continue
+			}
+
+			switch event.Type {
+			case watch.Added, watch.Modified:
+				project := r.podToProject(pod)
+				if project != nil {
+					r.mu.Lock()
+					existing, exists := r.projects[project.ID]
+					if !exists || existing.Status != project.Status {
+						r.logger.Info("project updated",
+							"event", event.Type,
+							"project_id", project.ID,
+							"status", project.Status)
+					}
+					r.projects[project.ID] = project
+					r.mu.Unlock()
+				}
+
+			case watch.Deleted:
+				project := r.podToProject(pod)
+				if project != nil {
+					r.mu.Lock()
+					r.logger.Info("project removed", "project_id", project.ID)
+					delete(r.projects, project.ID)
+					r.mu.Unlock()
+				}
+			}
+		}
+	}
+}
+
+// getPodStatus queries the status of a pod using kubectl (fallback method).
+func (r *ProjectRepository) getPodStatus(ctx context.Context, podName string) (domain.ProjectStatus, error) {
+	// If we have a K8s client, use it directly
+	if r.client != nil {
+		pod, err := r.client.CoreV1().Pods(r.namespace).Get(ctx, podName, metav1.GetOptions{})
+		if err != nil {
+			if strings.Contains(err.Error(), "not found") {
+				return domain.ProjectStatusNotFound, nil
+			}
+			return domain.ProjectStatusUnknown, fmt.Errorf("get pod: %w", err)
+		}
+		return r.phaseToStatus(pod.Status.Phase), nil
+	}
+
+	// Fallback to kubectl for local development
+	cmd := exec.CommandContext(ctx, "kubectl",
+		"get", "pod", podName,
+		"-n", r.namespace,
+		"-o", "jsonpath={.status.phase}",
+	)
+
+	output, err := cmd.Output()
+	if err != nil {
+		// Check if pod doesn't exist
+		if strings.Contains(err.Error(), "not found") {
+			return domain.ProjectStatusNotFound, nil
+		}
+		return domain.ProjectStatusUnknown, fmt.Errorf("get pod status: %w", err)
+	}
+
+	phase := strings.ToLower(strings.TrimSpace(string(output)))
+	switch phase {
+	case "running":
+		return domain.ProjectStatusRunning, nil
+	case "pending":
+		return domain.ProjectStatusPending, nil
+	case "failed":
+		return domain.ProjectStatusFailed, nil
+	default:
+		return domain.ProjectStatusUnknown, nil
+	}
+}
+
+// capitalizeFirst capitalizes the first letter of a string.
+func capitalizeFirst(s string) string {
+	if s == "" {
+		return s
+	}
+	return strings.ToUpper(s[:1]) + s[1:]
+}
--- a/internal/adapter/memory/project_repository_test.go
+++ b/internal/adapter/memory/project_repository_test.go
@ -0,0 +1,270 @@
+package memory
+
+import (
+	"context"
+	"sync"
+	"testing"
+
+	"github.com/orchard9/rdev/internal/domain"
+)
+
+func TestProjectRepository_RegisterAndGet(t *testing.T) {
+	repo := NewProjectRepository()
+	ctx := context.Background()
+
+	project := &domain.Project{
+		ID:          "test-project",
+		Name:        "Test Project",
+		Description: "A test project",
+		PodName:     "test-pod-0",
+		Workspace:   "/workspace",
+		Status:      domain.ProjectStatusRunning,
+	}
+
+	// Register
+	if err := repo.Register(ctx, project); err != nil {
+		t.Fatalf("Register() error = %v", err)
+	}
+
+	// Get
+	retrieved, err := repo.Get(ctx, "test-project")
+	if err != nil {
+		t.Fatalf("Get() error = %v", err)
+	}
+
+	if retrieved.ID != project.ID {
+		t.Errorf("ID = %q, want %q", retrieved.ID, project.ID)
+	}
+	if retrieved.Name != project.Name {
+		t.Errorf("Name = %q, want %q", retrieved.Name, project.Name)
+	}
+	if retrieved.Status != project.Status {
+		t.Errorf("Status = %q, want %q", retrieved.Status, project.Status)
+	}
+}
+
+func TestProjectRepository_GetNotFound(t *testing.T) {
+	repo := NewProjectRepository()
+	ctx := context.Background()
+
+	_, err := repo.Get(ctx, "nonexistent")
+	if err != domain.ErrProjectNotFound {
+		t.Errorf("Get() error = %v, want %v", err, domain.ErrProjectNotFound)
+	}
+}
+
+func TestProjectRepository_List(t *testing.T) {
+	repo := NewProjectRepository()
+	ctx := context.Background()
+
+	// Empty list initially
+	projects, err := repo.List(ctx)
+	if err != nil {
+		t.Fatalf("List() error = %v", err)
+	}
+	if len(projects) != 0 {
+		t.Errorf("Initial List() length = %d, want 0", len(projects))
+	}
+
+	// Register some projects
+	for i := 0; i < 3; i++ {
+		p := &domain.Project{
+			ID:   domain.ProjectID("project-" + string(rune('a'+i))),
+			Name: "Project " + string(rune('A'+i)),
+		}
+		_ = repo.Register(ctx, p)
+	}
+
+	// List should return all
+	projects, err = repo.List(ctx)
+	if err != nil {
+		t.Fatalf("List() error = %v", err)
+	}
+	if len(projects) != 3 {
+		t.Errorf("List() length = %d, want 3", len(projects))
+	}
+}
+
+func TestProjectRepository_Exists(t *testing.T) {
+	repo := NewProjectRepository()
+	ctx := context.Background()
+
+	project := &domain.Project{
+		ID:   "existing-project",
+		Name: "Existing",
+	}
+	_ = repo.Register(ctx, project)
+
+	tests := []struct {
+		id   domain.ProjectID
+		want bool
+	}{
+		{"existing-project", true},
+		{"nonexistent", false},
+	}
+
+	for _, tt := range tests {
+		exists, err := repo.Exists(ctx, tt.id)
+		if err != nil {
+			t.Errorf("Exists(%q) error = %v", tt.id, err)
+		}
+		if exists != tt.want {
+			t.Errorf("Exists(%q) = %v, want %v", tt.id, exists, tt.want)
+		}
+	}
+}
+
+func TestProjectRepository_Unregister(t *testing.T) {
+	repo := NewProjectRepository()
+	ctx := context.Background()
+
+	project := &domain.Project{
+		ID:   "to-remove",
+		Name: "To Remove",
+	}
+	repo.Register(ctx, project)
+
+	// Verify it exists
+	exists, _ := repo.Exists(ctx, "to-remove")
+	if !exists {
+		t.Fatal("Project should exist after register")
+	}
+
+	// Unregister
+	if err := repo.Unregister(ctx, "to-remove"); err != nil {
+		t.Fatalf("Unregister() error = %v", err)
+	}
+
+	// Verify it's gone
+	exists, _ = repo.Exists(ctx, "to-remove")
+	if exists {
+		t.Error("Project should not exist after unregister")
+	}
+}
+
+func TestProjectRepository_UnregisterNonexistent(t *testing.T) {
+	repo := NewProjectRepository()
+	ctx := context.Background()
+
+	// Unregistering non-existent project should not error
+	if err := repo.Unregister(ctx, "nonexistent"); err != nil {
+		t.Errorf("Unregister(nonexistent) error = %v, want nil", err)
+	}
+}
+
+func TestProjectRepository_SetStatus(t *testing.T) {
+	repo := NewProjectRepository()
+	ctx := context.Background()
+
+	project := &domain.Project{
+		ID:     "status-test",
+		Name:   "Status Test",
+		Status: domain.ProjectStatusPending,
+	}
+	repo.Register(ctx, project)
+
+	// Change status
+	repo.SetStatus("status-test", domain.ProjectStatusRunning)
+
+	// Verify
+	retrieved, _ := repo.Get(ctx, "status-test")
+	if retrieved.Status != domain.ProjectStatusRunning {
+		t.Errorf("Status = %q, want %q", retrieved.Status, domain.ProjectStatusRunning)
+	}
+
+	// Change to error
+	repo.SetStatus("status-test", domain.ProjectStatusError)
+	retrieved, _ = repo.Get(ctx, "status-test")
+	if retrieved.Status != domain.ProjectStatusError {
+		t.Errorf("Status = %q, want %q", retrieved.Status, domain.ProjectStatusError)
+	}
+}
+
+func TestProjectRepository_SetStatusNonexistent(t *testing.T) {
+	repo := NewProjectRepository()
+
+	// Should not panic on nonexistent project
+	repo.SetStatus("nonexistent", domain.ProjectStatusRunning)
+	// No error expected, just a no-op
+}
+
+func TestProjectRepository_RefreshStatus(t *testing.T) {
+	repo := NewProjectRepository()
+	ctx := context.Background()
+
+	// RefreshStatus is a no-op for memory implementation
+	if err := repo.RefreshStatus(ctx); err != nil {
+		t.Errorf("RefreshStatus() error = %v, want nil", err)
+	}
+}
+
+func TestProjectRepository_RegisterOverwrite(t *testing.T) {
+	repo := NewProjectRepository()
+	ctx := context.Background()
+
+	// Register initial
+	p1 := &domain.Project{
+		ID:     "overwrite-test",
+		Name:   "Original",
+		Status: domain.ProjectStatusPending,
+	}
+	repo.Register(ctx, p1)
+
+	// Register with same ID, different data
+	p2 := &domain.Project{
+		ID:     "overwrite-test",
+		Name:   "Updated",
+		Status: domain.ProjectStatusRunning,
+	}
+	repo.Register(ctx, p2)
+
+	// Should have updated data
+	retrieved, _ := repo.Get(ctx, "overwrite-test")
+	if retrieved.Name != "Updated" {
+		t.Errorf("Name = %q, want %q", retrieved.Name, "Updated")
+	}
+	if retrieved.Status != domain.ProjectStatusRunning {
+		t.Errorf("Status = %q, want %q", retrieved.Status, domain.ProjectStatusRunning)
+	}
+}
+
+func TestProjectRepository_ConcurrentAccess(t *testing.T) {
+	repo := NewProjectRepository()
+	ctx := context.Background()
+
+	var wg sync.WaitGroup
+
+	// Concurrent register
+	for i := 0; i < 50; i++ {
+		wg.Add(1)
+		go func(id int) {
+			defer wg.Done()
+			p := &domain.Project{
+				ID:   domain.ProjectID(string(rune('a' + id%26))),
+				Name: "Project",
+			}
+			repo.Register(ctx, p)
+		}(i)
+	}
+
+	// Concurrent read
+	for i := 0; i < 50; i++ {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			repo.List(ctx)
+		}()
+	}
+
+	// Concurrent exists
+	for i := 0; i < 50; i++ {
+		wg.Add(1)
+		go func(id int) {
+			defer wg.Done()
+			_, _ = repo.Exists(ctx, domain.ProjectID(string(rune('a'+id%26))))
+		}(i)
+	}
+
+	wg.Wait()
+	// Test passes if no race/deadlock
+}
--- a/internal/adapter/memory/stream_publisher.go
+++ b/internal/adapter/memory/stream_publisher.go
@ -1,69 +1,197 @@
 package memory

 import (
+	"fmt"
 	"sync"
+	"sync/atomic"

 	"github.com/orchard9/rdev/internal/port"
 )

-// StreamPublisher is an in-memory implementation of port.StreamPublisher.
+// StreamPublisher is an in-memory implementation of port.StreamPublisher
+// with event ID generation and replay buffer support.
 type StreamPublisher struct {
 	mu      sync.RWMutex
-	streams map[string][]chan port.StreamEvent
+	streams map[string]*streamState
+}
+
+// subscriber wraps a channel with closed state to prevent send-on-closed-channel races.
+type subscriber struct {
+	ch     chan port.StreamEvent
+	closed atomic.Bool
+	mu     sync.Mutex // protects close and send operations
+}
+
+// trySend attempts to send an event to the subscriber's channel.
+// Returns false if the subscriber is closed or channel is full.
+// This is safe to call concurrently with close operations.
+func (s *subscriber) trySend(event port.StreamEvent) bool {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	if s.closed.Load() {
+		return false
+	}
+
+	select {
+	case s.ch <- event:
+		return true
+	default:
+		return false // Channel full
+	}
+}
+
+// doClose closes the subscriber channel safely.
+// This is safe to call concurrently with send operations.
+func (s *subscriber) doClose() {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	if !s.closed.Swap(true) {
+		// Only close if we're the one who set closed to true
+		close(s.ch)
+	}
+}
+
+// streamState holds the state for a single stream.
+type streamState struct {
+	subscribers []*subscriber
+	eventBuffer []port.StreamEvent // Circular buffer for replay
+	eventSeq    atomic.Uint64      // Monotonic event sequence
+	bufferSize  int                // Max events to keep for replay
 }

 // NewStreamPublisher creates a new in-memory stream publisher.
 func NewStreamPublisher() *StreamPublisher {
 	return &StreamPublisher{
-		streams: make(map[string][]chan port.StreamEvent),
+		streams: make(map[string]*streamState),
 	}
 }

 // Ensure StreamPublisher implements port.StreamPublisher at compile time.
 var _ port.StreamPublisher = (*StreamPublisher)(nil)

-// Subscribe creates a subscription to events for the given stream ID.
-func (sp *StreamPublisher) Subscribe(streamID string) (<-chan port.StreamEvent, func()) {
+// getOrCreateStream returns the stream state, creating it if necessary.
+func (sp *StreamPublisher) getOrCreateStream(streamID string) *streamState {
 	sp.mu.Lock()
 	defer sp.mu.Unlock()

-	ch := make(chan port.StreamEvent, 100)
-	sp.streams[streamID] = append(sp.streams[streamID], ch)
+	state, exists := sp.streams[streamID]
+	if !exists {
+		state = &streamState{
+			eventBuffer: make([]port.StreamEvent, 0, 100),
+			bufferSize:  100, // Keep last 100 events for replay
+		}
+		sp.streams[streamID] = state
+	}
+	return state
+}
+
+// Subscribe creates a subscription to events for the given stream ID.
+func (sp *StreamPublisher) Subscribe(streamID string) (<-chan port.StreamEvent, func()) {
+	return sp.SubscribeFromID(streamID, "")
+}
+
+// SubscribeFromID creates a subscription starting from a specific event ID.
+// Events since lastEventID will be replayed before new events are delivered.
+func (sp *StreamPublisher) SubscribeFromID(streamID string, lastEventID string) (<-chan port.StreamEvent, func()) {
+	state := sp.getOrCreateStream(streamID)
+
+	sp.mu.Lock()
+	defer sp.mu.Unlock()
+
+	sub := &subscriber{
+		ch: make(chan port.StreamEvent, 100),
+	}
+	state.subscribers = append(state.subscribers, sub)
+
+	// Replay events if lastEventID is provided
+	if lastEventID != "" {
+		go sp.replayEvents(sub, state, lastEventID)
+	}

 	// Return cleanup function
 	cleanup := func() {
-		sp.unsubscribe(streamID, ch)
+		sp.unsubscribe(streamID, sub)
 	}

-	return ch, cleanup
+	return sub.ch, cleanup
 }

-func (sp *StreamPublisher) unsubscribe(streamID string, ch chan port.StreamEvent) {
+// replayEvents sends buffered events that occurred after lastEventID.
+func (sp *StreamPublisher) replayEvents(sub *subscriber, state *streamState, lastEventID string) {
+	sp.mu.RLock()
+	defer sp.mu.RUnlock()
+
+	found := false
+	for _, event := range state.eventBuffer {
+		if found {
+			if !sub.trySend(event) && sub.closed.Load() {
+				return // Subscriber closed, stop replay
+			}
+		}
+		if event.ID == lastEventID {
+			found = true
+		}
+	}
+
+	// If we didn't find the lastEventID (too old), replay all buffered events
+	if !found && lastEventID != "" {
+		for _, event := range state.eventBuffer {
+			if !sub.trySend(event) && sub.closed.Load() {
+				return // Subscriber closed, stop replay
+			}
+		}
+	}
+}
+
+func (sp *StreamPublisher) unsubscribe(streamID string, sub *subscriber) {
+	// Close the subscriber channel safely (handles concurrent sends)
+	sub.doClose()
+
 	sp.mu.Lock()
 	defer sp.mu.Unlock()

-	channels := sp.streams[streamID]
-	for i, c := range channels {
-		if c == ch {
-			sp.streams[streamID] = append(channels[:i], channels[i+1:]...)
-			close(ch)
+	state, exists := sp.streams[streamID]
+	if !exists {
+		return
+	}
+
+	for i, s := range state.subscribers {
+		if s == sub {
+			state.subscribers = append(state.subscribers[:i], state.subscribers[i+1:]...)
 			break
 		}
 	}
 }

 // Publish sends an event to all subscribers of a stream.
-func (sp *StreamPublisher) Publish(streamID string, event port.StreamEvent) {
-	sp.mu.RLock()
-	defer sp.mu.RUnlock()
+// Returns the generated event ID.
+func (sp *StreamPublisher) Publish(streamID string, event port.StreamEvent) string {
+	state := sp.getOrCreateStream(streamID)

-	for _, ch := range sp.streams[streamID] {
-		select {
-		case ch <- event:
-		default:
-			// Channel full, skip
+	// Generate event ID
+	seq := state.eventSeq.Add(1)
+	event.ID = fmt.Sprintf("%s:%d", streamID, seq)
+
+	sp.mu.Lock()
+	// Add to buffer for replay
+	if len(state.eventBuffer) >= state.bufferSize {
+		// Remove oldest event
+		state.eventBuffer = state.eventBuffer[1:]
 	}
+	state.eventBuffer = append(state.eventBuffer, event)
+	// Copy subscriber pointers (safe - trySend handles concurrent close)
+	subscribers := make([]*subscriber, len(state.subscribers))
+	copy(subscribers, state.subscribers)
+	sp.mu.Unlock()
+
+	// Send to all subscribers using thread-safe trySend
+	for _, sub := range subscribers {
+		sub.trySend(event)
 	}
+
+	return event.ID
 }

 // Close closes a stream and all its subscriptions.
@ -71,8 +199,13 @@ func (sp *StreamPublisher) Close(streamID string) {
 	sp.mu.Lock()
 	defer sp.mu.Unlock()

-	for _, ch := range sp.streams[streamID] {
-		close(ch)
+	state, exists := sp.streams[streamID]
+	if !exists {
+		return
+	}
+
+	for _, sub := range state.subscribers {
+		sub.doClose()
 	}
 	delete(sp.streams, streamID)
 }
@ -82,5 +215,21 @@ func (sp *StreamPublisher) SubscriberCount(streamID string) int {
 	sp.mu.RLock()
 	defer sp.mu.RUnlock()

-	return len(sp.streams[streamID])
+	state, exists := sp.streams[streamID]
+	if !exists {
+		return 0
+	}
+	return len(state.subscribers)
+}
+
+// BufferedEventCount returns the number of buffered events for a stream (for testing).
+func (sp *StreamPublisher) BufferedEventCount(streamID string) int {
+	sp.mu.RLock()
+	defer sp.mu.RUnlock()
+
+	state, exists := sp.streams[streamID]
+	if !exists {
+		return 0
+	}
+	return len(state.eventBuffer)
 }
--- a/internal/adapter/memory/stream_publisher_test.go
+++ b/internal/adapter/memory/stream_publisher_test.go
@ -0,0 +1,371 @@
+package memory
+
+import (
+	"sync"
+	"testing"
+	"time"
+
+	"github.com/orchard9/rdev/internal/port"
+)
+
+func TestStreamPublisher_PublishAndSubscribe(t *testing.T) {
+	sp := NewStreamPublisher()
+	streamID := "test-stream-1"
+
+	// Subscribe first
+	ch, cleanup := sp.Subscribe(streamID)
+	defer cleanup()
+
+	// Publish an event
+	event := port.StreamEvent{
+		Type: "output",
+		Data: map[string]any{"line": "hello"},
+	}
+	eventID := sp.Publish(streamID, event)
+
+	// Receive the event
+	select {
+	case received := <-ch:
+		if received.Type != "output" {
+			t.Errorf("Type = %q, want %q", received.Type, "output")
+		}
+		if received.ID != eventID {
+			t.Errorf("ID = %q, want %q", received.ID, eventID)
+		}
+		if received.Data["line"] != "hello" {
+			t.Errorf("Data[line] = %q, want %q", received.Data["line"], "hello")
+		}
+	case <-time.After(time.Second):
+		t.Fatal("Timeout waiting for event")
+	}
+}
+
+func TestStreamPublisher_EventIDGeneration(t *testing.T) {
+	sp := NewStreamPublisher()
+	streamID := "test-stream-ids"
+
+	id1 := sp.Publish(streamID, port.StreamEvent{Type: "e1"})
+	id2 := sp.Publish(streamID, port.StreamEvent{Type: "e2"})
+	id3 := sp.Publish(streamID, port.StreamEvent{Type: "e3"})
+
+	// IDs should be sequential
+	if id1 == id2 || id2 == id3 || id1 == id3 {
+		t.Errorf("Event IDs should be unique: %q, %q, %q", id1, id2, id3)
+	}
+
+	// IDs should contain stream ID
+	for _, id := range []string{id1, id2, id3} {
+		if len(id) == 0 {
+			t.Error("Event ID should not be empty")
+		}
+	}
+}
+
+func TestStreamPublisher_MultipleSubscribers(t *testing.T) {
+	sp := NewStreamPublisher()
+	streamID := "test-multi-sub"
+
+	// Create multiple subscribers
+	ch1, cleanup1 := sp.Subscribe(streamID)
+	defer cleanup1()
+
+	ch2, cleanup2 := sp.Subscribe(streamID)
+	defer cleanup2()
+
+	ch3, cleanup3 := sp.Subscribe(streamID)
+	defer cleanup3()
+
+	// Verify subscriber count
+	if count := sp.SubscriberCount(streamID); count != 3 {
+		t.Errorf("SubscriberCount = %d, want 3", count)
+	}
+
+	// Publish an event
+	sp.Publish(streamID, port.StreamEvent{Type: "broadcast"})
+
+	// All subscribers should receive
+	for i, ch := range []<-chan port.StreamEvent{ch1, ch2, ch3} {
+		select {
+		case e := <-ch:
+			if e.Type != "broadcast" {
+				t.Errorf("Subscriber %d: Type = %q, want %q", i+1, e.Type, "broadcast")
+			}
+		case <-time.After(time.Second):
+			t.Errorf("Subscriber %d: Timeout waiting for event", i+1)
+		}
+	}
+}
+
+func TestStreamPublisher_SubscriberCleanup(t *testing.T) {
+	sp := NewStreamPublisher()
+	streamID := "test-cleanup"
+
+	ch, cleanup := sp.Subscribe(streamID)
+
+	if count := sp.SubscriberCount(streamID); count != 1 {
+		t.Errorf("SubscriberCount before cleanup = %d, want 1", count)
+	}
+
+	// Cleanup
+	cleanup()
+
+	if count := sp.SubscriberCount(streamID); count != 0 {
+		t.Errorf("SubscriberCount after cleanup = %d, want 0", count)
+	}
+
+	// Channel should be closed
+	select {
+	case _, ok := <-ch:
+		if ok {
+			t.Error("Channel should be closed after cleanup")
+		}
+	default:
+		t.Error("Channel should be closed (not blocked)")
+	}
+}
+
+func TestStreamPublisher_EventReplay(t *testing.T) {
+	sp := NewStreamPublisher()
+	streamID := "test-replay"
+
+	// Publish some events before subscribing
+	id1 := sp.Publish(streamID, port.StreamEvent{Type: "event1", Data: map[string]any{"n": 1}})
+	sp.Publish(streamID, port.StreamEvent{Type: "event2", Data: map[string]any{"n": 2}})
+	sp.Publish(streamID, port.StreamEvent{Type: "event3", Data: map[string]any{"n": 3}})
+
+	// Subscribe from id1 - should replay events after id1
+	ch, cleanup := sp.SubscribeFromID(streamID, id1)
+	defer cleanup()
+
+	// Give replay goroutine time to run
+	time.Sleep(50 * time.Millisecond)
+
+	// Should receive event2 and event3 (not event1 since we're replaying from id1)
+	var received []port.StreamEvent
+	timeout := time.After(time.Second)
+
+loop:
+	for {
+		select {
+		case e := <-ch:
+			received = append(received, e)
+			if len(received) >= 2 {
+				break loop
+			}
+		case <-timeout:
+			break loop
+		}
+	}
+
+	if len(received) != 2 {
+		t.Fatalf("Expected 2 replayed events, got %d", len(received))
+	}
+
+	if received[0].Data["n"] != 2 {
+		t.Errorf("First replayed event data = %v, want n=2", received[0].Data)
+	}
+	if received[1].Data["n"] != 3 {
+		t.Errorf("Second replayed event data = %v, want n=3", received[1].Data)
+	}
+}
+
+func TestStreamPublisher_EventBuffer(t *testing.T) {
+	sp := NewStreamPublisher()
+	streamID := "test-buffer"
+
+	// Publish events
+	for i := 0; i < 50; i++ {
+		sp.Publish(streamID, port.StreamEvent{Type: "event"})
+	}
+
+	if count := sp.BufferedEventCount(streamID); count != 50 {
+		t.Errorf("BufferedEventCount = %d, want 50", count)
+	}
+}
+
+func TestStreamPublisher_BufferOverflow(t *testing.T) {
+	sp := NewStreamPublisher()
+	streamID := "test-overflow"
+
+	// Publish more events than buffer size (100)
+	for i := 0; i < 150; i++ {
+		sp.Publish(streamID, port.StreamEvent{Type: "event"})
+	}
+
+	// Buffer should be capped at 100
+	if count := sp.BufferedEventCount(streamID); count != 100 {
+		t.Errorf("BufferedEventCount = %d, want 100 (buffer cap)", count)
+	}
+}
+
+func TestStreamPublisher_Close(t *testing.T) {
+	sp := NewStreamPublisher()
+	streamID := "test-close"
+
+	ch, _ := sp.Subscribe(streamID)
+
+	// Close the stream
+	sp.Close(streamID)
+
+	// Channel should be closed
+	select {
+	case _, ok := <-ch:
+		if ok {
+			t.Error("Channel should be closed")
+		}
+	case <-time.After(100 * time.Millisecond):
+		t.Error("Channel should be closed (not blocked)")
+	}
+
+	// Subscriber count should be 0
+	if count := sp.SubscriberCount(streamID); count != 0 {
+		t.Errorf("SubscriberCount after close = %d, want 0", count)
+	}
+}
+
+func TestStreamPublisher_IndependentStreams(t *testing.T) {
+	sp := NewStreamPublisher()
+
+	ch1, cleanup1 := sp.Subscribe("stream-a")
+	defer cleanup1()
+
+	ch2, cleanup2 := sp.Subscribe("stream-b")
+	defer cleanup2()
+
+	// Publish to stream-a only
+	sp.Publish("stream-a", port.StreamEvent{Type: "for-a"})
+
+	// stream-a subscriber should receive
+	select {
+	case e := <-ch1:
+		if e.Type != "for-a" {
+			t.Errorf("Stream-a received wrong event: %q", e.Type)
+		}
+	case <-time.After(time.Second):
+		t.Error("Stream-a subscriber should receive event")
+	}
+
+	// stream-b subscriber should NOT receive
+	select {
+	case e := <-ch2:
+		t.Errorf("Stream-b should not receive event from stream-a, got: %v", e)
+	case <-time.After(100 * time.Millisecond):
+		// Expected - no event for stream-b
+	}
+}
+
+func TestStreamPublisher_ConcurrentPublish(t *testing.T) {
+	sp := NewStreamPublisher()
+	streamID := "test-concurrent"
+
+	ch, cleanup := sp.Subscribe(streamID)
+	defer cleanup()
+
+	// Concurrent publishers
+	var wg sync.WaitGroup
+	for i := 0; i < 10; i++ {
+		wg.Add(1)
+		go func(id int) {
+			defer wg.Done()
+			for j := 0; j < 10; j++ {
+				sp.Publish(streamID, port.StreamEvent{Type: "concurrent"})
+			}
+		}(i)
+	}
+
+	// Collect events
+	done := make(chan bool)
+	var received int
+	go func() {
+		timeout := time.After(5 * time.Second)
+		for {
+			select {
+			case <-ch:
+				received++
+				if received >= 100 {
+					done <- true
+					return
+				}
+			case <-timeout:
+				done <- false
+				return
+			}
+		}
+	}()
+
+	wg.Wait()
+	success := <-done
+
+	if !success {
+		t.Errorf("Expected 100 events, received %d", received)
+	}
+}
+
+func TestStreamPublisher_ConcurrentSubscribeUnsubscribe(t *testing.T) {
+	sp := NewStreamPublisher()
+	streamID := "test-sub-unsub"
+
+	var wg sync.WaitGroup
+
+	// Concurrent subscribe/unsubscribe
+	for i := 0; i < 50; i++ {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			_, cleanup := sp.Subscribe(streamID)
+			time.Sleep(10 * time.Millisecond)
+			cleanup()
+		}()
+	}
+
+	// Concurrent publish
+	for i := 0; i < 10; i++ {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			for j := 0; j < 10; j++ {
+				sp.Publish(streamID, port.StreamEvent{Type: "test"})
+			}
+		}()
+	}
+
+	wg.Wait()
+	// Test passes if no race/deadlock
+}
+
+func TestStreamPublisher_ReplayFromUnknownID(t *testing.T) {
+	sp := NewStreamPublisher()
+	streamID := "test-unknown-replay"
+
+	// Publish some events
+	sp.Publish(streamID, port.StreamEvent{Type: "e1", Data: map[string]any{"n": 1}})
+	sp.Publish(streamID, port.StreamEvent{Type: "e2", Data: map[string]any{"n": 2}})
+
+	// Subscribe from an ID that doesn't exist (should replay all)
+	ch, cleanup := sp.SubscribeFromID(streamID, "nonexistent-id")
+	defer cleanup()
+
+	// Give replay time
+	time.Sleep(50 * time.Millisecond)
+
+	// Should receive all buffered events
+	var received []port.StreamEvent
+	timeout := time.After(time.Second)
+
+loop:
+	for {
+		select {
+		case e := <-ch:
+			received = append(received, e)
+			if len(received) >= 2 {
+				break loop
+			}
+		case <-timeout:
+			break loop
+		}
+	}
+
+	if len(received) != 2 {
+		t.Errorf("Expected 2 events (full replay), got %d", len(received))
+	}
+}
--- a/internal/adapter/postgres/apikey_repository.go
+++ b/internal/adapter/postgres/apikey_repository.go
@ -33,10 +33,10 @@ func (r *APIKeyRepository) Create(ctx context.Context, key *domain.APIKey, keyHa

 	var id string
 	err := r.db.QueryRowContext(ctx, `
-		INSERT INTO api_keys (name, key_hash, key_prefix, scopes, project_ids, expires_at, created_by)
-		VALUES ($1, $2, $3, $4, $5, $6, $7)
+		INSERT INTO api_keys (name, key_hash, key_prefix, scopes, project_ids, allowed_ips, expires_at, created_by)
+		VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
 		RETURNING id
-	`, key.Name, keyHash, key.KeyPrefix, pq.Array(scopeStrings), pq.Array(projectIDStrings), key.ExpiresAt, key.CreatedBy).Scan(&id)
+	`, key.Name, keyHash, key.KeyPrefix, pq.Array(scopeStrings), pq.Array(projectIDStrings), pq.Array(key.AllowedIPs), key.ExpiresAt, key.CreatedBy).Scan(&id)

 	if err != nil {
 		return fmt.Errorf("insert key: %w", err)
@ -57,7 +57,7 @@ func (r *APIKeyRepository) GetByHash(ctx context.Context, keyHash string) (*doma
 	)

 	err := r.db.QueryRowContext(ctx, `
-		SELECT id, name, key_prefix, scopes, project_ids, created_at, expires_at, last_used_at, revoked_at, created_by
+		SELECT id, name, key_prefix, scopes, project_ids, allowed_ips, created_at, expires_at, last_used_at, revoked_at, created_by
 		FROM api_keys
 		WHERE key_hash = $1
 	`, keyHash).Scan(
@ -66,6 +66,7 @@ func (r *APIKeyRepository) GetByHash(ctx context.Context, keyHash string) (*doma
 		&key.KeyPrefix,
 		pq.Array(&scopeStrings),
 		pq.Array(&projectIDs),
+		pq.Array(&key.AllowedIPs),
 		&key.CreatedAt,
 		&key.ExpiresAt,
 		&key.LastUsedAt,
@ -97,7 +98,7 @@ func (r *APIKeyRepository) Get(ctx context.Context, id domain.APIKeyID) (*domain
 	)

 	err := r.db.QueryRowContext(ctx, `
-		SELECT id, name, key_prefix, scopes, project_ids, created_at, expires_at, last_used_at, revoked_at, created_by
+		SELECT id, name, key_prefix, scopes, project_ids, allowed_ips, created_at, expires_at, last_used_at, revoked_at, created_by
 		FROM api_keys
 		WHERE id = $1
 	`, string(id)).Scan(
@ -106,6 +107,7 @@ func (r *APIKeyRepository) Get(ctx context.Context, id domain.APIKeyID) (*domain
 		&key.KeyPrefix,
 		pq.Array(&scopeStrings),
 		pq.Array(&projectIDs),
+		pq.Array(&key.AllowedIPs),
 		&key.CreatedAt,
 		&key.ExpiresAt,
 		&key.LastUsedAt,
@ -130,14 +132,14 @@ func (r *APIKeyRepository) Get(ctx context.Context, id domain.APIKeyID) (*domain
 // List returns all API keys (without secrets).
 func (r *APIKeyRepository) List(ctx context.Context) ([]*domain.APIKey, error) {
 	rows, err := r.db.QueryContext(ctx, `
-		SELECT id, name, key_prefix, scopes, project_ids, created_at, expires_at, last_used_at, revoked_at, created_by
+		SELECT id, name, key_prefix, scopes, project_ids, allowed_ips, created_at, expires_at, last_used_at, revoked_at, created_by
 		FROM api_keys
 		ORDER BY created_at DESC
 	`)
 	if err != nil {
 		return nil, fmt.Errorf("query keys: %w", err)
 	}
-	defer rows.Close()
+	defer func() { _ = rows.Close() }()

 	var keys []*domain.APIKey
 	for rows.Next() {
@ -153,6 +155,7 @@ func (r *APIKeyRepository) List(ctx context.Context) ([]*domain.APIKey, error) {
 			&key.KeyPrefix,
 			pq.Array(&scopeStrings),
 			pq.Array(&projectIDs),
+			pq.Array(&key.AllowedIPs),
 			&key.CreatedAt,
 			&key.ExpiresAt,
 			&key.LastUsedAt,
--- a/internal/adapter/postgres/apikey_repository_test.go
+++ b/internal/adapter/postgres/apikey_repository_test.go
@ -0,0 +1,508 @@
+package postgres
+
+import (
+	"context"
+	"crypto/sha256"
+	"encoding/hex"
+	"testing"
+	"time"
+
+	"github.com/orchard9/rdev/internal/domain"
+	"github.com/orchard9/rdev/internal/testutil"
+)
+
+func hashKey(key string) string {
+	h := sha256.Sum256([]byte(key))
+	return hex.EncodeToString(h[:])
+}
+
+func TestAPIKeyRepository_Create(t *testing.T) {
+	db := testutil.TestDB(t)
+	t.Cleanup(func() { testutil.CleanupTestKeys(t, db) })
+
+	repo := NewAPIKeyRepository(db)
+	ctx := context.Background()
+
+	t.Run("creates key with all fields", func(t *testing.T) {
+		expires := time.Now().Add(24 * time.Hour)
+		key := &domain.APIKey{
+			Name:       "test-repo-create",
+			KeyPrefix:  "abc12345",
+			Scopes:     []domain.Scope{domain.ScopeProjectsRead, domain.ScopeKeysManage},
+			ProjectIDs: []domain.ProjectID{"proj-a", "proj-b"},
+			AllowedIPs: []string{"192.168.1.0/24", "10.0.0.1"},
+			ExpiresAt:  &expires,
+			CreatedBy:  "test-user",
+		}
+		keyHash := hashKey("test-key-123")
+
+		err := repo.Create(ctx, key, keyHash)
+		if err != nil {
+			t.Fatalf("Create() error = %v", err)
+		}
+
+		if key.ID == "" {
+			t.Error("ID should be set after create")
+		}
+
+		// Verify via GetByHash
+		retrieved, err := repo.GetByHash(ctx, keyHash)
+		if err != nil {
+			t.Fatalf("GetByHash() error = %v", err)
+		}
+
+		if retrieved.Name != "test-repo-create" {
+			t.Errorf("Name = %q, want %q", retrieved.Name, "test-repo-create")
+		}
+		if len(retrieved.Scopes) != 2 {
+			t.Errorf("Scopes length = %d, want 2", len(retrieved.Scopes))
+		}
+		if len(retrieved.ProjectIDs) != 2 {
+			t.Errorf("ProjectIDs length = %d, want 2", len(retrieved.ProjectIDs))
+		}
+		if len(retrieved.AllowedIPs) != 2 {
+			t.Errorf("AllowedIPs length = %d, want 2", len(retrieved.AllowedIPs))
+		}
+	})
+
+	t.Run("creates key with minimal fields", func(t *testing.T) {
+		key := &domain.APIKey{
+			Name:      "test-repo-minimal",
+			KeyPrefix: "min12345",
+			Scopes:    []domain.Scope{domain.ScopeProjectsRead},
+			CreatedBy: "test",
+		}
+		keyHash := hashKey("minimal-key-456")
+
+		err := repo.Create(ctx, key, keyHash)
+		if err != nil {
+			t.Fatalf("Create() error = %v", err)
+		}
+
+		retrieved, _ := repo.GetByHash(ctx, keyHash)
+		if retrieved.ExpiresAt != nil {
+			t.Error("ExpiresAt should be nil for keys without expiration")
+		}
+		if len(retrieved.ProjectIDs) != 0 {
+			t.Error("ProjectIDs should be empty")
+		}
+		if len(retrieved.AllowedIPs) != 0 {
+			t.Error("AllowedIPs should be empty")
+		}
+	})
+}
+
+func TestAPIKeyRepository_GetByHash(t *testing.T) {
+	db := testutil.TestDB(t)
+	t.Cleanup(func() { testutil.CleanupTestKeys(t, db) })
+
+	repo := NewAPIKeyRepository(db)
+	ctx := context.Background()
+
+	// Create a test key
+	keyHash := hashKey("get-by-hash-key")
+	key := &domain.APIKey{
+		Name:      "test-get-hash",
+		KeyPrefix: "geth1234",
+		Scopes:    []domain.Scope{domain.ScopeAdmin},
+		CreatedBy: "test",
+	}
+	_ = repo.Create(ctx, key, keyHash)
+
+	t.Run("finds existing key", func(t *testing.T) {
+		retrieved, err := repo.GetByHash(ctx, keyHash)
+		if err != nil {
+			t.Fatalf("GetByHash() error = %v", err)
+		}
+		if retrieved.Name != "test-get-hash" {
+			t.Errorf("Name = %q, want %q", retrieved.Name, "test-get-hash")
+		}
+	})
+
+	t.Run("returns error for nonexistent hash", func(t *testing.T) {
+		_, err := repo.GetByHash(ctx, hashKey("nonexistent"))
+		if err != domain.ErrKeyNotFound {
+			t.Errorf("GetByHash() error = %v, want %v", err, domain.ErrKeyNotFound)
+		}
+	})
+}
+
+func TestAPIKeyRepository_Get(t *testing.T) {
+	db := testutil.TestDB(t)
+	t.Cleanup(func() { testutil.CleanupTestKeys(t, db) })
+
+	repo := NewAPIKeyRepository(db)
+	ctx := context.Background()
+
+	// Create a test key
+	key := &domain.APIKey{
+		Name:      "test-get-by-id",
+		KeyPrefix: "getid123",
+		Scopes:    []domain.Scope{domain.ScopeProjectsRead},
+		CreatedBy: "test",
+	}
+	_ = repo.Create(ctx, key, hashKey("get-by-id-key"))
+
+	t.Run("finds existing key", func(t *testing.T) {
+		retrieved, err := repo.Get(ctx, key.ID)
+		if err != nil {
+			t.Fatalf("Get() error = %v", err)
+		}
+		if retrieved.Name != "test-get-by-id" {
+			t.Errorf("Name = %q, want %q", retrieved.Name, "test-get-by-id")
+		}
+	})
+
+	t.Run("returns error for nonexistent ID", func(t *testing.T) {
+		_, err := repo.Get(ctx, "00000000-0000-0000-0000-000000000000")
+		if err != domain.ErrKeyNotFound {
+			t.Errorf("Get() error = %v, want %v", err, domain.ErrKeyNotFound)
+		}
+	})
+}
+
+func TestAPIKeyRepository_List(t *testing.T) {
+	db := testutil.TestDB(t)
+	t.Cleanup(func() { testutil.CleanupTestKeys(t, db) })
+
+	repo := NewAPIKeyRepository(db)
+	ctx := context.Background()
+
+	// Create test keys
+	for i := 0; i < 3; i++ {
+		key := &domain.APIKey{
+			Name:      "test-list-" + string(rune('a'+i)),
+			KeyPrefix: "list1234",
+			Scopes:    []domain.Scope{domain.ScopeProjectsRead},
+			CreatedBy: "test",
+		}
+		_ = repo.Create(ctx, key, hashKey("list-key-"+string(rune('a'+i))))
+	}
+
+	keys, err := repo.List(ctx)
+	if err != nil {
+		t.Fatalf("List() error = %v", err)
+	}
+
+	// Count our test keys
+	testKeyCount := 0
+	for _, k := range keys {
+		if len(k.Name) >= 10 && k.Name[:10] == "test-list-" {
+			testKeyCount++
+		}
+	}
+
+	if testKeyCount != 3 {
+		t.Errorf("List() returned %d test keys, want 3", testKeyCount)
+	}
+}
+
+func TestAPIKeyRepository_Revoke(t *testing.T) {
+	db := testutil.TestDB(t)
+	t.Cleanup(func() { testutil.CleanupTestKeys(t, db) })
+
+	repo := NewAPIKeyRepository(db)
+	ctx := context.Background()
+
+	t.Run("revokes existing key", func(t *testing.T) {
+		key := &domain.APIKey{
+			Name:      "test-revoke",
+			KeyPrefix: "rev12345",
+			Scopes:    []domain.Scope{domain.ScopeProjectsRead},
+			CreatedBy: "test",
+		}
+		keyHash := hashKey("revoke-key")
+		repo.Create(ctx, key, keyHash)
+
+		err := repo.Revoke(ctx, key.ID)
+		if err != nil {
+			t.Fatalf("Revoke() error = %v", err)
+		}
+
+		// Verify revoked
+		retrieved, _ := repo.Get(ctx, key.ID)
+		if retrieved.RevokedAt == nil {
+			t.Error("RevokedAt should be set after revoke")
+		}
+	})
+
+	t.Run("returns error for nonexistent key", func(t *testing.T) {
+		err := repo.Revoke(ctx, "00000000-0000-0000-0000-000000000000")
+		if err != domain.ErrKeyNotFound {
+			t.Errorf("Revoke() error = %v, want %v", err, domain.ErrKeyNotFound)
+		}
+	})
+
+	t.Run("returns error for already revoked key", func(t *testing.T) {
+		key := &domain.APIKey{
+			Name:      "test-revoke-twice",
+			KeyPrefix: "rev21234",
+			Scopes:    []domain.Scope{domain.ScopeProjectsRead},
+			CreatedBy: "test",
+		}
+		_ = repo.Create(ctx, key, hashKey("revoke-twice-key"))
+
+		// First revoke
+		_ = repo.Revoke(ctx, key.ID)
+
+		// Second revoke should fail
+		err := repo.Revoke(ctx, key.ID)
+		if err != domain.ErrKeyNotFound {
+			t.Errorf("Second Revoke() error = %v, want %v", err, domain.ErrKeyNotFound)
+		}
+	})
+}
+
+func TestAPIKeyRepository_UpdateLastUsed(t *testing.T) {
+	db := testutil.TestDB(t)
+	t.Cleanup(func() { testutil.CleanupTestKeys(t, db) })
+
+	repo := NewAPIKeyRepository(db)
+	ctx := context.Background()
+
+	key := &domain.APIKey{
+		Name:      "test-last-used",
+		KeyPrefix: "lu123456",
+		Scopes:    []domain.Scope{domain.ScopeProjectsRead},
+		CreatedBy: "test",
+	}
+	repo.Create(ctx, key, hashKey("last-used-key"))
+
+	// Initial state - no last_used_at
+	retrieved, _ := repo.Get(ctx, key.ID)
+	if retrieved.LastUsedAt != nil {
+		t.Error("LastUsedAt should be nil initially")
+	}
+
+	// Update last used
+	err := repo.UpdateLastUsed(ctx, key.ID)
+	if err != nil {
+		t.Fatalf("UpdateLastUsed() error = %v", err)
+	}
+
+	// Verify updated
+	retrieved, _ = repo.Get(ctx, key.ID)
+	if retrieved.LastUsedAt == nil {
+		t.Error("LastUsedAt should be set after update")
+	}
+	if time.Since(*retrieved.LastUsedAt) > time.Minute {
+		t.Error("LastUsedAt should be recent")
+	}
+}
+
+func TestAPIKeyRepository_ScopeArrayHandling(t *testing.T) {
+	db := testutil.TestDB(t)
+	t.Cleanup(func() { testutil.CleanupTestKeys(t, db) })
+
+	repo := NewAPIKeyRepository(db)
+	ctx := context.Background()
+
+	tests := []struct {
+		name   string
+		scopes []domain.Scope
+	}{
+		{"single scope", []domain.Scope{domain.ScopeProjectsRead}},
+		{"multiple scopes", []domain.Scope{domain.ScopeProjectsRead, domain.ScopeProjectsExecute, domain.ScopeKeysManage}},
+		{"admin scope", []domain.Scope{domain.ScopeAdmin}},
+		{"all scopes", []domain.Scope{domain.ScopeProjectsRead, domain.ScopeProjectsExecute, domain.ScopeKeysManage, domain.ScopeKeysManage, domain.ScopeAdmin}},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			key := &domain.APIKey{
+				Name:      "test-scopes-" + tt.name,
+				KeyPrefix: "sc123456",
+				Scopes:    tt.scopes,
+				CreatedBy: "test",
+			}
+			repo.Create(ctx, key, hashKey("scopes-"+tt.name))
+
+			retrieved, _ := repo.Get(ctx, key.ID)
+			if len(retrieved.Scopes) != len(tt.scopes) {
+				t.Errorf("Scopes length = %d, want %d", len(retrieved.Scopes), len(tt.scopes))
+			}
+
+			// Verify each scope
+			for _, expected := range tt.scopes {
+				found := false
+				for _, actual := range retrieved.Scopes {
+					if actual == expected {
+						found = true
+						break
+					}
+				}
+				if !found {
+					t.Errorf("Missing scope: %q", expected)
+				}
+			}
+		})
+	}
+}
+
+func TestAPIKeyRepository_ProjectIDArrayHandling(t *testing.T) {
+	db := testutil.TestDB(t)
+	t.Cleanup(func() { testutil.CleanupTestKeys(t, db) })
+
+	repo := NewAPIKeyRepository(db)
+	ctx := context.Background()
+
+	tests := []struct {
+		name       string
+		projectIDs []domain.ProjectID
+	}{
+		{"nil projects", nil},
+		{"empty projects", []domain.ProjectID{}},
+		{"single project", []domain.ProjectID{"proj-a"}},
+		{"multiple projects", []domain.ProjectID{"proj-a", "proj-b", "proj-c"}},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			key := &domain.APIKey{
+				Name:       "test-projects-" + tt.name,
+				KeyPrefix:  "pr123456",
+				Scopes:     []domain.Scope{domain.ScopeProjectsRead},
+				ProjectIDs: tt.projectIDs,
+				CreatedBy:  "test",
+			}
+			repo.Create(ctx, key, hashKey("projects-"+tt.name))
+
+			retrieved, _ := repo.Get(ctx, key.ID)
+
+			expectedLen := 0
+			if tt.projectIDs != nil {
+				expectedLen = len(tt.projectIDs)
+			}
+
+			if len(retrieved.ProjectIDs) != expectedLen {
+				t.Errorf("ProjectIDs length = %d, want %d", len(retrieved.ProjectIDs), expectedLen)
+			}
+		})
+	}
+}
+
+func TestAPIKeyRepository_AllowedIPsArrayHandling(t *testing.T) {
+	db := testutil.TestDB(t)
+	t.Cleanup(func() { testutil.CleanupTestKeys(t, db) })
+
+	repo := NewAPIKeyRepository(db)
+	ctx := context.Background()
+
+	tests := []struct {
+		name       string
+		allowedIPs []string
+	}{
+		{"nil IPs", nil},
+		{"empty IPs", []string{}},
+		{"single IP", []string{"192.168.1.100"}},
+		{"CIDR", []string{"10.0.0.0/8"}},
+		{"mixed IPs and CIDRs", []string{"192.168.1.0/24", "10.0.0.1", "2001:db8::/32"}},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			key := &domain.APIKey{
+				Name:       "test-ips-" + tt.name,
+				KeyPrefix:  "ip123456",
+				Scopes:     []domain.Scope{domain.ScopeProjectsRead},
+				AllowedIPs: tt.allowedIPs,
+				CreatedBy:  "test",
+			}
+			if err := repo.Create(ctx, key, hashKey("ips-"+tt.name)); err != nil {
+				t.Fatalf("Create() error = %v", err)
+			}
+
+			retrieved, err := repo.Get(ctx, key.ID)
+			if err != nil {
+				t.Fatalf("Get() error = %v", err)
+			}
+
+			expectedLen := 0
+			if tt.allowedIPs != nil {
+				expectedLen = len(tt.allowedIPs)
+			}
+
+			if len(retrieved.AllowedIPs) != expectedLen {
+				t.Errorf("AllowedIPs length = %d, want %d", len(retrieved.AllowedIPs), expectedLen)
+			}
+
+			// Verify content preserved
+			for i, expected := range tt.allowedIPs {
+				if i < len(retrieved.AllowedIPs) && retrieved.AllowedIPs[i] != expected {
+					t.Errorf("AllowedIPs[%d] = %q, want %q", i, retrieved.AllowedIPs[i], expected)
+				}
+			}
+		})
+	}
+}
+
+// Helper function conversion tests
+func TestScopesToStrings(t *testing.T) {
+	scopes := []domain.Scope{domain.ScopeProjectsRead, domain.ScopeAdmin}
+	strings := scopesToStrings(scopes)
+
+	if len(strings) != 2 {
+		t.Fatalf("Length = %d, want 2", len(strings))
+	}
+	if strings[0] != "projects:read" {
+		t.Errorf("strings[0] = %q, want %q", strings[0], "projects:read")
+	}
+	if strings[1] != "admin" {
+		t.Errorf("strings[1] = %q, want %q", strings[1], "admin")
+	}
+}
+
+func TestScopesFromStrings(t *testing.T) {
+	strings := []string{"projects:read", "keys:manage"}
+	scopes := scopesFromStrings(strings)
+
+	if len(scopes) != 2 {
+		t.Fatalf("Length = %d, want 2", len(scopes))
+	}
+	if scopes[0] != domain.ScopeProjectsRead {
+		t.Errorf("scopes[0] = %q, want %q", scopes[0], domain.ScopeProjectsRead)
+	}
+	if scopes[1] != domain.ScopeKeysManage {
+		t.Errorf("scopes[1] = %q, want %q", scopes[1], domain.ScopeKeysManage)
+	}
+}
+
+func TestProjectIDsToStrings(t *testing.T) {
+	t.Run("nil input", func(t *testing.T) {
+		result := projectIDsToStrings(nil)
+		if result != nil {
+			t.Errorf("Expected nil, got %v", result)
+		}
+	})
+
+	t.Run("non-nil input", func(t *testing.T) {
+		ids := []domain.ProjectID{"proj-a", "proj-b"}
+		result := projectIDsToStrings(ids)
+		if len(result) != 2 {
+			t.Fatalf("Length = %d, want 2", len(result))
+		}
+		if result[0] != "proj-a" || result[1] != "proj-b" {
+			t.Errorf("Unexpected result: %v", result)
+		}
+	})
+}
+
+func TestProjectIDsFromStrings(t *testing.T) {
+	t.Run("nil input", func(t *testing.T) {
+		result := projectIDsFromStrings(nil)
+		if result != nil {
+			t.Errorf("Expected nil, got %v", result)
+		}
+	})
+
+	t.Run("non-nil input", func(t *testing.T) {
+		strings := []string{"proj-x", "proj-y"}
+		result := projectIDsFromStrings(strings)
+		if len(result) != 2 {
+			t.Fatalf("Length = %d, want 2", len(result))
+		}
+		if result[0] != "proj-x" || result[1] != "proj-y" {
+			t.Errorf("Unexpected result: %v", result)
+		}
+	})
+}
--- a/internal/adapter/postgres/audit_logger.go
+++ b/internal/adapter/postgres/audit_logger.go
@ -0,0 +1,268 @@
+// Package postgres provides PostgreSQL-based implementations of port interfaces.
+package postgres
+
+import (
+	"context"
+	"database/sql"
+	"errors"
+	"fmt"
+	"strings"
+	"time"
+
+	"github.com/orchard9/rdev/internal/domain"
+	"github.com/orchard9/rdev/internal/port"
+)
+
+// AuditLogger implements port.AuditLogger using PostgreSQL.
+type AuditLogger struct {
+	db *sql.DB
+}
+
+// NewAuditLogger creates a new PostgreSQL audit logger.
+func NewAuditLogger(db *sql.DB) *AuditLogger {
+	return &AuditLogger{db: db}
+}
+
+// Ensure AuditLogger implements port.AuditLogger at compile time.
+var _ port.AuditLogger = (*AuditLogger)(nil)
+
+// LogCommandStart records the start of a command execution.
+func (l *AuditLogger) LogCommandStart(ctx context.Context, entry *domain.AuditLogEntry) error {
+	_, err := l.db.ExecContext(ctx, `
+		INSERT INTO audit_log (
+			id, api_key_id, command_id, project_id, command_type, args,
+			client_ip, user_agent, started_at, status, output_size_bytes
+		) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)
+	`,
+		entry.ID,
+		entry.APIKeyID,
+		entry.CommandID,
+		entry.ProjectID,
+		string(entry.CommandType),
+		entry.Args,
+		entry.ClientIP,
+		entry.UserAgent,
+		entry.StartedAt,
+		string(domain.AuditStatusRunning),
+		entry.OutputSizeBytes,
+	)
+	if err != nil {
+		return fmt.Errorf("insert audit log: %w", err)
+	}
+	return nil
+}
+
+// LogCommandEnd records the completion of a command execution.
+func (l *AuditLogger) LogCommandEnd(ctx context.Context, commandID string, result *domain.AuditResult) error {
+	completedAt := time.Now()
+	_, err := l.db.ExecContext(ctx, `
+		UPDATE audit_log
+		SET completed_at = $1,
+			exit_code = $2,
+			duration_ms = $3,
+			status = $4,
+			error_message = $5,
+			output_size_bytes = $6
+		WHERE command_id = $7
+	`,
+		completedAt,
+		result.ExitCode,
+		result.DurationMs,
+		string(result.Status),
+		result.ErrorMessage,
+		result.OutputSizeBytes,
+		commandID,
+	)
+	if err != nil {
+		return fmt.Errorf("update audit log: %w", err)
+	}
+	return nil
+}
+
+// List returns audit log entries matching the given filters.
+func (l *AuditLogger) List(ctx context.Context, filters domain.AuditFilters) ([]domain.AuditLogEntry, error) {
+	query := strings.Builder{}
+	query.WriteString(`
+		SELECT id, api_key_id, command_id, project_id, command_type, args,
+			client_ip, user_agent, started_at, completed_at, exit_code,
+			duration_ms, status, error_message, output_size_bytes, created_at
+		FROM audit_log
+		WHERE 1=1
+	`)
+
+	args := make([]any, 0)
+	argNum := 1
+
+	if filters.ProjectID != "" {
+		query.WriteString(fmt.Sprintf(" AND project_id = $%d", argNum))
+		args = append(args, filters.ProjectID)
+		argNum++
+	}
+
+	if filters.APIKeyID != "" {
+		query.WriteString(fmt.Sprintf(" AND api_key_id = $%d", argNum))
+		args = append(args, filters.APIKeyID)
+		argNum++
+	}
+
+	if filters.CommandType != "" {
+		query.WriteString(fmt.Sprintf(" AND command_type = $%d", argNum))
+		args = append(args, string(filters.CommandType))
+		argNum++
+	}
+
+	if filters.Status != "" {
+		query.WriteString(fmt.Sprintf(" AND status = $%d", argNum))
+		args = append(args, string(filters.Status))
+		argNum++
+	}
+
+	if filters.StartTime != nil {
+		query.WriteString(fmt.Sprintf(" AND created_at >= $%d", argNum))
+		args = append(args, *filters.StartTime)
+		argNum++
+	}
+
+	if filters.EndTime != nil {
+		query.WriteString(fmt.Sprintf(" AND created_at < $%d", argNum))
+		args = append(args, *filters.EndTime)
+		argNum++
+	}
+
+	query.WriteString(" ORDER BY created_at DESC")
+
+	if filters.Limit > 0 {
+		query.WriteString(fmt.Sprintf(" LIMIT $%d", argNum))
+		args = append(args, filters.Limit)
+		argNum++
+	}
+
+	if filters.Offset > 0 {
+		query.WriteString(fmt.Sprintf(" OFFSET $%d", argNum))
+		args = append(args, filters.Offset)
+	}
+
+	rows, err := l.db.QueryContext(ctx, query.String(), args...)
+	if err != nil {
+		return nil, fmt.Errorf("query audit log: %w", err)
+	}
+	defer func() { _ = rows.Close() }()
+
+	var entries []domain.AuditLogEntry
+	for rows.Next() {
+		var entry domain.AuditLogEntry
+		var commandType string
+		var status string
+		var completedAt sql.NullTime
+		var exitCode sql.NullInt32
+		var durationMs sql.NullInt64
+		var errorMessage sql.NullString
+
+		if err := rows.Scan(
+			&entry.ID,
+			&entry.APIKeyID,
+			&entry.CommandID,
+			&entry.ProjectID,
+			&commandType,
+			&entry.Args,
+			&entry.ClientIP,
+			&entry.UserAgent,
+			&entry.StartedAt,
+			&completedAt,
+			&exitCode,
+			&durationMs,
+			&status,
+			&errorMessage,
+			&entry.OutputSizeBytes,
+			&entry.CreatedAt,
+		); err != nil {
+			return nil, fmt.Errorf("scan audit log: %w", err)
+		}
+
+		entry.CommandType = domain.CommandType(commandType)
+		entry.Status = domain.AuditStatus(status)
+		if completedAt.Valid {
+			entry.CompletedAt = &completedAt.Time
+		}
+		if exitCode.Valid {
+			ec := int(exitCode.Int32)
+			entry.ExitCode = &ec
+		}
+		if durationMs.Valid {
+			dm := durationMs.Int64
+			entry.DurationMs = &dm
+		}
+		if errorMessage.Valid {
+			entry.ErrorMessage = errorMessage.String
+		}
+		entries = append(entries, entry)
+	}
+
+	if err := rows.Err(); err != nil {
+		return nil, fmt.Errorf("iterate audit log: %w", err)
+	}
+
+	return entries, nil
+}
+
+// Get returns a single audit log entry by command ID.
+func (l *AuditLogger) Get(ctx context.Context, commandID string) (*domain.AuditLogEntry, error) {
+	var entry domain.AuditLogEntry
+	var commandType string
+	var status string
+	var completedAt sql.NullTime
+	var exitCode sql.NullInt32
+	var durationMs sql.NullInt64
+	var errorMessage sql.NullString
+
+	err := l.db.QueryRowContext(ctx, `
+		SELECT id, api_key_id, command_id, project_id, command_type, args,
+			client_ip, user_agent, started_at, completed_at, exit_code,
+			duration_ms, status, error_message, output_size_bytes, created_at
+		FROM audit_log
+		WHERE command_id = $1
+	`, commandID).Scan(
+		&entry.ID,
+		&entry.APIKeyID,
+		&entry.CommandID,
+		&entry.ProjectID,
+		&commandType,
+		&entry.Args,
+		&entry.ClientIP,
+		&entry.UserAgent,
+		&entry.StartedAt,
+		&completedAt,
+		&exitCode,
+		&durationMs,
+		&status,
+		&errorMessage,
+		&entry.OutputSizeBytes,
+		&entry.CreatedAt,
+	)
+
+	if errors.Is(err, sql.ErrNoRows) {
+		return nil, domain.ErrAuditNotFound
+	}
+	if err != nil {
+		return nil, fmt.Errorf("query audit log: %w", err)
+	}
+
+	entry.CommandType = domain.CommandType(commandType)
+	entry.Status = domain.AuditStatus(status)
+	if completedAt.Valid {
+		entry.CompletedAt = &completedAt.Time
+	}
+	if exitCode.Valid {
+		ec := int(exitCode.Int32)
+		entry.ExitCode = &ec
+	}
+	if durationMs.Valid {
+		dm := durationMs.Int64
+		entry.DurationMs = &dm
+	}
+	if errorMessage.Valid {
+		entry.ErrorMessage = errorMessage.String
+	}
+
+	return &entry, nil
+}
--- a/internal/adapter/postgres/audit_logger_test.go
+++ b/internal/adapter/postgres/audit_logger_test.go
@ -0,0 +1,316 @@
+package postgres
+
+import (
+	"context"
+	"database/sql"
+	"testing"
+	"time"
+
+	"github.com/orchard9/rdev/internal/domain"
+	"github.com/orchard9/rdev/internal/testutil"
+)
+
+func cleanupTestAuditLogs(t *testing.T, db *sql.DB) {
+	t.Helper()
+	_, err := db.Exec("DELETE FROM audit_log WHERE args LIKE 'test-%'")
+	if err != nil {
+		t.Logf("cleanup test audit logs: %v", err)
+	}
+}
+
+func TestAuditLogger_LogCommandStart(t *testing.T) {
+	db := testutil.TestDB(t)
+	t.Cleanup(func() { cleanupTestAuditLogs(t, db) })
+
+	logger := NewAuditLogger(db)
+	ctx := context.Background()
+
+	t.Run("logs command start successfully", func(t *testing.T) {
+		now := time.Now()
+		entry := &domain.AuditLogEntry{
+			ID:              "audit-test-1",
+			APIKeyID:        "key-test-1",
+			CommandID:       "cmd-test-1",
+			ProjectID:       "proj-test-1",
+			CommandType:     domain.CommandTypeClaude,
+			Args:            "test-args-1",
+			ClientIP:        "127.0.0.1",
+			UserAgent:       "test-agent",
+			StartedAt:       now,
+			OutputSizeBytes: 0,
+		}
+
+		err := logger.LogCommandStart(ctx, entry)
+		if err != nil {
+			t.Fatalf("LogCommandStart() error = %v", err)
+		}
+
+		// Verify by retrieving
+		retrieved, err := logger.Get(ctx, "cmd-test-1")
+		if err != nil {
+			t.Fatalf("Get() error = %v", err)
+		}
+
+		if retrieved.CommandID != "cmd-test-1" {
+			t.Errorf("CommandID = %q, want %q", retrieved.CommandID, "cmd-test-1")
+		}
+		if retrieved.Status != domain.AuditStatusRunning {
+			t.Errorf("Status = %q, want %q", retrieved.Status, domain.AuditStatusRunning)
+		}
+	})
+}
+
+func TestAuditLogger_LogCommandEnd(t *testing.T) {
+	db := testutil.TestDB(t)
+	t.Cleanup(func() { cleanupTestAuditLogs(t, db) })
+
+	logger := NewAuditLogger(db)
+	ctx := context.Background()
+
+	t.Run("logs command end successfully", func(t *testing.T) {
+		// First create a command start
+		now := time.Now()
+		entry := &domain.AuditLogEntry{
+			ID:          "audit-test-end-1",
+			APIKeyID:    "key-test-2",
+			CommandID:   "cmd-test-end-1",
+			ProjectID:   "proj-test-2",
+			CommandType: domain.CommandTypeShell,
+			Args:        "test-end-args",
+			ClientIP:    "127.0.0.1",
+			UserAgent:   "test-agent",
+			StartedAt:   now,
+		}
+
+		err := logger.LogCommandStart(ctx, entry)
+		if err != nil {
+			t.Fatalf("LogCommandStart() error = %v", err)
+		}
+
+		// Now log the end
+		result := &domain.AuditResult{
+			ExitCode:        0,
+			DurationMs:      1000,
+			Status:          domain.AuditStatusSuccess,
+			ErrorMessage:    "",
+			OutputSizeBytes: 256,
+		}
+
+		err = logger.LogCommandEnd(ctx, "cmd-test-end-1", result)
+		if err != nil {
+			t.Fatalf("LogCommandEnd() error = %v", err)
+		}
+
+		// Verify
+		retrieved, err := logger.Get(ctx, "cmd-test-end-1")
+		if err != nil {
+			t.Fatalf("Get() error = %v", err)
+		}
+
+		if retrieved.Status != domain.AuditStatusSuccess {
+			t.Errorf("Status = %q, want %q", retrieved.Status, domain.AuditStatusSuccess)
+		}
+		if retrieved.ExitCode == nil || *retrieved.ExitCode != 0 {
+			t.Errorf("ExitCode = %v, want 0", retrieved.ExitCode)
+		}
+		if retrieved.DurationMs == nil || *retrieved.DurationMs != 1000 {
+			t.Errorf("DurationMs = %v, want 1000", retrieved.DurationMs)
+		}
+		if retrieved.CompletedAt == nil {
+			t.Error("CompletedAt should be set")
+		}
+	})
+
+	t.Run("logs failed command", func(t *testing.T) {
+		entry := &domain.AuditLogEntry{
+			ID:          "audit-test-fail-1",
+			APIKeyID:    "key-test-3",
+			CommandID:   "cmd-test-fail-1",
+			ProjectID:   "proj-test-3",
+			CommandType: domain.CommandTypeShell,
+			Args:        "test-fail-args",
+			ClientIP:    "127.0.0.1",
+			UserAgent:   "test-agent",
+			StartedAt:   time.Now(),
+		}
+
+		_ = logger.LogCommandStart(ctx, entry)
+
+		result := &domain.AuditResult{
+			ExitCode:     1,
+			DurationMs:   500,
+			Status:       domain.AuditStatusError,
+			ErrorMessage: "command failed",
+		}
+
+		err := logger.LogCommandEnd(ctx, "cmd-test-fail-1", result)
+		if err != nil {
+			t.Fatalf("LogCommandEnd() error = %v", err)
+		}
+
+		retrieved, _ := logger.Get(ctx, "cmd-test-fail-1")
+		if retrieved.Status != domain.AuditStatusError {
+			t.Errorf("Status = %q, want %q", retrieved.Status, domain.AuditStatusError)
+		}
+		if retrieved.ErrorMessage != "command failed" {
+			t.Errorf("ErrorMessage = %q, want %q", retrieved.ErrorMessage, "command failed")
+		}
+	})
+}
+
+func TestAuditLogger_List(t *testing.T) {
+	db := testutil.TestDB(t)
+	t.Cleanup(func() { cleanupTestAuditLogs(t, db) })
+
+	logger := NewAuditLogger(db)
+	ctx := context.Background()
+
+	// Create test entries
+	now := time.Now()
+	for i := 0; i < 5; i++ {
+		entry := &domain.AuditLogEntry{
+			ID:          "audit-list-" + string(rune('a'+i)),
+			APIKeyID:    "key-list-1",
+			CommandID:   "cmd-list-" + string(rune('a'+i)),
+			ProjectID:   "proj-list-1",
+			CommandType: domain.CommandTypeClaude,
+			Args:        "test-list-args",
+			ClientIP:    "127.0.0.1",
+			UserAgent:   "test-agent",
+			StartedAt:   now.Add(time.Duration(i) * time.Minute),
+		}
+		_ = logger.LogCommandStart(ctx, entry)
+	}
+
+	t.Run("lists all entries", func(t *testing.T) {
+		entries, err := logger.List(ctx, domain.AuditFilters{
+			ProjectID: "proj-list-1",
+		})
+		if err != nil {
+			t.Fatalf("List() error = %v", err)
+		}
+
+		if len(entries) < 5 {
+			t.Errorf("List() returned %d entries, want at least 5", len(entries))
+		}
+	})
+
+	t.Run("filters by project", func(t *testing.T) {
+		// Create entry in different project
+		entry := &domain.AuditLogEntry{
+			ID:          "audit-list-other",
+			APIKeyID:    "key-list-2",
+			CommandID:   "cmd-list-other",
+			ProjectID:   "proj-list-other",
+			CommandType: domain.CommandTypeClaude,
+			Args:        "test-list-other",
+			ClientIP:    "127.0.0.1",
+			UserAgent:   "test-agent",
+			StartedAt:   now,
+		}
+		_ = logger.LogCommandStart(ctx, entry)
+
+		entries, err := logger.List(ctx, domain.AuditFilters{
+			ProjectID: "proj-list-other",
+		})
+		if err != nil {
+			t.Fatalf("List() error = %v", err)
+		}
+
+		// Check all entries have the filtered project
+		for _, e := range entries {
+			if e.ProjectID != "proj-list-other" {
+				t.Errorf("Entry has ProjectID = %q, want %q", e.ProjectID, "proj-list-other")
+			}
+		}
+	})
+
+	t.Run("applies limit and offset", func(t *testing.T) {
+		entries, err := logger.List(ctx, domain.AuditFilters{
+			ProjectID: "proj-list-1",
+			Limit:     2,
+			Offset:    0,
+		})
+		if err != nil {
+			t.Fatalf("List() error = %v", err)
+		}
+
+		if len(entries) != 2 {
+			t.Errorf("List() returned %d entries, want 2", len(entries))
+		}
+	})
+
+	t.Run("filters by command type", func(t *testing.T) {
+		entries, err := logger.List(ctx, domain.AuditFilters{
+			ProjectID:   "proj-list-1",
+			CommandType: domain.CommandTypeClaude,
+		})
+		if err != nil {
+			t.Fatalf("List() error = %v", err)
+		}
+
+		for _, e := range entries {
+			if e.CommandType != domain.CommandTypeClaude {
+				t.Errorf("Entry has CommandType = %q, want %q", e.CommandType, domain.CommandTypeClaude)
+			}
+		}
+	})
+
+	t.Run("filters by status", func(t *testing.T) {
+		entries, err := logger.List(ctx, domain.AuditFilters{
+			ProjectID: "proj-list-1",
+			Status:    domain.AuditStatusRunning,
+		})
+		if err != nil {
+			t.Fatalf("List() error = %v", err)
+		}
+
+		for _, e := range entries {
+			if e.Status != domain.AuditStatusRunning {
+				t.Errorf("Entry has Status = %q, want %q", e.Status, domain.AuditStatusRunning)
+			}
+		}
+	})
+}
+
+func TestAuditLogger_Get(t *testing.T) {
+	db := testutil.TestDB(t)
+	t.Cleanup(func() { cleanupTestAuditLogs(t, db) })
+
+	logger := NewAuditLogger(db)
+	ctx := context.Background()
+
+	t.Run("gets existing entry", func(t *testing.T) {
+		entry := &domain.AuditLogEntry{
+			ID:          "audit-get-1",
+			APIKeyID:    "key-get-1",
+			CommandID:   "cmd-get-1",
+			ProjectID:   "proj-get-1",
+			CommandType: domain.CommandTypeClaude,
+			Args:        "test-get-args",
+			ClientIP:    "10.0.0.1",
+			UserAgent:   "test-agent-get",
+			StartedAt:   time.Now(),
+		}
+		logger.LogCommandStart(ctx, entry)
+
+		retrieved, err := logger.Get(ctx, "cmd-get-1")
+		if err != nil {
+			t.Fatalf("Get() error = %v", err)
+		}
+
+		if retrieved.CommandID != "cmd-get-1" {
+			t.Errorf("CommandID = %q, want %q", retrieved.CommandID, "cmd-get-1")
+		}
+		if retrieved.ClientIP != "10.0.0.1" {
+			t.Errorf("ClientIP = %q, want %q", retrieved.ClientIP, "10.0.0.1")
+		}
+	})
+
+	t.Run("returns error for non-existent entry", func(t *testing.T) {
+		_, err := logger.Get(ctx, "cmd-nonexistent")
+		if err != domain.ErrAuditNotFound {
+			t.Errorf("Get() error = %v, want %v", err, domain.ErrAuditNotFound)
+		}
+	})
+}
--- a/internal/adapter/postgres/command_queue.go
+++ b/internal/adapter/postgres/command_queue.go
@ -0,0 +1,417 @@
+// Package postgres provides PostgreSQL-based implementations of port interfaces.
+package postgres
+
+import (
+	"context"
+	"database/sql"
+	"errors"
+	"fmt"
+	"time"
+
+	"github.com/orchard9/rdev/internal/domain"
+	"github.com/orchard9/rdev/internal/port"
+)
+
+// CommandQueueRepository implements port.CommandQueue using PostgreSQL.
+type CommandQueueRepository struct {
+	db *sql.DB
+}
+
+// NewCommandQueueRepository creates a new PostgreSQL command queue repository.
+func NewCommandQueueRepository(db *sql.DB) *CommandQueueRepository {
+	return &CommandQueueRepository{db: db}
+}
+
+// Ensure CommandQueueRepository implements port.CommandQueue at compile time.
+var _ port.CommandQueue = (*CommandQueueRepository)(nil)
+
+// Enqueue adds a command to the queue.
+func (r *CommandQueueRepository) Enqueue(ctx context.Context, cmd *domain.QueuedCommand) error {
+	var id string
+	err := r.db.QueryRowContext(ctx, `
+		INSERT INTO command_queue (project_id, command, command_type, working_dir, status, priority, api_key_id)
+		VALUES ($1, $2, $3, $4, $5, $6, $7)
+		RETURNING id, created_at
+	`, cmd.ProjectID, cmd.Command, string(cmd.CommandType), nullString(cmd.WorkingDir),
+		string(cmd.Status), cmd.Priority, nullString(cmd.APIKeyID)).Scan(&id, &cmd.CreatedAt)
+
+	if err != nil {
+		return fmt.Errorf("enqueue command: %w", err)
+	}
+
+	cmd.ID = domain.QueuedCommandID(id)
+	return nil
+}
+
+// Dequeue retrieves and locks the next pending command for a project.
+// Uses FOR UPDATE SKIP LOCKED for safe concurrent access.
+func (r *CommandQueueRepository) Dequeue(ctx context.Context, projectID string) (*domain.QueuedCommand, error) {
+	// Use a transaction to atomically select and update
+	tx, err := r.db.BeginTx(ctx, nil)
+	if err != nil {
+		return nil, fmt.Errorf("begin transaction: %w", err)
+	}
+	defer func() { _ = tx.Rollback() }()
+
+	var cmd domain.QueuedCommand
+	var id string
+	var commandType string
+	var status string
+	var workingDir sql.NullString
+	var apiKeyID sql.NullString
+
+	// Select the highest priority pending command and lock it
+	err = tx.QueryRowContext(ctx, `
+		SELECT id, project_id, command, command_type, working_dir, status, priority, created_at, api_key_id
+		FROM command_queue
+		WHERE project_id = $1 AND status = 'pending'
+		ORDER BY priority DESC, created_at ASC
+		LIMIT 1
+		FOR UPDATE SKIP LOCKED
+	`, projectID).Scan(
+		&id,
+		&cmd.ProjectID,
+		&cmd.Command,
+		&commandType,
+		&workingDir,
+		&status,
+		&cmd.Priority,
+		&cmd.CreatedAt,
+		&apiKeyID,
+	)
+
+	if errors.Is(err, sql.ErrNoRows) {
+		return nil, nil // No pending commands
+	}
+	if err != nil {
+		return nil, fmt.Errorf("select pending command: %w", err)
+	}
+
+	cmd.ID = domain.QueuedCommandID(id)
+	cmd.CommandType = domain.CommandType(commandType)
+	cmd.Status = domain.QueueStatus(status)
+	if workingDir.Valid {
+		cmd.WorkingDir = workingDir.String
+	}
+	if apiKeyID.Valid {
+		cmd.APIKeyID = apiKeyID.String
+	}
+
+	// Update status to running
+	now := time.Now()
+	_, err = tx.ExecContext(ctx, `
+		UPDATE command_queue
+		SET status = 'running', started_at = $1
+		WHERE id = $2
+	`, now, id)
+	if err != nil {
+		return nil, fmt.Errorf("update to running: %w", err)
+	}
+
+	if err := tx.Commit(); err != nil {
+		return nil, fmt.Errorf("commit transaction: %w", err)
+	}
+
+	cmd.Status = domain.QueueStatusRunning
+	cmd.StartedAt = &now
+	return &cmd, nil
+}
+
+// UpdateStatus updates the status of a queued command.
+func (r *CommandQueueRepository) UpdateStatus(ctx context.Context, cmdID domain.QueuedCommandID, status domain.QueueStatus, result *domain.QueuedCommandResult) error {
+	var err error
+
+	if result != nil {
+		now := time.Now()
+		_, err = r.db.ExecContext(ctx, `
+			UPDATE command_queue
+			SET status = $1, completed_at = $2, result_exit_code = $3, result_output = $4, result_error = $5
+			WHERE id = $6
+		`, string(status), now, result.ExitCode, nullString(result.Output), nullString(result.Error), string(cmdID))
+	} else {
+		_, err = r.db.ExecContext(ctx, `
+			UPDATE command_queue
+			SET status = $1
+			WHERE id = $2
+		`, string(status), string(cmdID))
+	}
+
+	if err != nil {
+		return fmt.Errorf("update status: %w", err)
+	}
+	return nil
+}
+
+// GetByID retrieves a specific queued command by ID.
+func (r *CommandQueueRepository) GetByID(ctx context.Context, cmdID domain.QueuedCommandID) (*domain.QueuedCommand, error) {
+	var cmd domain.QueuedCommand
+	var id string
+	var commandType string
+	var status string
+	var workingDir sql.NullString
+	var startedAt sql.NullTime
+	var completedAt sql.NullTime
+	var exitCode sql.NullInt32
+	var output sql.NullString
+	var resultError sql.NullString
+	var apiKeyID sql.NullString
+
+	err := r.db.QueryRowContext(ctx, `
+		SELECT id, project_id, command, command_type, working_dir, status, priority,
+		       created_at, started_at, completed_at, result_exit_code, result_output, result_error, api_key_id
+		FROM command_queue
+		WHERE id = $1
+	`, string(cmdID)).Scan(
+		&id,
+		&cmd.ProjectID,
+		&cmd.Command,
+		&commandType,
+		&workingDir,
+		&status,
+		&cmd.Priority,
+		&cmd.CreatedAt,
+		&startedAt,
+		&completedAt,
+		&exitCode,
+		&output,
+		&resultError,
+		&apiKeyID,
+	)
+
+	if errors.Is(err, sql.ErrNoRows) {
+		return nil, domain.ErrCommandNotFound
+	}
+	if err != nil {
+		return nil, fmt.Errorf("get command: %w", err)
+	}
+
+	cmd.ID = domain.QueuedCommandID(id)
+	cmd.CommandType = domain.CommandType(commandType)
+	cmd.Status = domain.QueueStatus(status)
+
+	if workingDir.Valid {
+		cmd.WorkingDir = workingDir.String
+	}
+	if startedAt.Valid {
+		cmd.StartedAt = &startedAt.Time
+	}
+	if completedAt.Valid {
+		cmd.CompletedAt = &completedAt.Time
+	}
+	if exitCode.Valid {
+		ec := int(exitCode.Int32)
+		cmd.ExitCode = &ec
+	}
+	if output.Valid {
+		cmd.Output = output.String
+	}
+	if resultError.Valid {
+		cmd.Error = resultError.String
+	}
+	if apiKeyID.Valid {
+		cmd.APIKeyID = apiKeyID.String
+	}
+
+	return &cmd, nil
+}
+
+// List returns queued commands for a project with optional filters.
+func (r *CommandQueueRepository) List(ctx context.Context, projectID string, filters *domain.QueueFilters) ([]*domain.QueuedCommand, error) {
+	if filters == nil {
+		filters = domain.DefaultQueueFilters()
+	}
+
+	// Build query with optional filters
+	query := `
+		SELECT id, project_id, command, command_type, working_dir, status, priority,
+		       created_at, started_at, completed_at, result_exit_code, result_output, result_error, api_key_id
+		FROM command_queue
+		WHERE project_id = $1
+	`
+	args := []any{projectID}
+	argNum := 2
+
+	if filters.Status != nil {
+		query += fmt.Sprintf(" AND status = $%d", argNum)
+		args = append(args, string(*filters.Status))
+		argNum++
+	}
+
+	// Sort order
+	if filters.SortOrder == "asc" {
+		query += " ORDER BY created_at ASC"
+	} else {
+		query += " ORDER BY created_at DESC"
+	}
+
+	// Pagination
+	query += fmt.Sprintf(" LIMIT $%d OFFSET $%d", argNum, argNum+1)
+	args = append(args, filters.Limit, filters.Offset)
+
+	rows, err := r.db.QueryContext(ctx, query, args...)
+	if err != nil {
+		return nil, fmt.Errorf("list commands: %w", err)
+	}
+	defer func() { _ = rows.Close() }()
+
+	var commands []*domain.QueuedCommand
+	for rows.Next() {
+		var cmd domain.QueuedCommand
+		var id string
+		var commandType string
+		var status string
+		var workingDir sql.NullString
+		var startedAt sql.NullTime
+		var completedAt sql.NullTime
+		var exitCode sql.NullInt32
+		var output sql.NullString
+		var resultError sql.NullString
+		var apiKeyID sql.NullString
+
+		if err := rows.Scan(
+			&id,
+			&cmd.ProjectID,
+			&cmd.Command,
+			&commandType,
+			&workingDir,
+			&status,
+			&cmd.Priority,
+			&cmd.CreatedAt,
+			&startedAt,
+			&completedAt,
+			&exitCode,
+			&output,
+			&resultError,
+			&apiKeyID,
+		); err != nil {
+			return nil, fmt.Errorf("scan command: %w", err)
+		}
+
+		cmd.ID = domain.QueuedCommandID(id)
+		cmd.CommandType = domain.CommandType(commandType)
+		cmd.Status = domain.QueueStatus(status)
+
+		if workingDir.Valid {
+			cmd.WorkingDir = workingDir.String
+		}
+		if startedAt.Valid {
+			cmd.StartedAt = &startedAt.Time
+		}
+		if completedAt.Valid {
+			cmd.CompletedAt = &completedAt.Time
+		}
+		if exitCode.Valid {
+			ec := int(exitCode.Int32)
+			cmd.ExitCode = &ec
+		}
+		if output.Valid {
+			cmd.Output = output.String
+		}
+		if resultError.Valid {
+			cmd.Error = resultError.String
+		}
+		if apiKeyID.Valid {
+			cmd.APIKeyID = apiKeyID.String
+		}
+
+		commands = append(commands, &cmd)
+	}
+
+	return commands, nil
+}
+
+// Cancel marks a pending command as cancelled.
+func (r *CommandQueueRepository) Cancel(ctx context.Context, cmdID domain.QueuedCommandID) error {
+	result, err := r.db.ExecContext(ctx, `
+		UPDATE command_queue
+		SET status = 'cancelled', completed_at = NOW()
+		WHERE id = $1 AND status = 'pending'
+	`, string(cmdID))
+	if err != nil {
+		return fmt.Errorf("cancel command: %w", err)
+	}
+
+	rows, err := result.RowsAffected()
+	if err != nil {
+		return fmt.Errorf("rows affected: %w", err)
+	}
+
+	if rows == 0 {
+		// Check if command exists
+		var exists bool
+		err := r.db.QueryRowContext(ctx, `SELECT EXISTS(SELECT 1 FROM command_queue WHERE id = $1)`, string(cmdID)).Scan(&exists)
+		if err != nil {
+			return fmt.Errorf("check exists: %w", err)
+		}
+		if !exists {
+			return domain.ErrCommandNotFound
+		}
+		// Command exists but not in pending state
+		return fmt.Errorf("command is not in pending state")
+	}
+
+	return nil
+}
+
+// GetStats returns queue statistics for a project (or all projects if empty).
+func (r *CommandQueueRepository) GetStats(ctx context.Context, projectID string) (*domain.QueueStats, error) {
+	var stats domain.QueueStats
+
+	query := `
+		SELECT
+			COUNT(*) FILTER (WHERE status = 'pending') as pending,
+			COUNT(*) FILTER (WHERE status = 'running') as running,
+			COUNT(*) FILTER (WHERE status = 'completed') as completed,
+			COUNT(*) FILTER (WHERE status = 'failed') as failed,
+			COUNT(*) FILTER (WHERE status = 'cancelled') as cancelled
+		FROM command_queue
+	`
+
+	var err error
+	if projectID != "" {
+		query += " WHERE project_id = $1"
+		err = r.db.QueryRowContext(ctx, query, projectID).Scan(
+			&stats.TotalPending,
+			&stats.TotalRunning,
+			&stats.TotalCompleted,
+			&stats.TotalFailed,
+			&stats.TotalCancelled,
+		)
+	} else {
+		err = r.db.QueryRowContext(ctx, query).Scan(
+			&stats.TotalPending,
+			&stats.TotalRunning,
+			&stats.TotalCompleted,
+			&stats.TotalFailed,
+			&stats.TotalCancelled,
+		)
+	}
+
+	if err != nil {
+		return nil, fmt.Errorf("get stats: %w", err)
+	}
+
+	return &stats, nil
+}
+
+// CleanupOld removes completed/failed/cancelled commands older than the specified duration.
+func (r *CommandQueueRepository) CleanupOld(ctx context.Context, olderThanDays int) (int64, error) {
+	result, err := r.db.ExecContext(ctx, `
+		DELETE FROM command_queue
+		WHERE status IN ('completed', 'failed', 'cancelled')
+		AND completed_at < NOW() - INTERVAL '1 day' * $1
+	`, olderThanDays)
+	if err != nil {
+		return 0, fmt.Errorf("cleanup old commands: %w", err)
+	}
+
+	return result.RowsAffected()
+}
+
+// nullString returns a sql.NullString for optional string fields.
+func nullString(s string) sql.NullString {
+	if s == "" {
+		return sql.NullString{}
+	}
+	return sql.NullString{String: s, Valid: true}
+}
--- a/internal/adapter/postgres/command_queue_test.go
+++ b/internal/adapter/postgres/command_queue_test.go
@ -0,0 +1,487 @@
+package postgres
+
+import (
+	"context"
+	"database/sql"
+	"testing"
+	"time"
+
+	"github.com/orchard9/rdev/internal/domain"
+	"github.com/orchard9/rdev/internal/testutil"
+)
+
+func cleanupTestQueue(t *testing.T, db *sql.DB) {
+	t.Helper()
+	_, err := db.Exec("DELETE FROM command_queue WHERE project_id LIKE 'test-%'")
+	if err != nil {
+		t.Logf("cleanup test queue: %v", err)
+	}
+}
+
+func TestCommandQueueRepository_Enqueue(t *testing.T) {
+	db := testutil.TestDB(t)
+	t.Cleanup(func() { cleanupTestQueue(t, db) })
+
+	repo := NewCommandQueueRepository(db)
+	ctx := context.Background()
+
+	t.Run("enqueues command successfully", func(t *testing.T) {
+		cmd := &domain.QueuedCommand{
+			ProjectID:   "test-proj-enqueue-1",
+			Command:     "explain this code",
+			CommandType: domain.CommandTypeClaude,
+			Status:      domain.QueueStatusPending,
+			Priority:    0,
+			APIKeyID:    "key-1",
+		}
+
+		err := repo.Enqueue(ctx, cmd)
+		if err != nil {
+			t.Fatalf("Enqueue() error = %v", err)
+		}
+
+		if cmd.ID == "" {
+			t.Error("ID should be set after enqueue")
+		}
+		if cmd.CreatedAt.IsZero() {
+			t.Error("CreatedAt should be set after enqueue")
+		}
+	})
+
+	t.Run("enqueues with working directory", func(t *testing.T) {
+		cmd := &domain.QueuedCommand{
+			ProjectID:   "test-proj-enqueue-2",
+			Command:     "ls -la",
+			CommandType: domain.CommandTypeShell,
+			WorkingDir:  "/tmp",
+			Status:      domain.QueueStatusPending,
+			Priority:    1,
+		}
+
+		err := repo.Enqueue(ctx, cmd)
+		if err != nil {
+			t.Fatalf("Enqueue() error = %v", err)
+		}
+
+		// Retrieve and verify
+		retrieved, err := repo.GetByID(ctx, cmd.ID)
+		if err != nil {
+			t.Fatalf("GetByID() error = %v", err)
+		}
+
+		if retrieved.WorkingDir != "/tmp" {
+			t.Errorf("WorkingDir = %q, want %q", retrieved.WorkingDir, "/tmp")
+		}
+	})
+}
+
+func TestCommandQueueRepository_Dequeue(t *testing.T) {
+	db := testutil.TestDB(t)
+	t.Cleanup(func() { cleanupTestQueue(t, db) })
+
+	repo := NewCommandQueueRepository(db)
+	ctx := context.Background()
+
+	t.Run("dequeues pending command", func(t *testing.T) {
+		// Create a pending command
+		cmd := &domain.QueuedCommand{
+			ProjectID:   "test-proj-dequeue-1",
+			Command:     "test command",
+			CommandType: domain.CommandTypeClaude,
+			Status:      domain.QueueStatusPending,
+			Priority:    0,
+		}
+		_ = repo.Enqueue(ctx, cmd)
+
+		// Dequeue it
+		dequeued, err := repo.Dequeue(ctx, "test-proj-dequeue-1")
+		if err != nil {
+			t.Fatalf("Dequeue() error = %v", err)
+		}
+
+		if dequeued == nil {
+			t.Fatal("Dequeue() returned nil")
+		}
+
+		if dequeued.Status != domain.QueueStatusRunning {
+			t.Errorf("Status = %q, want %q", dequeued.Status, domain.QueueStatusRunning)
+		}
+		if dequeued.StartedAt == nil {
+			t.Error("StartedAt should be set after dequeue")
+		}
+	})
+
+	t.Run("returns nil when no pending commands", func(t *testing.T) {
+		dequeued, err := repo.Dequeue(ctx, "test-proj-dequeue-empty")
+		if err != nil {
+			t.Fatalf("Dequeue() error = %v", err)
+		}
+
+		if dequeued != nil {
+			t.Error("Dequeue() should return nil when no pending commands")
+		}
+	})
+
+	t.Run("dequeues highest priority first", func(t *testing.T) {
+		projectID := "test-proj-dequeue-priority"
+
+		// Create commands with different priorities
+		low := &domain.QueuedCommand{
+			ProjectID:   projectID,
+			Command:     "low priority",
+			CommandType: domain.CommandTypeClaude,
+			Status:      domain.QueueStatusPending,
+			Priority:    0,
+		}
+		_ = repo.Enqueue(ctx, low)
+
+		// Small delay to ensure different timestamps
+		time.Sleep(10 * time.Millisecond)
+
+		high := &domain.QueuedCommand{
+			ProjectID:   projectID,
+			Command:     "high priority",
+			CommandType: domain.CommandTypeClaude,
+			Status:      domain.QueueStatusPending,
+			Priority:    10,
+		}
+		_ = repo.Enqueue(ctx, high)
+
+		// Dequeue should get high priority first
+		dequeued, err := repo.Dequeue(ctx, projectID)
+		if err != nil {
+			t.Fatalf("Dequeue() error = %v", err)
+		}
+
+		if dequeued.Command != "high priority" {
+			t.Errorf("Command = %q, want %q", dequeued.Command, "high priority")
+		}
+	})
+}
+
+func TestCommandQueueRepository_UpdateStatus(t *testing.T) {
+	db := testutil.TestDB(t)
+	t.Cleanup(func() { cleanupTestQueue(t, db) })
+
+	repo := NewCommandQueueRepository(db)
+	ctx := context.Background()
+
+	t.Run("updates status without result", func(t *testing.T) {
+		cmd := &domain.QueuedCommand{
+			ProjectID:   "test-proj-status-1",
+			Command:     "test",
+			CommandType: domain.CommandTypeClaude,
+			Status:      domain.QueueStatusPending,
+		}
+		repo.Enqueue(ctx, cmd)
+
+		err := repo.UpdateStatus(ctx, cmd.ID, domain.QueueStatusRunning, nil)
+		if err != nil {
+			t.Fatalf("UpdateStatus() error = %v", err)
+		}
+
+		retrieved, _ := repo.GetByID(ctx, cmd.ID)
+		if retrieved.Status != domain.QueueStatusRunning {
+			t.Errorf("Status = %q, want %q", retrieved.Status, domain.QueueStatusRunning)
+		}
+	})
+
+	t.Run("updates status with result", func(t *testing.T) {
+		cmd := &domain.QueuedCommand{
+			ProjectID:   "test-proj-status-2",
+			Command:     "test",
+			CommandType: domain.CommandTypeShell,
+			Status:      domain.QueueStatusRunning,
+		}
+		repo.Enqueue(ctx, cmd)
+
+		result := &domain.QueuedCommandResult{
+			ExitCode: 0,
+			Output:   "success output",
+			Error:    "",
+		}
+
+		err := repo.UpdateStatus(ctx, cmd.ID, domain.QueueStatusCompleted, result)
+		if err != nil {
+			t.Fatalf("UpdateStatus() error = %v", err)
+		}
+
+		retrieved, _ := repo.GetByID(ctx, cmd.ID)
+		if retrieved.Status != domain.QueueStatusCompleted {
+			t.Errorf("Status = %q, want %q", retrieved.Status, domain.QueueStatusCompleted)
+		}
+		if retrieved.ExitCode == nil || *retrieved.ExitCode != 0 {
+			t.Errorf("ExitCode = %v, want 0", retrieved.ExitCode)
+		}
+		if retrieved.Output != "success output" {
+			t.Errorf("Output = %q, want %q", retrieved.Output, "success output")
+		}
+	})
+}
+
+func TestCommandQueueRepository_GetByID(t *testing.T) {
+	db := testutil.TestDB(t)
+	t.Cleanup(func() { cleanupTestQueue(t, db) })
+
+	repo := NewCommandQueueRepository(db)
+	ctx := context.Background()
+
+	t.Run("gets existing command", func(t *testing.T) {
+		cmd := &domain.QueuedCommand{
+			ProjectID:   "test-proj-getbyid",
+			Command:     "get by id test",
+			CommandType: domain.CommandTypeClaude,
+			WorkingDir:  "/test/dir",
+			Status:      domain.QueueStatusPending,
+			Priority:    5,
+			APIKeyID:    "key-getbyid",
+		}
+		repo.Enqueue(ctx, cmd)
+
+		retrieved, err := repo.GetByID(ctx, cmd.ID)
+		if err != nil {
+			t.Fatalf("GetByID() error = %v", err)
+		}
+
+		if retrieved.Command != "get by id test" {
+			t.Errorf("Command = %q, want %q", retrieved.Command, "get by id test")
+		}
+		if retrieved.Priority != 5 {
+			t.Errorf("Priority = %d, want 5", retrieved.Priority)
+		}
+		if retrieved.APIKeyID != "key-getbyid" {
+			t.Errorf("APIKeyID = %q, want %q", retrieved.APIKeyID, "key-getbyid")
+		}
+	})
+
+	t.Run("returns error for non-existent command", func(t *testing.T) {
+		_, err := repo.GetByID(ctx, "00000000-0000-0000-0000-000000000000")
+		if err != domain.ErrCommandNotFound {
+			t.Errorf("GetByID() error = %v, want %v", err, domain.ErrCommandNotFound)
+		}
+	})
+}
+
+func TestCommandQueueRepository_List(t *testing.T) {
+	db := testutil.TestDB(t)
+	t.Cleanup(func() { cleanupTestQueue(t, db) })
+
+	repo := NewCommandQueueRepository(db)
+	ctx := context.Background()
+
+	projectID := "test-proj-list"
+
+	// Create test commands
+	for i := 0; i < 5; i++ {
+		status := domain.QueueStatusPending
+		if i%2 == 0 {
+			status = domain.QueueStatusCompleted
+		}
+		cmd := &domain.QueuedCommand{
+			ProjectID:   projectID,
+			Command:     "list test " + string(rune('a'+i)),
+			CommandType: domain.CommandTypeClaude,
+			Status:      status,
+		}
+		repo.Enqueue(ctx, cmd)
+		time.Sleep(10 * time.Millisecond) // Ensure different timestamps
+	}
+
+	t.Run("lists all commands for project", func(t *testing.T) {
+		commands, err := repo.List(ctx, projectID, nil)
+		if err != nil {
+			t.Fatalf("List() error = %v", err)
+		}
+
+		if len(commands) < 5 {
+			t.Errorf("List() returned %d commands, want at least 5", len(commands))
+		}
+	})
+
+	t.Run("filters by status", func(t *testing.T) {
+		status := domain.QueueStatusPending
+		commands, err := repo.List(ctx, projectID, &domain.QueueFilters{
+			Status: &status,
+			Limit:  100,
+		})
+		if err != nil {
+			t.Fatalf("List() error = %v", err)
+		}
+
+		for _, cmd := range commands {
+			if cmd.Status != domain.QueueStatusPending {
+				t.Errorf("Command has Status = %q, want %q", cmd.Status, domain.QueueStatusPending)
+			}
+		}
+	})
+
+	t.Run("respects limit and offset", func(t *testing.T) {
+		commands, err := repo.List(ctx, projectID, &domain.QueueFilters{
+			Limit:  2,
+			Offset: 0,
+		})
+		if err != nil {
+			t.Fatalf("List() error = %v", err)
+		}
+
+		if len(commands) != 2 {
+			t.Errorf("List() returned %d commands, want 2", len(commands))
+		}
+	})
+
+	t.Run("respects sort order", func(t *testing.T) {
+		commands, err := repo.List(ctx, projectID, &domain.QueueFilters{
+			SortOrder: "asc",
+			Limit:     100,
+		})
+		if err != nil {
+			t.Fatalf("List() error = %v", err)
+		}
+
+		if len(commands) >= 2 {
+			if commands[0].CreatedAt.After(commands[1].CreatedAt) {
+				t.Error("List() with asc sort order should return oldest first")
+			}
+		}
+	})
+}
+
+func TestCommandQueueRepository_Cancel(t *testing.T) {
+	db := testutil.TestDB(t)
+	t.Cleanup(func() { cleanupTestQueue(t, db) })
+
+	repo := NewCommandQueueRepository(db)
+	ctx := context.Background()
+
+	t.Run("cancels pending command", func(t *testing.T) {
+		cmd := &domain.QueuedCommand{
+			ProjectID:   "test-proj-cancel",
+			Command:     "cancel test",
+			CommandType: domain.CommandTypeClaude,
+			Status:      domain.QueueStatusPending,
+		}
+		repo.Enqueue(ctx, cmd)
+
+		err := repo.Cancel(ctx, cmd.ID)
+		if err != nil {
+			t.Fatalf("Cancel() error = %v", err)
+		}
+
+		retrieved, _ := repo.GetByID(ctx, cmd.ID)
+		if retrieved.Status != domain.QueueStatusCancelled {
+			t.Errorf("Status = %q, want %q", retrieved.Status, domain.QueueStatusCancelled)
+		}
+		if retrieved.CompletedAt == nil {
+			t.Error("CompletedAt should be set after cancel")
+		}
+	})
+
+	t.Run("returns error for non-pending command", func(t *testing.T) {
+		cmd := &domain.QueuedCommand{
+			ProjectID:   "test-proj-cancel-running",
+			Command:     "cancel running test",
+			CommandType: domain.CommandTypeClaude,
+			Status:      domain.QueueStatusPending,
+		}
+		_ = repo.Enqueue(ctx, cmd)
+
+		// Make it running
+		_ = repo.UpdateStatus(ctx, cmd.ID, domain.QueueStatusRunning, nil)
+
+		err := repo.Cancel(ctx, cmd.ID)
+		if err == nil {
+			t.Error("Cancel() should return error for running command")
+		}
+	})
+
+	t.Run("returns error for non-existent command", func(t *testing.T) {
+		err := repo.Cancel(ctx, "00000000-0000-0000-0000-000000000000")
+		if err != domain.ErrCommandNotFound {
+			t.Errorf("Cancel() error = %v, want %v", err, domain.ErrCommandNotFound)
+		}
+	})
+}
+
+func TestCommandQueueRepository_GetStats(t *testing.T) {
+	db := testutil.TestDB(t)
+	t.Cleanup(func() { cleanupTestQueue(t, db) })
+
+	repo := NewCommandQueueRepository(db)
+	ctx := context.Background()
+
+	projectID := "test-proj-stats"
+
+	// Create commands with different statuses
+	statuses := []domain.QueueStatus{
+		domain.QueueStatusPending,
+		domain.QueueStatusPending,
+		domain.QueueStatusRunning,
+		domain.QueueStatusCompleted,
+		domain.QueueStatusFailed,
+	}
+
+	for i, status := range statuses {
+		cmd := &domain.QueuedCommand{
+			ProjectID:   projectID,
+			Command:     "stats test " + string(rune('a'+i)),
+			CommandType: domain.CommandTypeClaude,
+			Status:      status,
+		}
+		repo.Enqueue(ctx, cmd)
+	}
+
+	t.Run("returns correct stats", func(t *testing.T) {
+		stats, err := repo.GetStats(ctx, projectID)
+		if err != nil {
+			t.Fatalf("GetStats() error = %v", err)
+		}
+
+		if stats.TotalPending < 2 {
+			t.Errorf("TotalPending = %d, want at least 2", stats.TotalPending)
+		}
+		// Note: Running status is set during enqueue but some may be dequeued
+		if stats.TotalCompleted < 1 {
+			t.Errorf("TotalCompleted = %d, want at least 1", stats.TotalCompleted)
+		}
+		if stats.TotalFailed < 1 {
+			t.Errorf("TotalFailed = %d, want at least 1", stats.TotalFailed)
+		}
+	})
+}
+
+func TestCommandQueueRepository_CleanupOld(t *testing.T) {
+	db := testutil.TestDB(t)
+	t.Cleanup(func() { cleanupTestQueue(t, db) })
+
+	repo := NewCommandQueueRepository(db)
+	ctx := context.Background()
+
+	// Create completed commands (CleanupOld only removes terminal states)
+	for i := 0; i < 3; i++ {
+		cmd := &domain.QueuedCommand{
+			ProjectID:   "test-proj-cleanup",
+			Command:     "cleanup test " + string(rune('a'+i)),
+			CommandType: domain.CommandTypeClaude,
+			Status:      domain.QueueStatusPending,
+		}
+		_ = repo.Enqueue(ctx, cmd)
+
+		// Complete them
+		result := &domain.QueuedCommandResult{ExitCode: 0}
+		_ = repo.UpdateStatus(ctx, cmd.ID, domain.QueueStatusCompleted, result)
+	}
+
+	t.Run("cleanup runs without error", func(t *testing.T) {
+		// This won't delete newly created entries (they're not old enough)
+		// but we verify the function executes without error
+		deleted, err := repo.CleanupOld(ctx, 30)
+		if err != nil {
+			t.Fatalf("CleanupOld() error = %v", err)
+		}
+
+		// Newly created commands shouldn't be deleted
+		if deleted != 0 {
+			t.Logf("CleanupOld() deleted %d commands (expected 0 for new commands)", deleted)
+		}
+	})
+}
--- a/internal/adapter/postgres/rate_limiter.go
+++ b/internal/adapter/postgres/rate_limiter.go
@ -0,0 +1,236 @@
+// Package postgres provides PostgreSQL-based implementations of port interfaces.
+package postgres
+
+import (
+	"context"
+	"database/sql"
+	"errors"
+	"fmt"
+	"log/slog"
+	"time"
+
+	"github.com/orchard9/rdev/internal/domain"
+	"github.com/orchard9/rdev/internal/port"
+)
+
+// RateLimiter implements port.RateLimiter using PostgreSQL.
+type RateLimiter struct {
+	db *sql.DB
+}
+
+// NewRateLimiter creates a new PostgreSQL rate limiter.
+func NewRateLimiter(db *sql.DB) *RateLimiter {
+	return &RateLimiter{db: db}
+}
+
+// Ensure RateLimiter implements port.RateLimiter at compile time.
+var _ port.RateLimiter = (*RateLimiter)(nil)
+
+// CheckLimit checks if a request is allowed under the rate limit.
+func (r *RateLimiter) CheckLimit(ctx context.Context, keyID string) (*domain.RateLimitResult, error) {
+	now := time.Now()
+	minuteWindow := domain.TruncateToMinute(now)
+	hourWindow := domain.TruncateToHour(now)
+
+	// Get rate limits for this key
+	limits, err := r.GetLimits(ctx, keyID)
+	if err != nil {
+		return nil, fmt.Errorf("get limits: %w", err)
+	}
+
+	// Get current usage for minute window
+	minuteCount, err := r.getWindowCount(ctx, keyID, minuteWindow, domain.WindowTypeMinute)
+	if err != nil {
+		return nil, fmt.Errorf("get minute count: %w", err)
+	}
+
+	// Get current usage for hour window
+	hourCount, err := r.getWindowCount(ctx, keyID, hourWindow, domain.WindowTypeHour)
+	if err != nil {
+		return nil, fmt.Errorf("get hour count: %w", err)
+	}
+
+	result := &domain.RateLimitResult{
+		LimitMinute:     limits.PerMinute,
+		LimitHour:       limits.PerHour,
+		RemainingMinute: limits.PerMinute - minuteCount,
+		RemainingHour:   limits.PerHour - hourCount,
+		ResetMinute:     minuteWindow.Add(time.Minute),
+		ResetHour:       hourWindow.Add(time.Hour),
+	}
+
+	// Ensure remaining doesn't go negative
+	if result.RemainingMinute < 0 {
+		result.RemainingMinute = 0
+	}
+	if result.RemainingHour < 0 {
+		result.RemainingHour = 0
+	}
+
+	// Check if either limit is exceeded
+	if minuteCount >= limits.PerMinute {
+		result.Allowed = false
+		result.RetryAfter = time.Until(result.ResetMinute)
+		if result.RetryAfter < 0 {
+			result.RetryAfter = time.Second
+		}
+		return result, nil
+	}
+
+	if hourCount >= limits.PerHour {
+		result.Allowed = false
+		result.RetryAfter = time.Until(result.ResetHour)
+		if result.RetryAfter < 0 {
+			result.RetryAfter = time.Second
+		}
+		return result, nil
+	}
+
+	result.Allowed = true
+	return result, nil
+}
+
+// RecordRequest records that a request was made for the given API key.
+func (r *RateLimiter) RecordRequest(ctx context.Context, keyID string) error {
+	now := time.Now()
+	minuteWindow := domain.TruncateToMinute(now)
+	hourWindow := domain.TruncateToHour(now)
+
+	// Use a transaction to update both windows atomically
+	tx, err := r.db.BeginTx(ctx, nil)
+	if err != nil {
+		return fmt.Errorf("begin tx: %w", err)
+	}
+	defer func() { _ = tx.Rollback() }()
+
+	// Upsert minute window
+	if err := r.upsertWindow(ctx, tx, keyID, minuteWindow, domain.WindowTypeMinute); err != nil {
+		return fmt.Errorf("upsert minute window: %w", err)
+	}
+
+	// Upsert hour window
+	if err := r.upsertWindow(ctx, tx, keyID, hourWindow, domain.WindowTypeHour); err != nil {
+		return fmt.Errorf("upsert hour window: %w", err)
+	}
+
+	if err := tx.Commit(); err != nil {
+		return fmt.Errorf("commit: %w", err)
+	}
+
+	return nil
+}
+
+// GetLimits retrieves the rate limit configuration for an API key.
+func (r *RateLimiter) GetLimits(ctx context.Context, keyID string) (*domain.RateLimitConfig, error) {
+	var perMinute, perHour sql.NullInt64
+
+	err := r.db.QueryRowContext(ctx, `
+		SELECT rate_limit_per_minute, rate_limit_per_hour
+		FROM api_keys
+		WHERE id = $1
+	`, keyID).Scan(&perMinute, &perHour)
+
+	if errors.Is(err, sql.ErrNoRows) {
+		// Key not found, return defaults
+		defaults := domain.DefaultRateLimitConfig()
+		return &defaults, nil
+	}
+	if err != nil {
+		return nil, fmt.Errorf("query limits: %w", err)
+	}
+
+	config := domain.DefaultRateLimitConfig()
+	if perMinute.Valid {
+		config.PerMinute = int(perMinute.Int64)
+	}
+	if perHour.Valid {
+		config.PerHour = int(perHour.Int64)
+	}
+
+	return &config, nil
+}
+
+// Cleanup removes expired rate limit state entries.
+func (r *RateLimiter) Cleanup(ctx context.Context) error {
+	// Remove entries older than 2 hours (well past any active window)
+	cutoff := time.Now().Add(-2 * time.Hour)
+
+	result, err := r.db.ExecContext(ctx, `
+		DELETE FROM rate_limit_state
+		WHERE window_start < $1
+	`, cutoff)
+	if err != nil {
+		return fmt.Errorf("delete old entries: %w", err)
+	}
+
+	rows, _ := result.RowsAffected()
+	if rows > 0 {
+		// Log cleanup (optional, could use structured logging)
+		_ = rows
+	}
+
+	return nil
+}
+
+// getWindowCount returns the request count for a specific window.
+func (r *RateLimiter) getWindowCount(ctx context.Context, keyID string, windowStart time.Time, windowType string) (int, error) {
+	var count int
+	err := r.db.QueryRowContext(ctx, `
+		SELECT COALESCE(request_count, 0)
+		FROM rate_limit_state
+		WHERE api_key_id = $1 AND window_start = $2 AND window_type = $3
+	`, keyID, windowStart, windowType).Scan(&count)
+
+	if errors.Is(err, sql.ErrNoRows) {
+		return 0, nil
+	}
+	if err != nil {
+		return 0, fmt.Errorf("query count: %w", err)
+	}
+
+	return count, nil
+}
+
+// upsertWindow inserts or updates a rate limit window.
+func (r *RateLimiter) upsertWindow(ctx context.Context, tx *sql.Tx, keyID string, windowStart time.Time, windowType string) error {
+	_, err := tx.ExecContext(ctx, `
+		INSERT INTO rate_limit_state (api_key_id, window_start, window_type, request_count, updated_at)
+		VALUES ($1, $2, $3, 1, NOW())
+		ON CONFLICT (api_key_id, window_start, window_type)
+		DO UPDATE SET request_count = rate_limit_state.request_count + 1, updated_at = NOW()
+	`, keyID, windowStart, windowType)
+
+	if err != nil {
+		return fmt.Errorf("upsert: %w", err)
+	}
+
+	return nil
+}
+
+// StartCleanupWorker starts a background goroutine that periodically cleans up expired entries.
+// Returns a stop function to terminate the worker.
+func (r *RateLimiter) StartCleanupWorker(ctx context.Context, interval time.Duration) func() {
+	stopCh := make(chan struct{})
+
+	go func() {
+		ticker := time.NewTicker(interval)
+		defer ticker.Stop()
+
+		for {
+			select {
+			case <-ctx.Done():
+				return
+			case <-stopCh:
+				return
+			case <-ticker.C:
+				if err := r.Cleanup(ctx); err != nil {
+					slog.Error("rate limit cleanup failed", "error", err)
+				}
+			}
+		}
+	}()
+
+	return func() {
+		close(stopCh)
+	}
+}
--- a/internal/adapter/postgres/rate_limiter_test.go
+++ b/internal/adapter/postgres/rate_limiter_test.go
@ -0,0 +1,312 @@
+package postgres
+
+import (
+	"context"
+	"crypto/sha256"
+	"database/sql"
+	"encoding/hex"
+	"testing"
+	"time"
+
+	"github.com/orchard9/rdev/internal/domain"
+	"github.com/orchard9/rdev/internal/testutil"
+)
+
+// createTestAPIKey creates a test API key and returns its ID.
+func createTestAPIKey(t *testing.T, db *sql.DB, name string) string {
+	t.Helper()
+	repo := NewAPIKeyRepository(db)
+	ctx := context.Background()
+
+	key := &domain.APIKey{
+		Name:      "test-ratelimit-" + name,
+		KeyPrefix: "rl123456",
+		Scopes:    []domain.Scope{domain.ScopeProjectsRead},
+		CreatedBy: "test",
+	}
+
+	h := sha256.Sum256([]byte("ratelimit-key-" + name))
+	keyHash := hex.EncodeToString(h[:])
+
+	err := repo.Create(ctx, key, keyHash)
+	if err != nil {
+		t.Fatalf("create test API key: %v", err)
+	}
+
+	return string(key.ID)
+}
+
+func cleanupTestRateLimits(t *testing.T, db *sql.DB) {
+	t.Helper()
+	// Clean rate limit state for test keys
+	_, err := db.Exec(`
+		DELETE FROM rate_limit_state
+		WHERE api_key_id IN (SELECT id FROM api_keys WHERE name LIKE 'test-ratelimit-%')
+	`)
+	if err != nil {
+		t.Logf("cleanup test rate limits: %v", err)
+	}
+	// Clean up test API keys
+	testutil.CleanupTestKeys(t, db)
+}
+
+func TestRateLimiter_RecordRequest(t *testing.T) {
+	db := testutil.TestDB(t)
+	t.Cleanup(func() { cleanupTestRateLimits(t, db) })
+
+	limiter := NewRateLimiter(db)
+	ctx := context.Background()
+
+	t.Run("records first request", func(t *testing.T) {
+		keyID := createTestAPIKey(t, db, "record-first")
+
+		err := limiter.RecordRequest(ctx, keyID)
+		if err != nil {
+			t.Fatalf("RecordRequest() error = %v", err)
+		}
+
+		// Verify by checking limits
+		result, err := limiter.CheckLimit(ctx, keyID)
+		if err != nil {
+			t.Fatalf("CheckLimit() error = %v", err)
+		}
+
+		// Should have recorded one request
+		if result.RemainingMinute >= result.LimitMinute {
+			t.Error("RemainingMinute should be less than LimitMinute after recording a request")
+		}
+	})
+
+	t.Run("increments existing request count", func(t *testing.T) {
+		keyID := createTestAPIKey(t, db, "record-increment")
+
+		// Record multiple requests
+		for i := 0; i < 3; i++ {
+			err := limiter.RecordRequest(ctx, keyID)
+			if err != nil {
+				t.Fatalf("RecordRequest() iteration %d error = %v", i, err)
+			}
+		}
+
+		result, _ := limiter.CheckLimit(ctx, keyID)
+
+		expectedRemaining := result.LimitMinute - 3
+		if result.RemainingMinute != expectedRemaining {
+			t.Errorf("RemainingMinute = %d, want %d", result.RemainingMinute, expectedRemaining)
+		}
+	})
+}
+
+func TestRateLimiter_CheckLimit(t *testing.T) {
+	db := testutil.TestDB(t)
+	t.Cleanup(func() { cleanupTestRateLimits(t, db) })
+
+	limiter := NewRateLimiter(db)
+	ctx := context.Background()
+
+	t.Run("allows request when under limit", func(t *testing.T) {
+		keyID := createTestAPIKey(t, db, "check-under")
+
+		result, err := limiter.CheckLimit(ctx, keyID)
+		if err != nil {
+			t.Fatalf("CheckLimit() error = %v", err)
+		}
+
+		if !result.Allowed {
+			t.Error("CheckLimit() should allow request when under limit")
+		}
+		if result.RemainingMinute <= 0 {
+			t.Error("RemainingMinute should be positive")
+		}
+		if result.RemainingHour <= 0 {
+			t.Error("RemainingHour should be positive")
+		}
+	})
+
+	t.Run("denies request when minute limit exceeded", func(t *testing.T) {
+		keyID := createTestAPIKey(t, db, "check-minute-exceeded")
+
+		// Get the limit
+		limits, _ := limiter.GetLimits(ctx, keyID)
+
+		// Record enough requests to exceed minute limit
+		for i := 0; i < limits.PerMinute; i++ {
+			_ = limiter.RecordRequest(ctx, keyID)
+		}
+
+		result, err := limiter.CheckLimit(ctx, keyID)
+		if err != nil {
+			t.Fatalf("CheckLimit() error = %v", err)
+		}
+
+		if result.Allowed {
+			t.Error("CheckLimit() should deny request when minute limit exceeded")
+		}
+		if result.RetryAfter <= 0 {
+			t.Error("RetryAfter should be positive when denied")
+		}
+		if result.RemainingMinute != 0 {
+			t.Errorf("RemainingMinute = %d, want 0", result.RemainingMinute)
+		}
+	})
+
+	t.Run("returns correct reset times", func(t *testing.T) {
+		keyID := createTestAPIKey(t, db, "check-reset")
+
+		result, err := limiter.CheckLimit(ctx, keyID)
+		if err != nil {
+			t.Fatalf("CheckLimit() error = %v", err)
+		}
+
+		now := time.Now()
+
+		// ResetMinute should be within the next minute
+		if result.ResetMinute.Before(now) {
+			t.Error("ResetMinute should be in the future")
+		}
+		if result.ResetMinute.After(now.Add(time.Minute + time.Second)) {
+			t.Error("ResetMinute should be within ~1 minute from now")
+		}
+
+		// ResetHour should be within the next hour
+		if result.ResetHour.Before(now) {
+			t.Error("ResetHour should be in the future")
+		}
+		if result.ResetHour.After(now.Add(time.Hour + time.Second)) {
+			t.Error("ResetHour should be within ~1 hour from now")
+		}
+	})
+}
+
+func TestRateLimiter_GetLimits(t *testing.T) {
+	db := testutil.TestDB(t)
+	t.Cleanup(func() { cleanupTestRateLimits(t, db) })
+
+	limiter := NewRateLimiter(db)
+	ctx := context.Background()
+
+	t.Run("returns default limits for unknown key", func(t *testing.T) {
+		// Use a UUID that doesn't exist
+		limits, err := limiter.GetLimits(ctx, "00000000-0000-0000-0000-000000000000")
+		if err != nil {
+			t.Fatalf("GetLimits() error = %v", err)
+		}
+
+		defaults := domain.DefaultRateLimitConfig()
+		if limits.PerMinute != defaults.PerMinute {
+			t.Errorf("PerMinute = %d, want %d", limits.PerMinute, defaults.PerMinute)
+		}
+		if limits.PerHour != defaults.PerHour {
+			t.Errorf("PerHour = %d, want %d", limits.PerHour, defaults.PerHour)
+		}
+	})
+
+	t.Run("returns limits from existing key", func(t *testing.T) {
+		keyID := createTestAPIKey(t, db, "get-limits")
+
+		limits, err := limiter.GetLimits(ctx, keyID)
+		if err != nil {
+			t.Fatalf("GetLimits() error = %v", err)
+		}
+
+		// Should return defaults since we didn't set custom limits
+		defaults := domain.DefaultRateLimitConfig()
+		if limits.PerMinute != defaults.PerMinute {
+			t.Errorf("PerMinute = %d, want %d", limits.PerMinute, defaults.PerMinute)
+		}
+	})
+}
+
+func TestRateLimiter_Cleanup(t *testing.T) {
+	db := testutil.TestDB(t)
+	t.Cleanup(func() { cleanupTestRateLimits(t, db) })
+
+	limiter := NewRateLimiter(db)
+	ctx := context.Background()
+
+	t.Run("cleanup runs without error", func(t *testing.T) {
+		// Create some rate limit entries
+		keyID := createTestAPIKey(t, db, "cleanup-entry")
+		_ = limiter.RecordRequest(ctx, keyID)
+
+		err := limiter.Cleanup(ctx)
+		if err != nil {
+			t.Fatalf("Cleanup() error = %v", err)
+		}
+
+		// Recent entries should not be deleted
+		result, _ := limiter.CheckLimit(ctx, keyID)
+		if result.RemainingMinute >= result.LimitMinute {
+			// If the entry was cleaned up, remaining would equal limit
+			t.Log("Note: Recent rate limit entry was not cleaned up (expected behavior)")
+		}
+	})
+}
+
+func TestRateLimiter_WindowHandling(t *testing.T) {
+	db := testutil.TestDB(t)
+	t.Cleanup(func() { cleanupTestRateLimits(t, db) })
+
+	limiter := NewRateLimiter(db)
+	ctx := context.Background()
+
+	t.Run("minute and hour windows are tracked separately", func(t *testing.T) {
+		keyID := createTestAPIKey(t, db, "windows")
+
+		// Record a request
+		_ = limiter.RecordRequest(ctx, keyID)
+
+		result, err := limiter.CheckLimit(ctx, keyID)
+		if err != nil {
+			t.Fatalf("CheckLimit() error = %v", err)
+		}
+
+		// Both counters should reflect one request
+		expectedMinuteRemaining := result.LimitMinute - 1
+		expectedHourRemaining := result.LimitHour - 1
+
+		if result.RemainingMinute != expectedMinuteRemaining {
+			t.Errorf("RemainingMinute = %d, want %d", result.RemainingMinute, expectedMinuteRemaining)
+		}
+		if result.RemainingHour != expectedHourRemaining {
+			t.Errorf("RemainingHour = %d, want %d", result.RemainingHour, expectedHourRemaining)
+		}
+	})
+}
+
+func TestRateLimiter_ConcurrentRequests(t *testing.T) {
+	db := testutil.TestDB(t)
+	t.Cleanup(func() { cleanupTestRateLimits(t, db) })
+
+	limiter := NewRateLimiter(db)
+	ctx := context.Background()
+	keyID := createTestAPIKey(t, db, "concurrent")
+
+	// Run concurrent requests
+	const numRequests = 10
+	done := make(chan error, numRequests)
+
+	for i := 0; i < numRequests; i++ {
+		go func() {
+			done <- limiter.RecordRequest(ctx, keyID)
+		}()
+	}
+
+	// Wait for all requests to complete
+	for i := 0; i < numRequests; i++ {
+		if err := <-done; err != nil {
+			t.Errorf("Concurrent RecordRequest() error = %v", err)
+		}
+	}
+
+	// Verify the count
+	result, err := limiter.CheckLimit(ctx, keyID)
+	if err != nil {
+		t.Fatalf("CheckLimit() error = %v", err)
+	}
+
+	expectedRemaining := result.LimitMinute - numRequests
+	if result.RemainingMinute != expectedRemaining {
+		t.Errorf("RemainingMinute = %d, want %d (all concurrent requests should be counted)", result.RemainingMinute, expectedRemaining)
+	}
+}
--- a/internal/adapter/postgres/webhook.go
+++ b/internal/adapter/postgres/webhook.go
@ -0,0 +1,344 @@
+// Package postgres provides PostgreSQL-based implementations of port interfaces.
+package postgres
+
+import (
+	"context"
+	"database/sql"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"time"
+
+	"github.com/orchard9/rdev/internal/domain"
+	"github.com/orchard9/rdev/internal/port"
+)
+
+// WebhookRepository implements port.WebhookRepository using PostgreSQL.
+type WebhookRepository struct {
+	db *sql.DB
+}
+
+// NewWebhookRepository creates a new PostgreSQL webhook repository.
+func NewWebhookRepository(db *sql.DB) *WebhookRepository {
+	return &WebhookRepository{db: db}
+}
+
+// Ensure WebhookRepository implements port.WebhookRepository at compile time.
+var _ port.WebhookRepository = (*WebhookRepository)(nil)
+
+// Create creates a new webhook subscription.
+func (r *WebhookRepository) Create(ctx context.Context, webhook *domain.Webhook) error {
+	eventsJSON, err := json.Marshal(webhook.Events)
+	if err != nil {
+		return fmt.Errorf("marshal events: %w", err)
+	}
+
+	err = r.db.QueryRowContext(ctx, `
+		INSERT INTO webhooks (id, project_id, url, secret, events, enabled, created_at, updated_at)
+		VALUES ($1, $2, $3, $4, $5, $6, $7, $7)
+		RETURNING created_at, updated_at
+	`, webhook.ID, webhook.ProjectID, webhook.URL, nullString(webhook.Secret),
+		string(eventsJSON), webhook.Enabled, time.Now()).Scan(&webhook.CreatedAt, &webhook.UpdatedAt)
+
+	if err != nil {
+		return fmt.Errorf("create webhook: %w", err)
+	}
+
+	return nil
+}
+
+// Update updates an existing webhook.
+func (r *WebhookRepository) Update(ctx context.Context, webhook *domain.Webhook) error {
+	eventsJSON, err := json.Marshal(webhook.Events)
+	if err != nil {
+		return fmt.Errorf("marshal events: %w", err)
+	}
+
+	now := time.Now()
+	result, err := r.db.ExecContext(ctx, `
+		UPDATE webhooks
+		SET url = $1, secret = $2, events = $3, enabled = $4, updated_at = $5
+		WHERE id = $6
+	`, webhook.URL, nullString(webhook.Secret), string(eventsJSON), webhook.Enabled, now, webhook.ID)
+
+	if err != nil {
+		return fmt.Errorf("update webhook: %w", err)
+	}
+
+	rows, err := result.RowsAffected()
+	if err != nil {
+		return fmt.Errorf("rows affected: %w", err)
+	}
+
+	if rows == 0 {
+		return domain.ErrWebhookNotFound
+	}
+
+	webhook.UpdatedAt = now
+	return nil
+}
+
+// Delete deletes a webhook by ID.
+func (r *WebhookRepository) Delete(ctx context.Context, id domain.WebhookID) error {
+	result, err := r.db.ExecContext(ctx, `DELETE FROM webhooks WHERE id = $1`, id)
+	if err != nil {
+		return fmt.Errorf("delete webhook: %w", err)
+	}
+
+	rows, err := result.RowsAffected()
+	if err != nil {
+		return fmt.Errorf("rows affected: %w", err)
+	}
+
+	if rows == 0 {
+		return domain.ErrWebhookNotFound
+	}
+
+	return nil
+}
+
+// GetByID retrieves a webhook by ID.
+func (r *WebhookRepository) GetByID(ctx context.Context, id domain.WebhookID) (*domain.Webhook, error) {
+	var webhook domain.Webhook
+	var webhookID string
+	var secret sql.NullString
+	var eventsJSON string
+
+	err := r.db.QueryRowContext(ctx, `
+		SELECT id, project_id, url, secret, events, enabled, created_at, updated_at
+		FROM webhooks
+		WHERE id = $1
+	`, id).Scan(
+		&webhookID,
+		&webhook.ProjectID,
+		&webhook.URL,
+		&secret,
+		&eventsJSON,
+		&webhook.Enabled,
+		&webhook.CreatedAt,
+		&webhook.UpdatedAt,
+	)
+
+	if errors.Is(err, sql.ErrNoRows) {
+		return nil, domain.ErrWebhookNotFound
+	}
+	if err != nil {
+		return nil, fmt.Errorf("get webhook: %w", err)
+	}
+
+	webhook.ID = domain.WebhookID(webhookID)
+	if secret.Valid {
+		webhook.Secret = secret.String
+	}
+
+	if err := json.Unmarshal([]byte(eventsJSON), &webhook.Events); err != nil {
+		return nil, fmt.Errorf("unmarshal events: %w", err)
+	}
+
+	return &webhook, nil
+}
+
+// ListByProject returns all webhooks for a project.
+func (r *WebhookRepository) ListByProject(ctx context.Context, projectID string) ([]*domain.Webhook, error) {
+	rows, err := r.db.QueryContext(ctx, `
+		SELECT id, project_id, url, secret, events, enabled, created_at, updated_at
+		FROM webhooks
+		WHERE project_id = $1
+		ORDER BY created_at DESC
+	`, projectID)
+	if err != nil {
+		return nil, fmt.Errorf("list webhooks: %w", err)
+	}
+	defer func() { _ = rows.Close() }()
+
+	return scanWebhooks(rows)
+}
+
+// ListEnabledByProjectAndEvent returns enabled webhooks that subscribe to a specific event type.
+func (r *WebhookRepository) ListEnabledByProjectAndEvent(ctx context.Context, projectID string, eventType domain.WebhookEventType) ([]*domain.Webhook, error) {
+	// Use JSON contains check - events column contains the event type
+	rows, err := r.db.QueryContext(ctx, `
+		SELECT id, project_id, url, secret, events, enabled, created_at, updated_at
+		FROM webhooks
+		WHERE project_id = $1
+		  AND enabled = true
+		  AND events::jsonb ? $2
+		ORDER BY created_at ASC
+	`, projectID, string(eventType))
+	if err != nil {
+		return nil, fmt.Errorf("list enabled webhooks: %w", err)
+	}
+	defer func() { _ = rows.Close() }()
+
+	return scanWebhooks(rows)
+}
+
+// scanWebhooks scans rows into a slice of webhooks.
+func scanWebhooks(rows *sql.Rows) ([]*domain.Webhook, error) {
+	var webhooks []*domain.Webhook
+
+	for rows.Next() {
+		var webhook domain.Webhook
+		var webhookID string
+		var secret sql.NullString
+		var eventsJSON string
+
+		if err := rows.Scan(
+			&webhookID,
+			&webhook.ProjectID,
+			&webhook.URL,
+			&secret,
+			&eventsJSON,
+			&webhook.Enabled,
+			&webhook.CreatedAt,
+			&webhook.UpdatedAt,
+		); err != nil {
+			return nil, fmt.Errorf("scan webhook: %w", err)
+		}
+
+		webhook.ID = domain.WebhookID(webhookID)
+		if secret.Valid {
+			webhook.Secret = secret.String
+		}
+
+		if err := json.Unmarshal([]byte(eventsJSON), &webhook.Events); err != nil {
+			return nil, fmt.Errorf("unmarshal events: %w", err)
+		}
+
+		webhooks = append(webhooks, &webhook)
+	}
+
+	if err := rows.Err(); err != nil {
+		return nil, fmt.Errorf("rows error: %w", err)
+	}
+
+	return webhooks, nil
+}
+
+// RecordDelivery records a webhook delivery attempt.
+func (r *WebhookRepository) RecordDelivery(ctx context.Context, delivery *domain.WebhookDelivery) error {
+	_, err := r.db.ExecContext(ctx, `
+		INSERT INTO webhook_deliveries (id, webhook_id, event_type, payload, response_status, response_body, delivered_at, success, retry_count, error_message)
+		VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
+	`,
+		delivery.ID,
+		delivery.WebhookID,
+		delivery.EventType,
+		delivery.Payload,
+		nullInt(delivery.ResponseStatus),
+		nullString(delivery.ResponseBody),
+		delivery.DeliveredAt,
+		delivery.Success,
+		delivery.RetryCount,
+		nullString(delivery.ErrorMessage),
+	)
+
+	if err != nil {
+		return fmt.Errorf("record delivery: %w", err)
+	}
+
+	return nil
+}
+
+// GetDeliveries returns delivery history for a webhook.
+func (r *WebhookRepository) GetDeliveries(ctx context.Context, webhookID domain.WebhookID, filters *domain.WebhookDeliveryFilters) ([]*domain.WebhookDelivery, error) {
+	if filters == nil {
+		filters = domain.DefaultWebhookDeliveryFilters()
+	}
+
+	query := `
+		SELECT id, webhook_id, event_type, payload, response_status, response_body, delivered_at, success, retry_count, error_message
+		FROM webhook_deliveries
+		WHERE webhook_id = $1
+	`
+	args := []any{webhookID}
+	argNum := 2
+
+	if filters.EventType != nil {
+		query += fmt.Sprintf(" AND event_type = $%d", argNum)
+		args = append(args, string(*filters.EventType))
+		argNum++
+	}
+
+	if filters.Success != nil {
+		query += fmt.Sprintf(" AND success = $%d", argNum)
+		args = append(args, *filters.Success)
+		argNum++
+	}
+
+	query += " ORDER BY delivered_at DESC"
+	query += fmt.Sprintf(" LIMIT $%d OFFSET $%d", argNum, argNum+1)
+	args = append(args, filters.Limit, filters.Offset)
+
+	rows, err := r.db.QueryContext(ctx, query, args...)
+	if err != nil {
+		return nil, fmt.Errorf("get deliveries: %w", err)
+	}
+	defer func() { _ = rows.Close() }()
+
+	var deliveries []*domain.WebhookDelivery
+	for rows.Next() {
+		var delivery domain.WebhookDelivery
+		var deliveryID, webhookIDStr string
+		var eventType string
+		var responseStatus sql.NullInt32
+		var responseBody, errorMessage sql.NullString
+
+		if err := rows.Scan(
+			&deliveryID,
+			&webhookIDStr,
+			&eventType,
+			&delivery.Payload,
+			&responseStatus,
+			&responseBody,
+			&delivery.DeliveredAt,
+			&delivery.Success,
+			&delivery.RetryCount,
+			&errorMessage,
+		); err != nil {
+			return nil, fmt.Errorf("scan delivery: %w", err)
+		}
+
+		delivery.ID = domain.WebhookDeliveryID(deliveryID)
+		delivery.WebhookID = domain.WebhookID(webhookIDStr)
+		delivery.EventType = domain.WebhookEventType(eventType)
+		if responseStatus.Valid {
+			delivery.ResponseStatus = int(responseStatus.Int32)
+		}
+		if responseBody.Valid {
+			delivery.ResponseBody = responseBody.String
+		}
+		if errorMessage.Valid {
+			delivery.ErrorMessage = errorMessage.String
+		}
+
+		deliveries = append(deliveries, &delivery)
+	}
+
+	if err := rows.Err(); err != nil {
+		return nil, fmt.Errorf("rows error: %w", err)
+	}
+
+	return deliveries, nil
+}
+
+// CleanupOldDeliveries removes delivery records older than the specified number of days.
+func (r *WebhookRepository) CleanupOldDeliveries(ctx context.Context, olderThanDays int) (int64, error) {
+	result, err := r.db.ExecContext(ctx, `
+		DELETE FROM webhook_deliveries
+		WHERE delivered_at < NOW() - INTERVAL '1 day' * $1
+	`, olderThanDays)
+	if err != nil {
+		return 0, fmt.Errorf("cleanup old deliveries: %w", err)
+	}
+
+	return result.RowsAffected()
+}
+
+// nullInt returns a sql.NullInt32 for optional int fields.
+func nullInt(i int) sql.NullInt32 {
+	if i == 0 {
+		return sql.NullInt32{}
+	}
+	return sql.NullInt32{Int32: int32(i), Valid: true}
+}
--- a/internal/adapter/postgres/webhook_test.go
+++ b/internal/adapter/postgres/webhook_test.go
@ -0,0 +1,534 @@
+package postgres
+
+import (
+	"context"
+	"database/sql"
+	"testing"
+	"time"
+
+	"github.com/orchard9/rdev/internal/domain"
+	"github.com/orchard9/rdev/internal/testutil"
+)
+
+func cleanupTestWebhooks(t *testing.T, db *sql.DB) {
+	t.Helper()
+	// Clean deliveries first due to foreign key constraint
+	_, err := db.Exec("DELETE FROM webhook_deliveries WHERE webhook_id IN (SELECT id FROM webhooks WHERE project_id LIKE 'test-%')")
+	if err != nil {
+		t.Logf("cleanup test webhook deliveries: %v", err)
+	}
+	_, err = db.Exec("DELETE FROM webhooks WHERE project_id LIKE 'test-%'")
+	if err != nil {
+		t.Logf("cleanup test webhooks: %v", err)
+	}
+}
+
+func TestWebhookRepository_Create(t *testing.T) {
+	db := testutil.TestDB(t)
+	t.Cleanup(func() { cleanupTestWebhooks(t, db) })
+
+	repo := NewWebhookRepository(db)
+	ctx := context.Background()
+
+	t.Run("creates webhook successfully", func(t *testing.T) {
+		webhook := &domain.Webhook{
+			ID:        "wh-test-create-1",
+			ProjectID: "test-proj-webhook-1",
+			URL:       "https://example.com/webhook",
+			Secret:    "test-secret-123",
+			Events: []domain.WebhookEventType{
+				domain.WebhookEventCommandStarted,
+				domain.WebhookEventCommandCompleted,
+			},
+			Enabled: true,
+		}
+
+		err := repo.Create(ctx, webhook)
+		if err != nil {
+			t.Fatalf("Create() error = %v", err)
+		}
+
+		if webhook.CreatedAt.IsZero() {
+			t.Error("CreatedAt should be set after create")
+		}
+		if webhook.UpdatedAt.IsZero() {
+			t.Error("UpdatedAt should be set after create")
+		}
+	})
+
+	t.Run("creates webhook without secret", func(t *testing.T) {
+		webhook := &domain.Webhook{
+			ID:        "wh-test-create-nosecret",
+			ProjectID: "test-proj-webhook-2",
+			URL:       "https://example.com/webhook2",
+			Secret:    "",
+			Events:    []domain.WebhookEventType{domain.WebhookEventCommandStarted},
+			Enabled:   true,
+		}
+
+		err := repo.Create(ctx, webhook)
+		if err != nil {
+			t.Fatalf("Create() error = %v", err)
+		}
+
+		// Retrieve and verify secret is empty
+		retrieved, _ := repo.GetByID(ctx, "wh-test-create-nosecret")
+		if retrieved.Secret != "" {
+			t.Errorf("Secret = %q, want empty", retrieved.Secret)
+		}
+	})
+}
+
+func TestWebhookRepository_Update(t *testing.T) {
+	db := testutil.TestDB(t)
+	t.Cleanup(func() { cleanupTestWebhooks(t, db) })
+
+	repo := NewWebhookRepository(db)
+	ctx := context.Background()
+
+	t.Run("updates existing webhook", func(t *testing.T) {
+		// Create webhook first
+		webhook := &domain.Webhook{
+			ID:        "wh-test-update-1",
+			ProjectID: "test-proj-update",
+			URL:       "https://example.com/original",
+			Secret:    "original-secret",
+			Events:    []domain.WebhookEventType{domain.WebhookEventCommandStarted},
+			Enabled:   true,
+		}
+		repo.Create(ctx, webhook)
+
+		// Update it
+		webhook.URL = "https://example.com/updated"
+		webhook.Secret = "updated-secret"
+		webhook.Enabled = false
+		webhook.Events = []domain.WebhookEventType{
+			domain.WebhookEventCommandCompleted,
+			domain.WebhookEventCommandFailed,
+		}
+
+		originalUpdatedAt := webhook.UpdatedAt
+		time.Sleep(10 * time.Millisecond) // Ensure timestamp changes
+
+		err := repo.Update(ctx, webhook)
+		if err != nil {
+			t.Fatalf("Update() error = %v", err)
+		}
+
+		if !webhook.UpdatedAt.After(originalUpdatedAt) {
+			t.Error("UpdatedAt should be updated after Update()")
+		}
+
+		// Verify changes
+		retrieved, _ := repo.GetByID(ctx, "wh-test-update-1")
+		if retrieved.URL != "https://example.com/updated" {
+			t.Errorf("URL = %q, want %q", retrieved.URL, "https://example.com/updated")
+		}
+		if retrieved.Secret != "updated-secret" {
+			t.Errorf("Secret = %q, want %q", retrieved.Secret, "updated-secret")
+		}
+		if retrieved.Enabled {
+			t.Error("Enabled should be false after update")
+		}
+		if len(retrieved.Events) != 2 {
+			t.Errorf("Events length = %d, want 2", len(retrieved.Events))
+		}
+	})
+
+	t.Run("returns error for non-existent webhook", func(t *testing.T) {
+		webhook := &domain.Webhook{
+			ID:        "wh-nonexistent",
+			ProjectID: "test-proj",
+			URL:       "https://example.com",
+			Events:    []domain.WebhookEventType{domain.WebhookEventCommandStarted},
+		}
+
+		err := repo.Update(ctx, webhook)
+		if err != domain.ErrWebhookNotFound {
+			t.Errorf("Update() error = %v, want %v", err, domain.ErrWebhookNotFound)
+		}
+	})
+}
+
+func TestWebhookRepository_Delete(t *testing.T) {
+	db := testutil.TestDB(t)
+	t.Cleanup(func() { cleanupTestWebhooks(t, db) })
+
+	repo := NewWebhookRepository(db)
+	ctx := context.Background()
+
+	t.Run("deletes existing webhook", func(t *testing.T) {
+		webhook := &domain.Webhook{
+			ID:        "wh-test-delete-1",
+			ProjectID: "test-proj-delete",
+			URL:       "https://example.com/delete",
+			Events:    []domain.WebhookEventType{domain.WebhookEventCommandStarted},
+			Enabled:   true,
+		}
+		repo.Create(ctx, webhook)
+
+		err := repo.Delete(ctx, "wh-test-delete-1")
+		if err != nil {
+			t.Fatalf("Delete() error = %v", err)
+		}
+
+		// Verify deleted
+		_, err = repo.GetByID(ctx, "wh-test-delete-1")
+		if err != domain.ErrWebhookNotFound {
+			t.Errorf("GetByID() after delete error = %v, want %v", err, domain.ErrWebhookNotFound)
+		}
+	})
+
+	t.Run("returns error for non-existent webhook", func(t *testing.T) {
+		err := repo.Delete(ctx, "wh-nonexistent")
+		if err != domain.ErrWebhookNotFound {
+			t.Errorf("Delete() error = %v, want %v", err, domain.ErrWebhookNotFound)
+		}
+	})
+}
+
+func TestWebhookRepository_GetByID(t *testing.T) {
+	db := testutil.TestDB(t)
+	t.Cleanup(func() { cleanupTestWebhooks(t, db) })
+
+	repo := NewWebhookRepository(db)
+	ctx := context.Background()
+
+	t.Run("gets existing webhook", func(t *testing.T) {
+		webhook := &domain.Webhook{
+			ID:        "wh-test-getbyid-1",
+			ProjectID: "test-proj-getbyid",
+			URL:       "https://example.com/getbyid",
+			Secret:    "get-secret",
+			Events: []domain.WebhookEventType{
+				domain.WebhookEventCommandStarted,
+				domain.WebhookEventCommandCompleted,
+			},
+			Enabled: true,
+		}
+		repo.Create(ctx, webhook)
+
+		retrieved, err := repo.GetByID(ctx, "wh-test-getbyid-1")
+		if err != nil {
+			t.Fatalf("GetByID() error = %v", err)
+		}
+
+		if retrieved.URL != "https://example.com/getbyid" {
+			t.Errorf("URL = %q, want %q", retrieved.URL, "https://example.com/getbyid")
+		}
+		if retrieved.Secret != "get-secret" {
+			t.Errorf("Secret = %q, want %q", retrieved.Secret, "get-secret")
+		}
+		if len(retrieved.Events) != 2 {
+			t.Errorf("Events length = %d, want 2", len(retrieved.Events))
+		}
+	})
+
+	t.Run("returns error for non-existent webhook", func(t *testing.T) {
+		_, err := repo.GetByID(ctx, "wh-nonexistent")
+		if err != domain.ErrWebhookNotFound {
+			t.Errorf("GetByID() error = %v, want %v", err, domain.ErrWebhookNotFound)
+		}
+	})
+}
+
+func TestWebhookRepository_ListByProject(t *testing.T) {
+	db := testutil.TestDB(t)
+	t.Cleanup(func() { cleanupTestWebhooks(t, db) })
+
+	repo := NewWebhookRepository(db)
+	ctx := context.Background()
+
+	projectID := "test-proj-list"
+
+	// Create multiple webhooks
+	for i := 0; i < 3; i++ {
+		webhook := &domain.Webhook{
+			ID:        domain.WebhookID("wh-test-list-" + string(rune('a'+i))),
+			ProjectID: projectID,
+			URL:       "https://example.com/list" + string(rune('a'+i)),
+			Events:    []domain.WebhookEventType{domain.WebhookEventCommandStarted},
+			Enabled:   true,
+		}
+		repo.Create(ctx, webhook)
+		time.Sleep(10 * time.Millisecond) // Ensure different timestamps
+	}
+
+	// Create webhook in different project
+	otherWebhook := &domain.Webhook{
+		ID:        "wh-test-list-other",
+		ProjectID: "test-proj-list-other",
+		URL:       "https://example.com/other",
+		Events:    []domain.WebhookEventType{domain.WebhookEventCommandStarted},
+		Enabled:   true,
+	}
+	repo.Create(ctx, otherWebhook)
+
+	t.Run("lists all webhooks for project", func(t *testing.T) {
+		webhooks, err := repo.ListByProject(ctx, projectID)
+		if err != nil {
+			t.Fatalf("ListByProject() error = %v", err)
+		}
+
+		if len(webhooks) != 3 {
+			t.Errorf("ListByProject() returned %d webhooks, want 3", len(webhooks))
+		}
+
+		// Verify all belong to the project
+		for _, wh := range webhooks {
+			if wh.ProjectID != projectID {
+				t.Errorf("Webhook has ProjectID = %q, want %q", wh.ProjectID, projectID)
+			}
+		}
+	})
+
+	t.Run("returns empty slice for project with no webhooks", func(t *testing.T) {
+		webhooks, err := repo.ListByProject(ctx, "test-proj-no-webhooks")
+		if err != nil {
+			t.Fatalf("ListByProject() error = %v", err)
+		}
+
+		if len(webhooks) != 0 {
+			t.Errorf("ListByProject() returned %d webhooks, want 0", len(webhooks))
+		}
+	})
+}
+
+func TestWebhookRepository_ListEnabledByProjectAndEvent(t *testing.T) {
+	db := testutil.TestDB(t)
+	t.Cleanup(func() { cleanupTestWebhooks(t, db) })
+
+	repo := NewWebhookRepository(db)
+	ctx := context.Background()
+
+	projectID := "test-proj-enabled"
+
+	// Create webhooks with different configurations
+	enabledStarted := &domain.Webhook{
+		ID:        "wh-enabled-started",
+		ProjectID: projectID,
+		URL:       "https://example.com/enabled-started",
+		Events:    []domain.WebhookEventType{domain.WebhookEventCommandStarted},
+		Enabled:   true,
+	}
+	repo.Create(ctx, enabledStarted)
+
+	enabledCompleted := &domain.Webhook{
+		ID:        "wh-enabled-completed",
+		ProjectID: projectID,
+		URL:       "https://example.com/enabled-completed",
+		Events:    []domain.WebhookEventType{domain.WebhookEventCommandCompleted},
+		Enabled:   true,
+	}
+	repo.Create(ctx, enabledCompleted)
+
+	disabledStarted := &domain.Webhook{
+		ID:        "wh-disabled-started",
+		ProjectID: projectID,
+		URL:       "https://example.com/disabled-started",
+		Events:    []domain.WebhookEventType{domain.WebhookEventCommandStarted},
+		Enabled:   false,
+	}
+	repo.Create(ctx, disabledStarted)
+
+	t.Run("returns only enabled webhooks with matching event", func(t *testing.T) {
+		webhooks, err := repo.ListEnabledByProjectAndEvent(ctx, projectID, domain.WebhookEventCommandStarted)
+		if err != nil {
+			t.Fatalf("ListEnabledByProjectAndEvent() error = %v", err)
+		}
+
+		if len(webhooks) != 1 {
+			t.Errorf("ListEnabledByProjectAndEvent() returned %d webhooks, want 1", len(webhooks))
+		}
+
+		if len(webhooks) > 0 && webhooks[0].ID != "wh-enabled-started" {
+			t.Errorf("Webhook ID = %q, want %q", webhooks[0].ID, "wh-enabled-started")
+		}
+	})
+
+	t.Run("returns empty when no matching webhooks", func(t *testing.T) {
+		webhooks, err := repo.ListEnabledByProjectAndEvent(ctx, projectID, domain.WebhookEventCommandFailed)
+		if err != nil {
+			t.Fatalf("ListEnabledByProjectAndEvent() error = %v", err)
+		}
+
+		if len(webhooks) != 0 {
+			t.Errorf("ListEnabledByProjectAndEvent() returned %d webhooks, want 0", len(webhooks))
+		}
+	})
+}
+
+func TestWebhookRepository_RecordDelivery(t *testing.T) {
+	db := testutil.TestDB(t)
+	t.Cleanup(func() { cleanupTestWebhooks(t, db) })
+
+	repo := NewWebhookRepository(db)
+	ctx := context.Background()
+
+	// Create webhook first
+	webhook := &domain.Webhook{
+		ID:        "wh-test-delivery",
+		ProjectID: "test-proj-delivery",
+		URL:       "https://example.com/delivery",
+		Events:    []domain.WebhookEventType{domain.WebhookEventCommandStarted},
+		Enabled:   true,
+	}
+	repo.Create(ctx, webhook)
+
+	t.Run("records successful delivery", func(t *testing.T) {
+		delivery := &domain.WebhookDelivery{
+			ID:             "del-test-success",
+			WebhookID:      "wh-test-delivery",
+			EventType:      domain.WebhookEventCommandStarted,
+			Payload:        `{"event":"command.started"}`,
+			ResponseStatus: 200,
+			ResponseBody:   "OK",
+			DeliveredAt:    time.Now(),
+			Success:        true,
+			RetryCount:     0,
+		}
+
+		err := repo.RecordDelivery(ctx, delivery)
+		if err != nil {
+			t.Fatalf("RecordDelivery() error = %v", err)
+		}
+	})
+
+	t.Run("records failed delivery", func(t *testing.T) {
+		delivery := &domain.WebhookDelivery{
+			ID:             "del-test-failure",
+			WebhookID:      "wh-test-delivery",
+			EventType:      domain.WebhookEventCommandStarted,
+			Payload:        `{"event":"command.started"}`,
+			ResponseStatus: 500,
+			ResponseBody:   "Internal Server Error",
+			DeliveredAt:    time.Now(),
+			Success:        false,
+			RetryCount:     3,
+			ErrorMessage:   "server returned 500",
+		}
+
+		err := repo.RecordDelivery(ctx, delivery)
+		if err != nil {
+			t.Fatalf("RecordDelivery() error = %v", err)
+		}
+	})
+}
+
+func TestWebhookRepository_GetDeliveries(t *testing.T) {
+	db := testutil.TestDB(t)
+	t.Cleanup(func() { cleanupTestWebhooks(t, db) })
+
+	repo := NewWebhookRepository(db)
+	ctx := context.Background()
+
+	// Create webhook
+	webhook := &domain.Webhook{
+		ID:        "wh-test-get-deliveries",
+		ProjectID: "test-proj-get-deliveries",
+		URL:       "https://example.com/deliveries",
+		Events:    []domain.WebhookEventType{domain.WebhookEventCommandStarted},
+		Enabled:   true,
+	}
+	repo.Create(ctx, webhook)
+
+	// Create deliveries
+	for i := 0; i < 5; i++ {
+		delivery := &domain.WebhookDelivery{
+			ID:             domain.WebhookDeliveryID("del-get-" + string(rune('a'+i))),
+			WebhookID:      "wh-test-get-deliveries",
+			EventType:      domain.WebhookEventCommandStarted,
+			Payload:        `{"event":"command.started"}`,
+			ResponseStatus: 200,
+			DeliveredAt:    time.Now(),
+			Success:        i%2 == 0, // Alternate success/failure
+		}
+		_ = repo.RecordDelivery(ctx, delivery)
+		time.Sleep(10 * time.Millisecond)
+	}
+
+	t.Run("gets all deliveries", func(t *testing.T) {
+		deliveries, err := repo.GetDeliveries(ctx, "wh-test-get-deliveries", nil)
+		if err != nil {
+			t.Fatalf("GetDeliveries() error = %v", err)
+		}
+
+		if len(deliveries) != 5 {
+			t.Errorf("GetDeliveries() returned %d deliveries, want 5", len(deliveries))
+		}
+	})
+
+	t.Run("filters by success", func(t *testing.T) {
+		success := true
+		deliveries, err := repo.GetDeliveries(ctx, "wh-test-get-deliveries", &domain.WebhookDeliveryFilters{
+			Success: &success,
+			Limit:   100,
+		})
+		if err != nil {
+			t.Fatalf("GetDeliveries() error = %v", err)
+		}
+
+		for _, d := range deliveries {
+			if !d.Success {
+				t.Error("GetDeliveries() returned unsuccessful delivery when filtering by success=true")
+			}
+		}
+	})
+
+	t.Run("applies limit", func(t *testing.T) {
+		deliveries, err := repo.GetDeliveries(ctx, "wh-test-get-deliveries", &domain.WebhookDeliveryFilters{
+			Limit: 2,
+		})
+		if err != nil {
+			t.Fatalf("GetDeliveries() error = %v", err)
+		}
+
+		if len(deliveries) != 2 {
+			t.Errorf("GetDeliveries() returned %d deliveries, want 2", len(deliveries))
+		}
+	})
+}
+
+func TestWebhookRepository_CleanupOldDeliveries(t *testing.T) {
+	db := testutil.TestDB(t)
+	t.Cleanup(func() { cleanupTestWebhooks(t, db) })
+
+	repo := NewWebhookRepository(db)
+	ctx := context.Background()
+
+	// Create webhook
+	webhook := &domain.Webhook{
+		ID:        "wh-test-cleanup",
+		ProjectID: "test-proj-cleanup",
+		URL:       "https://example.com/cleanup",
+		Events:    []domain.WebhookEventType{domain.WebhookEventCommandStarted},
+		Enabled:   true,
+	}
+	repo.Create(ctx, webhook)
+
+	// Create deliveries
+	for i := 0; i < 3; i++ {
+		delivery := &domain.WebhookDelivery{
+			ID:             domain.WebhookDeliveryID("del-cleanup-" + string(rune('a'+i))),
+			WebhookID:      "wh-test-cleanup",
+			EventType:      domain.WebhookEventCommandStarted,
+			Payload:        `{}`,
+			ResponseStatus: 200,
+			DeliveredAt:    time.Now(),
+			Success:        true,
+		}
+		_ = repo.RecordDelivery(ctx, delivery)
+	}
+
+	t.Run("cleanup runs without error", func(t *testing.T) {
+		deleted, err := repo.CleanupOldDeliveries(ctx, 30)
+		if err != nil {
+			t.Fatalf("CleanupOldDeliveries() error = %v", err)
+		}
+
+		// Newly created deliveries shouldn't be deleted
+		if deleted != 0 {
+			t.Logf("CleanupOldDeliveries() deleted %d deliveries (expected 0 for new deliveries)", deleted)
+		}
+	})
+}
--- a/internal/auth/middleware.go
+++ b/internal/auth/middleware.go
@ -9,6 +9,41 @@ import (
 	"github.com/orchard9/rdev/pkg/api"
 )

+// getClientIP extracts the client IP from the request.
+func getClientIP(r *http.Request) string {
+	// Check X-Forwarded-For header (set by proxies/load balancers)
+	if xff := r.Header.Get("X-Forwarded-For"); xff != "" {
+		// Take the first IP in the chain
+		for i := 0; i < len(xff); i++ {
+			if xff[i] == ',' {
+				return strings.TrimSpace(xff[:i])
+			}
+		}
+		return strings.TrimSpace(xff)
+	}
+
+	// Check X-Real-IP header
+	if xri := r.Header.Get("X-Real-IP"); xri != "" {
+		return strings.TrimSpace(xri)
+	}
+
+	// Fall back to RemoteAddr
+	// RemoteAddr is "IP:port", so strip the port
+	addr := r.RemoteAddr
+	// Handle IPv6 addresses like "[::1]:8080"
+	if strings.HasPrefix(addr, "[") {
+		if idx := strings.LastIndex(addr, "]:"); idx != -1 {
+			return addr[1:idx]
+		}
+		return strings.Trim(addr, "[]")
+	}
+	// Handle IPv4 addresses like "192.168.1.1:8080"
+	if idx := strings.LastIndex(addr, ":"); idx != -1 {
+		return addr[:idx]
+	}
+	return addr
+}
+
 // Header for API key authentication.
 const HeaderAPIKey = "X-API-Key"

@ -47,6 +82,12 @@ func Middleware(svc *Service) func(http.Handler) http.Handler {
 				return
 			}

+			// Skip auth for metrics
+			if r.URL.Path == "/metrics" {
+				next.ServeHTTP(w, r)
+				return
+			}
+
 			// Get key from header
 			key := r.Header.Get(HeaderAPIKey)
 			if key == "" {
@ -81,6 +122,13 @@ func Middleware(svc *Service) func(http.Handler) http.Handler {
 				return
 			}

+			// Check IP allowlist
+			clientIP := getClientIP(r)
+			if !apiKey.IsIPAllowed(clientIP) {
+				api.WriteError(w, r, http.StatusForbidden, "IP_NOT_ALLOWED", "IP address not allowed for this API key")
+				return
+			}
+
 			// Add key to context
 			ctx := context.WithValue(r.Context(), contextKeyAPIKey, apiKey)
 			next.ServeHTTP(w, r.WithContext(ctx))
--- a/internal/auth/middleware_bench_test.go
+++ b/internal/auth/middleware_bench_test.go
@ -0,0 +1,293 @@
+package auth
+
+import (
+	"context"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+	"time"
+)
+
+// BenchmarkAuthMiddleware benchmarks the authentication middleware overhead.
+func BenchmarkAuthMiddleware(b *testing.B) {
+	// Create a mock API key
+	apiKey := &APIKey{
+		ID:        "test-key-id",
+		Name:      "benchmark-key",
+		KeyPrefix: "rdev",
+		Scopes:    []Scope{ScopeProjectsExecute, ScopeProjectsRead},
+		CreatedAt: time.Now(),
+	}
+
+	// Simple handler that just writes OK
+	handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+	})
+
+	// Wrap with a mock middleware that simulates auth without DB
+	middleware := func(next http.Handler) http.Handler {
+		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			// Simulate auth header parsing
+			key := r.Header.Get(HeaderAPIKey)
+			if key == "" {
+				auth := r.Header.Get("Authorization")
+				if len(auth) > 7 && auth[:7] == "Bearer " {
+					key = auth[7:]
+				}
+			}
+
+			if key == "" {
+				w.WriteHeader(http.StatusUnauthorized)
+				return
+			}
+
+			// Simulate key validation (without DB)
+			if key != "valid-key" {
+				w.WriteHeader(http.StatusUnauthorized)
+				return
+			}
+
+			// Add key to context
+			ctx := context.WithValue(r.Context(), contextKeyAPIKey, apiKey)
+			next.ServeHTTP(w, r.WithContext(ctx))
+		})
+	}
+
+	wrappedHandler := middleware(handler)
+
+	b.ResetTimer()
+	b.ReportAllocs()
+
+	for i := 0; i < b.N; i++ {
+		req := httptest.NewRequest("GET", "/projects", nil)
+		req.Header.Set(HeaderAPIKey, "valid-key")
+		rec := httptest.NewRecorder()
+		wrappedHandler.ServeHTTP(rec, req)
+	}
+}
+
+// BenchmarkAuthMiddleware_Bearer benchmarks auth with Bearer token.
+func BenchmarkAuthMiddleware_Bearer(b *testing.B) {
+	apiKey := &APIKey{
+		ID:        "test-key-id",
+		Name:      "benchmark-key",
+		KeyPrefix: "rdev",
+		Scopes:    []Scope{ScopeProjectsExecute, ScopeProjectsRead},
+		CreatedAt: time.Now(),
+	}
+
+	handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+	})
+
+	middleware := func(next http.Handler) http.Handler {
+		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			key := r.Header.Get(HeaderAPIKey)
+			if key == "" {
+				auth := r.Header.Get("Authorization")
+				if len(auth) > 7 && auth[:7] == "Bearer " {
+					key = auth[7:]
+				}
+			}
+
+			if key == "" || key != "valid-key" {
+				w.WriteHeader(http.StatusUnauthorized)
+				return
+			}
+
+			ctx := context.WithValue(r.Context(), contextKeyAPIKey, apiKey)
+			next.ServeHTTP(w, r.WithContext(ctx))
+		})
+	}
+
+	wrappedHandler := middleware(handler)
+
+	b.ResetTimer()
+	b.ReportAllocs()
+
+	for i := 0; i < b.N; i++ {
+		req := httptest.NewRequest("GET", "/projects", nil)
+		req.Header.Set("Authorization", "Bearer valid-key")
+		rec := httptest.NewRecorder()
+		wrappedHandler.ServeHTTP(rec, req)
+	}
+}
+
+// BenchmarkRequireScope benchmarks the scope checking middleware.
+func BenchmarkRequireScope(b *testing.B) {
+	apiKey := &APIKey{
+		ID:        "test-key-id",
+		Name:      "benchmark-key",
+		KeyPrefix: "rdev",
+		Scopes:    []Scope{ScopeProjectsExecute, ScopeProjectsRead},
+		CreatedAt: time.Now(),
+	}
+
+	handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+	})
+
+	scopeMiddleware := RequireScope(ScopeProjectsExecute)
+	wrappedHandler := scopeMiddleware(handler)
+
+	b.ResetTimer()
+	b.ReportAllocs()
+
+	for i := 0; i < b.N; i++ {
+		req := httptest.NewRequest("POST", "/projects/test/claude", nil)
+		// Pre-set the API key in context
+		ctx := context.WithValue(req.Context(), contextKeyAPIKey, apiKey)
+		req = req.WithContext(ctx)
+		rec := httptest.NewRecorder()
+		wrappedHandler.ServeHTTP(rec, req)
+	}
+}
+
+// BenchmarkGetClientIP benchmarks IP extraction from requests.
+func BenchmarkGetClientIP(b *testing.B) {
+	req := httptest.NewRequest("GET", "/", nil)
+	req.RemoteAddr = "192.168.1.100:12345"
+
+	b.ResetTimer()
+	b.ReportAllocs()
+
+	for i := 0; i < b.N; i++ {
+		_ = getClientIP(req)
+	}
+}
+
+// BenchmarkGetClientIP_XForwardedFor benchmarks IP extraction with X-Forwarded-For.
+func BenchmarkGetClientIP_XForwardedFor(b *testing.B) {
+	req := httptest.NewRequest("GET", "/", nil)
+	req.Header.Set("X-Forwarded-For", "10.0.0.1, 192.168.1.1, 172.16.0.1")
+	req.RemoteAddr = "127.0.0.1:12345"
+
+	b.ResetTimer()
+	b.ReportAllocs()
+
+	for i := 0; i < b.N; i++ {
+		_ = getClientIP(req)
+	}
+}
+
+// BenchmarkGetClientIP_IPv6 benchmarks IP extraction for IPv6.
+func BenchmarkGetClientIP_IPv6(b *testing.B) {
+	req := httptest.NewRequest("GET", "/", nil)
+	req.RemoteAddr = "[2001:db8::1]:12345"
+
+	b.ResetTimer()
+	b.ReportAllocs()
+
+	for i := 0; i < b.N; i++ {
+		_ = getClientIP(req)
+	}
+}
+
+// BenchmarkIPAllowlistCheck benchmarks the IP allowlist checking.
+func BenchmarkIPAllowlistCheck(b *testing.B) {
+	apiKey := &APIKey{
+		ID:         "test-key-id",
+		Name:       "benchmark-key",
+		KeyPrefix:  "rdev",
+		Scopes:     []Scope{ScopeProjectsExecute},
+		AllowedIPs: []string{"192.168.1.0/24", "10.0.0.0/8", "172.16.0.0/12"},
+		CreatedAt:  time.Now(),
+	}
+
+	b.ResetTimer()
+	b.ReportAllocs()
+
+	for i := 0; i < b.N; i++ {
+		_ = apiKey.IsIPAllowed("192.168.1.100")
+	}
+}
+
+// BenchmarkIPAllowlistCheck_NoAllowlist benchmarks IP check when no allowlist configured.
+func BenchmarkIPAllowlistCheck_NoAllowlist(b *testing.B) {
+	apiKey := &APIKey{
+		ID:        "test-key-id",
+		Name:      "benchmark-key",
+		KeyPrefix: "rdev",
+		Scopes:    []Scope{ScopeProjectsExecute},
+		CreatedAt: time.Now(),
+	}
+
+	b.ResetTimer()
+	b.ReportAllocs()
+
+	for i := 0; i < b.N; i++ {
+		_ = apiKey.IsIPAllowed("192.168.1.100")
+	}
+}
+
+// BenchmarkHealthEndpointSkip benchmarks the path-skip logic for health endpoints.
+func BenchmarkHealthEndpointSkip(b *testing.B) {
+	handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+	})
+
+	// Simulate the skip check in middleware
+	middleware := func(next http.Handler) http.Handler {
+		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			path := r.URL.Path
+			if path == "/health" || path == "/ready" || path == "/metrics" {
+				next.ServeHTTP(w, r)
+				return
+			}
+			// Would do auth here
+			next.ServeHTTP(w, r)
+		})
+	}
+
+	wrappedHandler := middleware(handler)
+
+	b.ResetTimer()
+	b.ReportAllocs()
+
+	for i := 0; i < b.N; i++ {
+		req := httptest.NewRequest("GET", "/health", nil)
+		rec := httptest.NewRecorder()
+		wrappedHandler.ServeHTTP(rec, req)
+	}
+}
+
+// BenchmarkConcurrentAuth benchmarks concurrent auth middleware calls.
+func BenchmarkConcurrentAuth(b *testing.B) {
+	apiKey := &APIKey{
+		ID:        "test-key-id",
+		Name:      "benchmark-key",
+		KeyPrefix: "rdev",
+		Scopes:    []Scope{ScopeProjectsExecute, ScopeProjectsRead},
+		CreatedAt: time.Now(),
+	}
+
+	handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+	})
+
+	middleware := func(next http.Handler) http.Handler {
+		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			key := r.Header.Get(HeaderAPIKey)
+			if key == "" || key != "valid-key" {
+				w.WriteHeader(http.StatusUnauthorized)
+				return
+			}
+			ctx := context.WithValue(r.Context(), contextKeyAPIKey, apiKey)
+			next.ServeHTTP(w, r.WithContext(ctx))
+		})
+	}
+
+	wrappedHandler := middleware(handler)
+
+	b.ResetTimer()
+	b.ReportAllocs()
+
+	b.RunParallel(func(pb *testing.PB) {
+		for pb.Next() {
+			req := httptest.NewRequest("GET", "/projects", nil)
+			req.Header.Set(HeaderAPIKey, "valid-key")
+			rec := httptest.NewRecorder()
+			wrappedHandler.ServeHTTP(rec, req)
+		}
+	})
+}
--- a/internal/auth/scopes.go
+++ b/internal/auth/scopes.go
@ -11,6 +11,11 @@ const (
 	ScopeProjectsExecute Scope = "projects:execute"
 	ScopeKeysRead        Scope = "keys:read"
 	ScopeKeysWrite       Scope = "keys:write"
+	ScopeAuditRead       Scope = "audit:read"
+	ScopeQueueRead       Scope = "queue:read"
+	ScopeQueueWrite      Scope = "queue:write"
+	ScopeWebhookRead     Scope = "webhook:read"
+	ScopeWebhookWrite    Scope = "webhook:write"
 	ScopeAdmin           Scope = "admin"
 )

@ -20,6 +25,11 @@ var AllScopes = []Scope{
 	ScopeProjectsExecute,
 	ScopeKeysRead,
 	ScopeKeysWrite,
+	ScopeAuditRead,
+	ScopeQueueRead,
+	ScopeQueueWrite,
+	ScopeWebhookRead,
+	ScopeWebhookWrite,
 	ScopeAdmin,
 }

@ -29,6 +39,11 @@ var ScopeDescriptions = map[Scope]string{
 	ScopeProjectsExecute: "Execute commands (claude, shell, git) on projects",
 	ScopeKeysRead:        "List API keys (metadata only, not secrets)",
 	ScopeKeysWrite:       "Create and revoke API keys",
+	ScopeAuditRead:       "View audit logs for command executions",
+	ScopeQueueRead:       "View queued commands and queue status",
+	ScopeQueueWrite:      "Enqueue and cancel queued commands",
+	ScopeWebhookRead:     "View webhooks and delivery history",
+	ScopeWebhookWrite:    "Create, update, and delete webhooks",
 	ScopeAdmin:           "Full administrative access (includes all scopes)",
 }

--- a/internal/auth/service.go
+++ b/internal/auth/service.go
@ -5,6 +5,7 @@ import (
 	"database/sql"
 	"errors"
 	"fmt"
+	"net"
 	"time"

 	"github.com/lib/pq"
@ -15,6 +16,7 @@ var (
 	ErrKeyNotFound  = errors.New("api key not found")
 	ErrKeyRevoked   = errors.New("api key has been revoked")
 	ErrKeyExpired   = errors.New("api key has expired")
+	ErrIPNotAllowed = errors.New("ip address not allowed")
 )

 // APIKey represents a stored API key.
@ -24,6 +26,7 @@ type APIKey struct {
 	KeyPrefix  string
 	Scopes     []Scope
 	ProjectIDs []string // nil = all projects
+	AllowedIPs []string // CIDR notation, e.g., ["192.168.1.0/24"]; nil = no restriction
 	CreatedAt  time.Time
 	ExpiresAt  *time.Time
 	LastUsedAt *time.Time
@ -49,11 +52,42 @@ func (k *APIKey) IsActive() bool {
 	return !k.IsRevoked() && !k.IsExpired()
 }

+// IsIPAllowed checks if the given IP address is allowed by the key's IP restrictions.
+// Returns true if no IP restrictions are set or if the IP matches any allowed CIDR.
+func (k *APIKey) IsIPAllowed(clientIP string) bool {
+	// No restrictions means all IPs are allowed
+	if len(k.AllowedIPs) == 0 {
+		return true
+	}
+
+	ip := net.ParseIP(clientIP)
+	if ip == nil {
+		return false
+	}
+
+	for _, cidr := range k.AllowedIPs {
+		_, network, err := net.ParseCIDR(cidr)
+		if err != nil {
+			// If not a CIDR, try parsing as single IP
+			allowedIP := net.ParseIP(cidr)
+			if allowedIP != nil && allowedIP.Equal(ip) {
+				return true
+			}
+			continue
+		}
+		if network.Contains(ip) {
+			return true
+		}
+	}
+	return false
+}
+
 // CreateKeyRequest is the input for creating a new key.
 type CreateKeyRequest struct {
 	Name       string
 	Scopes     []Scope
 	ProjectIDs []string      // nil = all projects
+	AllowedIPs []string      // CIDR notation; nil = no restriction
 	ExpiresIn  time.Duration // 0 = never
 	CreatedBy  string
 }
@ -104,10 +138,10 @@ func (s *Service) Create(ctx context.Context, req CreateKeyRequest) (*CreateKeyR

 	var id string
 	err = s.db.QueryRowContext(ctx, `
-		INSERT INTO api_keys (name, key_hash, key_prefix, scopes, project_ids, expires_at, created_by)
-		VALUES ($1, $2, $3, $4, $5, $6, $7)
+		INSERT INTO api_keys (name, key_hash, key_prefix, scopes, project_ids, allowed_ips, expires_at, created_by)
+		VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
 		RETURNING id
-	`, req.Name, keyHash, prefix, pq.Array(scopeStrings), pq.Array(req.ProjectIDs), expiresAt, req.CreatedBy).Scan(&id)
+	`, req.Name, keyHash, prefix, pq.Array(scopeStrings), pq.Array(req.ProjectIDs), pq.Array(req.AllowedIPs), expiresAt, req.CreatedBy).Scan(&id)

 	if err != nil {
 		return nil, fmt.Errorf("insert key: %w", err)
@ -119,6 +153,7 @@ func (s *Service) Create(ctx context.Context, req CreateKeyRequest) (*CreateKeyR
 		KeyPrefix:  prefix,
 		Scopes:     req.Scopes,
 		ProjectIDs: req.ProjectIDs,
+		AllowedIPs: req.AllowedIPs,
 		CreatedAt:  time.Now(),
 		ExpiresAt:  expiresAt,
 		CreatedBy:  req.CreatedBy,
@ -156,7 +191,7 @@ func (s *Service) Validate(ctx context.Context, key string) (*APIKey, error) {
 	)

 	err := s.db.QueryRowContext(ctx, `
-		SELECT id, name, key_prefix, scopes, project_ids, created_at, expires_at, last_used_at, revoked_at, created_by
+		SELECT id, name, key_prefix, scopes, project_ids, allowed_ips, created_at, expires_at, last_used_at, revoked_at, created_by
 		FROM api_keys
 		WHERE key_hash = $1
 	`, keyHash).Scan(
@ -165,6 +200,7 @@ func (s *Service) Validate(ctx context.Context, key string) (*APIKey, error) {
 		&apiKey.KeyPrefix,
 		pq.Array(&scopeStrings),
 		pq.Array(&apiKey.ProjectIDs),
+		pq.Array(&apiKey.AllowedIPs),
 		&apiKey.CreatedAt,
 		&apiKey.ExpiresAt,
 		&apiKey.LastUsedAt,
@ -191,7 +227,7 @@ func (s *Service) Validate(ctx context.Context, key string) (*APIKey, error) {

 	// Update last_used_at asynchronously
 	go func() {
-		s.db.ExecContext(context.Background(), `
+		_, _ = s.db.ExecContext(context.Background(), `
 			UPDATE api_keys SET last_used_at = NOW() WHERE id = $1
 		`, apiKey.ID)
 	}()
@ -202,14 +238,14 @@ func (s *Service) Validate(ctx context.Context, key string) (*APIKey, error) {
 // List returns all API keys (without secrets).
 func (s *Service) List(ctx context.Context) ([]*APIKey, error) {
 	rows, err := s.db.QueryContext(ctx, `
-		SELECT id, name, key_prefix, scopes, project_ids, created_at, expires_at, last_used_at, revoked_at, created_by
+		SELECT id, name, key_prefix, scopes, project_ids, allowed_ips, created_at, expires_at, last_used_at, revoked_at, created_by
 		FROM api_keys
 		ORDER BY created_at DESC
 	`)
 	if err != nil {
 		return nil, fmt.Errorf("query keys: %w", err)
 	}
-	defer rows.Close()
+	defer func() { _ = rows.Close() }()

 	var keys []*APIKey
 	for rows.Next() {
@ -223,6 +259,7 @@ func (s *Service) List(ctx context.Context) ([]*APIKey, error) {
 			&key.KeyPrefix,
 			pq.Array(&scopeStrings),
 			pq.Array(&key.ProjectIDs),
+			pq.Array(&key.AllowedIPs),
 			&key.CreatedAt,
 			&key.ExpiresAt,
 			&key.LastUsedAt,
@ -246,7 +283,7 @@ func (s *Service) Get(ctx context.Context, id string) (*APIKey, error) {
 	)

 	err := s.db.QueryRowContext(ctx, `
-		SELECT id, name, key_prefix, scopes, project_ids, created_at, expires_at, last_used_at, revoked_at, created_by
+		SELECT id, name, key_prefix, scopes, project_ids, allowed_ips, created_at, expires_at, last_used_at, revoked_at, created_by
 		FROM api_keys
 		WHERE id = $1
 	`, id).Scan(
@ -255,6 +292,7 @@ func (s *Service) Get(ctx context.Context, id string) (*APIKey, error) {
 		&key.KeyPrefix,
 		pq.Array(&scopeStrings),
 		pq.Array(&key.ProjectIDs),
+		pq.Array(&key.AllowedIPs),
 		&key.CreatedAt,
 		&key.ExpiresAt,
 		&key.LastUsedAt,
--- a/internal/auth/service_test.go
+++ b/internal/auth/service_test.go
@ -392,3 +392,194 @@ func TestService_Revoke(t *testing.T) {
 		}
 	})
 }
+
+func TestAPIKey_IsIPAllowed(t *testing.T) {
+	tests := []struct {
+		name       string
+		allowedIPs []string
+		clientIP   string
+		want       bool
+	}{
+		{
+			name:       "no restrictions - any IP allowed",
+			allowedIPs: nil,
+			clientIP:   "192.168.1.100",
+			want:       true,
+		},
+		{
+			name:       "empty restrictions - any IP allowed",
+			allowedIPs: []string{},
+			clientIP:   "10.0.0.5",
+			want:       true,
+		},
+		{
+			name:       "single IP match",
+			allowedIPs: []string{"192.168.1.100"},
+			clientIP:   "192.168.1.100",
+			want:       true,
+		},
+		{
+			name:       "single IP no match",
+			allowedIPs: []string{"192.168.1.100"},
+			clientIP:   "192.168.1.101",
+			want:       false,
+		},
+		{
+			name:       "CIDR match",
+			allowedIPs: []string{"192.168.1.0/24"},
+			clientIP:   "192.168.1.55",
+			want:       true,
+		},
+		{
+			name:       "CIDR no match",
+			allowedIPs: []string{"192.168.1.0/24"},
+			clientIP:   "192.168.2.1",
+			want:       false,
+		},
+		{
+			name:       "multiple CIDRs - first matches",
+			allowedIPs: []string{"10.0.0.0/8", "192.168.0.0/16"},
+			clientIP:   "10.50.25.100",
+			want:       true,
+		},
+		{
+			name:       "multiple CIDRs - second matches",
+			allowedIPs: []string{"10.0.0.0/8", "192.168.0.0/16"},
+			clientIP:   "192.168.50.1",
+			want:       true,
+		},
+		{
+			name:       "multiple CIDRs - none match",
+			allowedIPs: []string{"10.0.0.0/8", "192.168.0.0/16"},
+			clientIP:   "172.16.0.1",
+			want:       false,
+		},
+		{
+			name:       "mixed IP and CIDR - IP matches",
+			allowedIPs: []string{"10.0.0.0/8", "172.16.0.1"},
+			clientIP:   "172.16.0.1",
+			want:       true,
+		},
+		{
+			name:       "mixed IP and CIDR - CIDR matches",
+			allowedIPs: []string{"10.0.0.0/8", "172.16.0.1"},
+			clientIP:   "10.1.2.3",
+			want:       true,
+		},
+		{
+			name:       "IPv6 CIDR",
+			allowedIPs: []string{"2001:db8::/32"},
+			clientIP:   "2001:db8:1234:5678::1",
+			want:       true,
+		},
+		{
+			name:       "IPv6 no match",
+			allowedIPs: []string{"2001:db8::/32"},
+			clientIP:   "2001:db9::1",
+			want:       false,
+		},
+		{
+			name:       "invalid client IP",
+			allowedIPs: []string{"192.168.1.0/24"},
+			clientIP:   "not-an-ip",
+			want:       false,
+		},
+		{
+			name:       "invalid CIDR in allowlist (fallback to IP parse)",
+			allowedIPs: []string{"invalid/cidr", "192.168.1.100"},
+			clientIP:   "192.168.1.100",
+			want:       true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			key := &APIKey{AllowedIPs: tt.allowedIPs}
+			if got := key.IsIPAllowed(tt.clientIP); got != tt.want {
+				t.Errorf("IsIPAllowed(%q) = %v, want %v", tt.clientIP, got, tt.want)
+			}
+		})
+	}
+}
+
+func TestService_CreateWithAllowedIPs(t *testing.T) {
+	db := testutil.TestDB(t)
+	t.Cleanup(func() { testutil.CleanupTestKeys(t, db) })
+
+	svc := NewService(db, "admin-key")
+
+	t.Run("creates key with IP restrictions", func(t *testing.T) {
+		resp, err := svc.Create(context.Background(), CreateKeyRequest{
+			Name:       "test-ip-key",
+			Scopes:     []Scope{ScopeProjectsRead},
+			AllowedIPs: []string{"192.168.1.0/24", "10.0.0.1"},
+			ExpiresIn:  24 * time.Hour,
+			CreatedBy:  "test",
+		})
+		if err != nil {
+			t.Fatalf("Create() error = %v", err)
+		}
+
+		if len(resp.Key.AllowedIPs) != 2 {
+			t.Errorf("Key.AllowedIPs length = %d, want 2", len(resp.Key.AllowedIPs))
+		}
+
+		// Verify via Get
+		key, err := svc.Get(context.Background(), resp.Key.ID)
+		if err != nil {
+			t.Fatalf("Get() error = %v", err)
+		}
+
+		if len(key.AllowedIPs) != 2 {
+			t.Errorf("Retrieved Key.AllowedIPs length = %d, want 2", len(key.AllowedIPs))
+		}
+
+		// Verify via Validate
+		validatedKey, err := svc.Validate(context.Background(), resp.Secret)
+		if err != nil {
+			t.Fatalf("Validate() error = %v", err)
+		}
+
+		if len(validatedKey.AllowedIPs) != 2 {
+			t.Errorf("Validated Key.AllowedIPs length = %d, want 2", len(validatedKey.AllowedIPs))
+		}
+
+		// Verify IP checking works
+		if !validatedKey.IsIPAllowed("192.168.1.50") {
+			t.Error("IsIPAllowed should return true for IP in allowed CIDR")
+		}
+		if !validatedKey.IsIPAllowed("10.0.0.1") {
+			t.Error("IsIPAllowed should return true for explicitly allowed IP")
+		}
+		if validatedKey.IsIPAllowed("172.16.0.1") {
+			t.Error("IsIPAllowed should return false for IP not in allowed list")
+		}
+	})
+
+	t.Run("creates key with no IP restrictions", func(t *testing.T) {
+		resp, err := svc.Create(context.Background(), CreateKeyRequest{
+			Name:      "test-no-ip-key",
+			Scopes:    []Scope{ScopeProjectsRead},
+			ExpiresIn: 24 * time.Hour,
+			CreatedBy: "test",
+		})
+		if err != nil {
+			t.Fatalf("Create() error = %v", err)
+		}
+
+		if len(resp.Key.AllowedIPs) != 0 {
+			t.Errorf("Key.AllowedIPs should be empty, got %v", resp.Key.AllowedIPs)
+		}
+
+		// Verify via Validate
+		validatedKey, err := svc.Validate(context.Background(), resp.Secret)
+		if err != nil {
+			t.Fatalf("Validate() error = %v", err)
+		}
+
+		// Any IP should be allowed
+		if !validatedKey.IsIPAllowed("1.2.3.4") {
+			t.Error("IsIPAllowed should return true when no restrictions set")
+		}
+	})
+}
--- a/internal/circuitbreaker/circuitbreaker.go
+++ b/internal/circuitbreaker/circuitbreaker.go
@ -0,0 +1,220 @@
+// Package circuitbreaker provides protection against cascading failures.
+//
+// The circuit breaker pattern prevents repeated calls to a failing service,
+// allowing it time to recover. After a threshold of failures, the circuit
+// "opens" and returns errors immediately without attempting the operation.
+package circuitbreaker
+
+import (
+	"errors"
+	"sync"
+	"time"
+)
+
+// State represents the circuit breaker state.
+type State int
+
+const (
+	// Closed is the normal operating state - requests are allowed through.
+	Closed State = iota
+	// Open means the circuit is tripped - requests fail immediately.
+	Open
+	// HalfOpen means we're testing if the service has recovered.
+	HalfOpen
+)
+
+func (s State) String() string {
+	switch s {
+	case Closed:
+		return "closed"
+	case Open:
+		return "open"
+	case HalfOpen:
+		return "half-open"
+	default:
+		return "unknown"
+	}
+}
+
+// Errors returned by the circuit breaker.
+var (
+	ErrCircuitOpen = errors.New("circuit breaker is open")
+)
+
+// Config configures the circuit breaker behavior.
+type Config struct {
+	// FailureThreshold is the number of consecutive failures before opening.
+	// Default: 5
+	FailureThreshold int
+
+	// ResetTimeout is how long to wait before attempting recovery (half-open).
+	// Default: 30 seconds
+	ResetTimeout time.Duration
+
+	// HalfOpenRequests is how many requests to allow in half-open state.
+	// Default: 1
+	HalfOpenRequests int
+}
+
+// DefaultConfig returns sensible defaults.
+func DefaultConfig() Config {
+	return Config{
+		FailureThreshold: 5,
+		ResetTimeout:     30 * time.Second,
+		HalfOpenRequests: 1,
+	}
+}
+
+// CircuitBreaker implements the circuit breaker pattern.
+type CircuitBreaker struct {
+	cfg Config
+
+	mu               sync.RWMutex
+	state            State
+	failures         int
+	successes        int
+	lastFailure      time.Time
+	halfOpenRequests int
+}
+
+// New creates a new circuit breaker with the given configuration.
+func New(cfg Config) *CircuitBreaker {
+	if cfg.FailureThreshold <= 0 {
+		cfg.FailureThreshold = 5
+	}
+	if cfg.ResetTimeout <= 0 {
+		cfg.ResetTimeout = 30 * time.Second
+	}
+	if cfg.HalfOpenRequests <= 0 {
+		cfg.HalfOpenRequests = 1
+	}
+
+	return &CircuitBreaker{
+		cfg:   cfg,
+		state: Closed,
+	}
+}
+
+// Execute runs the function if the circuit allows it.
+// Returns ErrCircuitOpen if the circuit is open.
+func (cb *CircuitBreaker) Execute(fn func() error) error {
+	if !cb.canExecute() {
+		return ErrCircuitOpen
+	}
+
+	err := fn()
+	cb.recordResult(err)
+	return err
+}
+
+// canExecute checks if a request should be allowed.
+func (cb *CircuitBreaker) canExecute() bool {
+	cb.mu.Lock()
+	defer cb.mu.Unlock()
+
+	switch cb.state {
+	case Closed:
+		return true
+
+	case Open:
+		// Check if reset timeout has passed
+		if time.Since(cb.lastFailure) > cb.cfg.ResetTimeout {
+			cb.state = HalfOpen
+			cb.halfOpenRequests = 0
+			return true
+		}
+		return false
+
+	case HalfOpen:
+		// Allow limited requests in half-open state
+		if cb.halfOpenRequests < cb.cfg.HalfOpenRequests {
+			cb.halfOpenRequests++
+			return true
+		}
+		return false
+	}
+
+	return false
+}
+
+// recordResult updates state based on operation outcome.
+func (cb *CircuitBreaker) recordResult(err error) {
+	cb.mu.Lock()
+	defer cb.mu.Unlock()
+
+	if err != nil {
+		cb.onFailure()
+	} else {
+		cb.onSuccess()
+	}
+}
+
+// onFailure handles a failed operation.
+func (cb *CircuitBreaker) onFailure() {
+	cb.failures++
+	cb.successes = 0
+	cb.lastFailure = time.Now()
+
+	switch cb.state {
+	case Closed:
+		if cb.failures >= cb.cfg.FailureThreshold {
+			cb.state = Open
+		}
+	case HalfOpen:
+		cb.state = Open
+	}
+}
+
+// onSuccess handles a successful operation.
+func (cb *CircuitBreaker) onSuccess() {
+	cb.successes++
+
+	switch cb.state {
+	case Closed:
+		cb.failures = 0
+	case HalfOpen:
+		// Successful probe - close the circuit
+		cb.state = Closed
+		cb.failures = 0
+	}
+}
+
+// State returns the current circuit state.
+func (cb *CircuitBreaker) State() State {
+	cb.mu.RLock()
+	defer cb.mu.RUnlock()
+	return cb.state
+}
+
+// Stats returns current circuit statistics.
+func (cb *CircuitBreaker) Stats() Stats {
+	cb.mu.RLock()
+	defer cb.mu.RUnlock()
+
+	return Stats{
+		State:       cb.state,
+		Failures:    cb.failures,
+		Successes:   cb.successes,
+		LastFailure: cb.lastFailure,
+	}
+}
+
+// Reset manually resets the circuit breaker to closed state.
+func (cb *CircuitBreaker) Reset() {
+	cb.mu.Lock()
+	defer cb.mu.Unlock()
+
+	cb.state = Closed
+	cb.failures = 0
+	cb.successes = 0
+	cb.lastFailure = time.Time{}
+	cb.halfOpenRequests = 0
+}
+
+// Stats contains circuit breaker statistics.
+type Stats struct {
+	State       State
+	Failures    int
+	Successes   int
+	LastFailure time.Time
+}
--- a/internal/circuitbreaker/circuitbreaker_test.go
+++ b/internal/circuitbreaker/circuitbreaker_test.go
@ -0,0 +1,284 @@
+package circuitbreaker
+
+import (
+	"errors"
+	"sync"
+	"sync/atomic"
+	"testing"
+	"time"
+)
+
+var errTest = errors.New("test error")
+
+func TestCircuitBreaker_Closed(t *testing.T) {
+	cb := New(DefaultConfig())
+
+	// Should be closed initially
+	if cb.State() != Closed {
+		t.Errorf("initial state = %v, want Closed", cb.State())
+	}
+
+	// Successful calls should work
+	called := false
+	err := cb.Execute(func() error {
+		called = true
+		return nil
+	})
+
+	if err != nil {
+		t.Errorf("Execute() error = %v", err)
+	}
+	if !called {
+		t.Error("function was not called")
+	}
+}
+
+func TestCircuitBreaker_OpensAfterFailures(t *testing.T) {
+	cb := New(Config{
+		FailureThreshold: 3,
+		ResetTimeout:     1 * time.Second,
+	})
+
+	// Fail 3 times
+	for i := 0; i < 3; i++ {
+		_ = cb.Execute(func() error {
+			return errTest
+		})
+	}
+
+	// Should be open now
+	if cb.State() != Open {
+		t.Errorf("state after 3 failures = %v, want Open", cb.State())
+	}
+
+	// Next call should fail immediately
+	called := false
+	err := cb.Execute(func() error {
+		called = true
+		return nil
+	})
+
+	if err != ErrCircuitOpen {
+		t.Errorf("Execute() error = %v, want ErrCircuitOpen", err)
+	}
+	if called {
+		t.Error("function should not be called when circuit is open")
+	}
+}
+
+func TestCircuitBreaker_HalfOpenAfterTimeout(t *testing.T) {
+	cb := New(Config{
+		FailureThreshold: 2,
+		ResetTimeout:     50 * time.Millisecond,
+	})
+
+	// Trip the circuit
+	_ = cb.Execute(func() error { return errTest })
+	_ = cb.Execute(func() error { return errTest })
+
+	if cb.State() != Open {
+		t.Fatalf("expected Open state, got %v", cb.State())
+	}
+
+	// Wait for reset timeout
+	time.Sleep(60 * time.Millisecond)
+
+	// Next request should be allowed (half-open)
+	called := false
+	err := cb.Execute(func() error {
+		called = true
+		return nil
+	})
+
+	if err != nil {
+		t.Errorf("Execute() in half-open = %v", err)
+	}
+	if !called {
+		t.Error("function should be called in half-open state")
+	}
+
+	// After success, circuit should be closed
+	if cb.State() != Closed {
+		t.Errorf("state after successful probe = %v, want Closed", cb.State())
+	}
+}
+
+func TestCircuitBreaker_HalfOpenRetripsOnFailure(t *testing.T) {
+	cb := New(Config{
+		FailureThreshold: 2,
+		ResetTimeout:     50 * time.Millisecond,
+	})
+
+	// Trip the circuit
+	_ = cb.Execute(func() error { return errTest })
+	_ = cb.Execute(func() error { return errTest })
+
+	// Wait for reset timeout
+	time.Sleep(60 * time.Millisecond)
+
+	// Fail in half-open state
+	_ = cb.Execute(func() error { return errTest })
+
+	// Should be open again
+	if cb.State() != Open {
+		t.Errorf("state after half-open failure = %v, want Open", cb.State())
+	}
+}
+
+func TestCircuitBreaker_SuccessResetsFailures(t *testing.T) {
+	cb := New(Config{
+		FailureThreshold: 3,
+		ResetTimeout:     1 * time.Second,
+	})
+
+	// 2 failures
+	cb.Execute(func() error { return errTest })
+	cb.Execute(func() error { return errTest })
+
+	// 1 success should reset the count
+	cb.Execute(func() error { return nil })
+
+	// 2 more failures - should not open (only 2 consecutive)
+	cb.Execute(func() error { return errTest })
+	cb.Execute(func() error { return errTest })
+
+	if cb.State() != Closed {
+		t.Errorf("state = %v, want Closed (success reset counter)", cb.State())
+	}
+}
+
+func TestCircuitBreaker_Stats(t *testing.T) {
+	cb := New(Config{
+		FailureThreshold: 5,
+		ResetTimeout:     1 * time.Second,
+	})
+
+	// Some operations
+	cb.Execute(func() error { return nil })
+	cb.Execute(func() error { return errTest })
+	cb.Execute(func() error { return errTest })
+
+	stats := cb.Stats()
+
+	if stats.State != Closed {
+		t.Errorf("Stats.State = %v, want Closed", stats.State)
+	}
+	if stats.Failures != 2 {
+		t.Errorf("Stats.Failures = %d, want 2", stats.Failures)
+	}
+	if stats.LastFailure.IsZero() {
+		t.Error("Stats.LastFailure should not be zero")
+	}
+}
+
+func TestCircuitBreaker_Reset(t *testing.T) {
+	cb := New(Config{
+		FailureThreshold: 2,
+		ResetTimeout:     1 * time.Hour,
+	})
+
+	// Trip the circuit
+	cb.Execute(func() error { return errTest })
+	cb.Execute(func() error { return errTest })
+
+	if cb.State() != Open {
+		t.Fatalf("expected Open state, got %v", cb.State())
+	}
+
+	// Manual reset
+	cb.Reset()
+
+	if cb.State() != Closed {
+		t.Errorf("state after Reset() = %v, want Closed", cb.State())
+	}
+
+	// Should work again
+	called := false
+	cb.Execute(func() error {
+		called = true
+		return nil
+	})
+	if !called {
+		t.Error("function should be called after Reset()")
+	}
+}
+
+func TestCircuitBreaker_Concurrent(t *testing.T) {
+	cb := New(Config{
+		FailureThreshold: 10,
+		ResetTimeout:     100 * time.Millisecond,
+	})
+
+	var wg sync.WaitGroup
+	var successCount, failCount atomic.Int64
+
+	// Concurrent executions
+	for i := 0; i < 100; i++ {
+		wg.Add(1)
+		go func(id int) {
+			defer wg.Done()
+			var err error
+			if id%3 == 0 {
+				err = errTest
+			}
+			result := cb.Execute(func() error { return err })
+			if result == nil {
+				successCount.Add(1)
+			} else {
+				failCount.Add(1)
+			}
+		}(i)
+	}
+
+	wg.Wait()
+
+	total := successCount.Load() + failCount.Load()
+	if total != 100 {
+		t.Errorf("total executions = %d, want 100", total)
+	}
+}
+
+func TestState_String(t *testing.T) {
+	tests := []struct {
+		state State
+		want  string
+	}{
+		{Closed, "closed"},
+		{Open, "open"},
+		{HalfOpen, "half-open"},
+		{State(99), "unknown"},
+	}
+
+	for _, tt := range tests {
+		if got := tt.state.String(); got != tt.want {
+			t.Errorf("State(%d).String() = %q, want %q", tt.state, got, tt.want)
+		}
+	}
+}
+
+func TestDefaultConfig(t *testing.T) {
+	cfg := DefaultConfig()
+
+	if cfg.FailureThreshold != 5 {
+		t.Errorf("FailureThreshold = %d, want 5", cfg.FailureThreshold)
+	}
+	if cfg.ResetTimeout != 30*time.Second {
+		t.Errorf("ResetTimeout = %v, want 30s", cfg.ResetTimeout)
+	}
+	if cfg.HalfOpenRequests != 1 {
+		t.Errorf("HalfOpenRequests = %d, want 1", cfg.HalfOpenRequests)
+	}
+}
+
+func TestNew_DefaultsInvalidValues(t *testing.T) {
+	cb := New(Config{
+		FailureThreshold: -1,
+		ResetTimeout:     -1,
+		HalfOpenRequests: -1,
+	})
+
+	stats := cb.Stats()
+	if stats.State != Closed {
+		t.Error("new circuit breaker should be Closed")
+	}
+}
--- a/internal/db/migrations/003_add_allowed_ips.sql
+++ b/internal/db/migrations/003_add_allowed_ips.sql
@ -0,0 +1,9 @@
+-- Add IP allowlisting support to API keys
+-- Allows restricting API key usage to specific IP addresses/CIDR ranges
+
+-- Add allowed_ips column to api_keys table
+-- Using TEXT[] to store CIDR notation strings (e.g., "192.168.1.0/24", "10.0.0.0/8")
+-- NULL means no IP restriction (allow from anywhere)
+ALTER TABLE api_keys ADD COLUMN IF NOT EXISTS allowed_ips TEXT[];
+
+COMMENT ON COLUMN api_keys.allowed_ips IS 'Array of allowed IP addresses/CIDR ranges. NULL = no restriction (allow from anywhere)';
--- a/internal/db/migrations/004_audit_log.sql
+++ b/internal/db/migrations/004_audit_log.sql
@ -0,0 +1,40 @@
+-- Audit log table for tracking command execution history
+CREATE TABLE IF NOT EXISTS audit_log (
+    id               TEXT PRIMARY KEY,
+    api_key_id       TEXT NOT NULL,
+    command_id       TEXT NOT NULL,
+    project_id       TEXT NOT NULL,
+    command_type     TEXT NOT NULL,
+    args             TEXT,
+    client_ip        TEXT,
+    user_agent       TEXT,
+    started_at       TIMESTAMPTZ NOT NULL,
+    completed_at     TIMESTAMPTZ,
+    exit_code        INTEGER,
+    duration_ms      INTEGER,
+    status           TEXT DEFAULT 'running',
+    error_message    TEXT,
+    output_size_bytes INTEGER DEFAULT 0,
+    created_at       TIMESTAMPTZ DEFAULT NOW()
+);
+
+-- Index for querying by API key (e.g., "show me all commands from this key")
+CREATE INDEX IF NOT EXISTS idx_audit_api_key ON audit_log(api_key_id, created_at DESC);
+
+-- Index for querying by project (e.g., "show me all commands for this project")
+CREATE INDEX IF NOT EXISTS idx_audit_project ON audit_log(project_id, created_at DESC);
+
+-- Index for looking up by command ID (for updating completion status)
+CREATE INDEX IF NOT EXISTS idx_audit_command ON audit_log(command_id);
+
+-- Index for filtering by status
+CREATE INDEX IF NOT EXISTS idx_audit_status ON audit_log(status, created_at DESC);
+
+COMMENT ON TABLE audit_log IS 'Persistent audit log for all command executions';
+COMMENT ON COLUMN audit_log.api_key_id IS 'ID of the API key that initiated the command';
+COMMENT ON COLUMN audit_log.command_id IS 'Unique identifier for the command execution';
+COMMENT ON COLUMN audit_log.project_id IS 'Project/pod where command was executed';
+COMMENT ON COLUMN audit_log.command_type IS 'Type: claude, shell, or git';
+COMMENT ON COLUMN audit_log.args IS 'JSON-encoded command arguments';
+COMMENT ON COLUMN audit_log.status IS 'running, success, error, or cancelled';
+COMMENT ON COLUMN audit_log.output_size_bytes IS 'Total size of command output in bytes';
--- a/internal/db/migrations/005_rate_limiting.sql
+++ b/internal/db/migrations/005_rate_limiting.sql
@ -0,0 +1,31 @@
+-- Add rate limiting columns to api_keys table
+ALTER TABLE api_keys
+    ADD COLUMN IF NOT EXISTS rate_limit_per_minute INT DEFAULT 60,
+    ADD COLUMN IF NOT EXISTS rate_limit_per_hour INT DEFAULT 1000;
+
+-- Create rate_limit_state table to track per-key usage windows
+CREATE TABLE IF NOT EXISTS rate_limit_state (
+    id               SERIAL PRIMARY KEY,
+    api_key_id       UUID NOT NULL REFERENCES api_keys(id) ON DELETE CASCADE,
+    window_start     TIMESTAMPTZ NOT NULL,
+    window_type      VARCHAR(10) NOT NULL, -- 'minute' or 'hour'
+    request_count    INT NOT NULL DEFAULT 0,
+    created_at       TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+    updated_at       TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+    UNIQUE (api_key_id, window_start, window_type)
+);
+
+-- Index for efficient lookups by api_key_id and window
+CREATE INDEX IF NOT EXISTS idx_rate_limit_state_lookup
+    ON rate_limit_state(api_key_id, window_type, window_start DESC);
+
+-- Index for cleanup of old entries
+CREATE INDEX IF NOT EXISTS idx_rate_limit_state_cleanup
+    ON rate_limit_state(window_start);
+
+COMMENT ON TABLE rate_limit_state IS 'Tracks rate limit usage per API key per time window';
+COMMENT ON COLUMN rate_limit_state.window_start IS 'Start of the time window (truncated to minute or hour)';
+COMMENT ON COLUMN rate_limit_state.window_type IS 'Type of window: minute or hour';
+COMMENT ON COLUMN rate_limit_state.request_count IS 'Number of requests in this window';
+COMMENT ON COLUMN api_keys.rate_limit_per_minute IS 'Maximum requests allowed per minute (default: 60)';
+COMMENT ON COLUMN api_keys.rate_limit_per_hour IS 'Maximum requests allowed per hour (default: 1000)';
--- a/internal/db/migrations/006_command_queue.sql
+++ b/internal/db/migrations/006_command_queue.sql
@ -0,0 +1,47 @@
+-- Create command_queue table for async command execution
+CREATE TABLE IF NOT EXISTS command_queue (
+    id               UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+    project_id       TEXT NOT NULL,
+    command          TEXT NOT NULL,
+    command_type     VARCHAR(20) NOT NULL, -- 'claude', 'shell', 'git'
+    working_dir      TEXT,
+    status           VARCHAR(20) NOT NULL DEFAULT 'pending', -- pending, running, completed, failed, cancelled
+    priority         INT NOT NULL DEFAULT 0,
+    created_at       TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+    started_at       TIMESTAMPTZ,
+    completed_at     TIMESTAMPTZ,
+    result_exit_code INT,
+    result_output    TEXT,
+    result_error     TEXT,
+    api_key_id       TEXT -- For audit trail, references the key that enqueued the command
+);
+
+-- Index for efficient queue queries: fetch pending commands by project ordered by priority
+CREATE INDEX IF NOT EXISTS idx_command_queue_project_status
+    ON command_queue(project_id, status, priority DESC, created_at ASC);
+
+-- Index for looking up commands by status (for monitoring/admin)
+CREATE INDEX IF NOT EXISTS idx_command_queue_status
+    ON command_queue(status);
+
+-- Index for cleanup of old completed commands
+CREATE INDEX IF NOT EXISTS idx_command_queue_completed_at
+    ON command_queue(completed_at)
+    WHERE completed_at IS NOT NULL;
+
+-- Index for audit trail by API key
+CREATE INDEX IF NOT EXISTS idx_command_queue_api_key
+    ON command_queue(api_key_id)
+    WHERE api_key_id IS NOT NULL;
+
+COMMENT ON TABLE command_queue IS 'Queued commands for async execution per project';
+COMMENT ON COLUMN command_queue.project_id IS 'Target project ID for command execution';
+COMMENT ON COLUMN command_queue.command IS 'The command to execute (prompt for claude, command for shell, JSON args for git)';
+COMMENT ON COLUMN command_queue.command_type IS 'Type of command: claude, shell, or git';
+COMMENT ON COLUMN command_queue.working_dir IS 'Optional working directory for command execution';
+COMMENT ON COLUMN command_queue.status IS 'Command status: pending, running, completed, failed, cancelled';
+COMMENT ON COLUMN command_queue.priority IS 'Priority level (higher = more urgent, 0 = default)';
+COMMENT ON COLUMN command_queue.result_exit_code IS 'Exit code from command execution';
+COMMENT ON COLUMN command_queue.result_output IS 'Stdout from command execution';
+COMMENT ON COLUMN command_queue.result_error IS 'Stderr or error message from command execution';
+COMMENT ON COLUMN command_queue.api_key_id IS 'API key ID that enqueued this command (for audit)';
--- a/internal/db/migrations/007_webhooks.sql
+++ b/internal/db/migrations/007_webhooks.sql
@ -0,0 +1,69 @@
+-- Create webhooks table for project webhook subscriptions
+CREATE TABLE IF NOT EXISTS webhooks (
+    id               TEXT PRIMARY KEY,
+    project_id       TEXT NOT NULL,
+    url              TEXT NOT NULL,
+    secret           TEXT, -- HMAC-SHA256 signing secret (optional but recommended)
+    events           TEXT NOT NULL, -- JSON array of event types to subscribe
+    enabled          BOOLEAN NOT NULL DEFAULT true,
+    created_at       TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+    updated_at       TIMESTAMPTZ NOT NULL DEFAULT NOW()
+);
+
+-- Index for efficient lookup by project
+CREATE INDEX IF NOT EXISTS idx_webhooks_project_id
+    ON webhooks(project_id);
+
+-- Index for finding enabled webhooks (most queries will filter by enabled)
+CREATE INDEX IF NOT EXISTS idx_webhooks_enabled
+    ON webhooks(enabled)
+    WHERE enabled = true;
+
+-- GIN index for efficient JSONB containment queries on events column
+-- Required for: WHERE events::jsonb ? 'event_type'
+CREATE INDEX IF NOT EXISTS idx_webhooks_events_gin
+    ON webhooks USING GIN ((events::jsonb));
+
+-- Create webhook_deliveries table for delivery tracking
+CREATE TABLE IF NOT EXISTS webhook_deliveries (
+    id               TEXT PRIMARY KEY,
+    webhook_id       TEXT NOT NULL REFERENCES webhooks(id) ON DELETE CASCADE,
+    event_type       TEXT NOT NULL,
+    payload          TEXT NOT NULL, -- JSON payload that was sent
+    response_status  INT,
+    response_body    TEXT,
+    delivered_at     TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+    success          BOOLEAN NOT NULL DEFAULT false,
+    retry_count      INT NOT NULL DEFAULT 0,
+    error_message    TEXT -- Capture error details for failed deliveries
+);
+
+-- Index for listing deliveries by webhook
+CREATE INDEX IF NOT EXISTS idx_webhook_deliveries_webhook_id
+    ON webhook_deliveries(webhook_id, delivered_at DESC);
+
+-- Index for monitoring failed deliveries
+CREATE INDEX IF NOT EXISTS idx_webhook_deliveries_success
+    ON webhook_deliveries(success, delivered_at DESC)
+    WHERE success = false;
+
+-- Index for cleanup of old deliveries
+CREATE INDEX IF NOT EXISTS idx_webhook_deliveries_delivered_at
+    ON webhook_deliveries(delivered_at);
+
+COMMENT ON TABLE webhooks IS 'Webhook subscriptions for project events';
+COMMENT ON COLUMN webhooks.project_id IS 'Project ID that this webhook is subscribed to';
+COMMENT ON COLUMN webhooks.url IS 'URL to POST webhook payloads to';
+COMMENT ON COLUMN webhooks.secret IS 'Secret for HMAC-SHA256 signing (X-Webhook-Signature header)';
+COMMENT ON COLUMN webhooks.events IS 'JSON array of event types: command.started, command.completed, command.failed, pod.ready, pod.failed';
+COMMENT ON COLUMN webhooks.enabled IS 'Whether this webhook is active';
+
+COMMENT ON TABLE webhook_deliveries IS 'Webhook delivery history and retry tracking';
+COMMENT ON COLUMN webhook_deliveries.webhook_id IS 'Reference to the webhook configuration';
+COMMENT ON COLUMN webhook_deliveries.event_type IS 'Type of event that triggered this delivery';
+COMMENT ON COLUMN webhook_deliveries.payload IS 'JSON payload that was sent to the webhook URL';
+COMMENT ON COLUMN webhook_deliveries.response_status IS 'HTTP status code from the webhook endpoint';
+COMMENT ON COLUMN webhook_deliveries.response_body IS 'Response body from the webhook endpoint (truncated)';
+COMMENT ON COLUMN webhook_deliveries.success IS 'Whether the delivery was successful (2xx response)';
+COMMENT ON COLUMN webhook_deliveries.retry_count IS 'Number of retry attempts for this delivery';
+COMMENT ON COLUMN webhook_deliveries.error_message IS 'Error details if delivery failed';
--- a/internal/db/postgres.go
+++ b/internal/db/postgres.go
@ -61,9 +61,14 @@ func New(cfg Config, logger *slog.Logger) (*DB, error) {
 	}

 	// Configure connection pool
-	db.SetMaxOpenConns(10)
-	db.SetMaxIdleConns(5)
+	// MaxOpenConns: limit concurrent connections to avoid overloading database
+	db.SetMaxOpenConns(25)
+	// MaxIdleConns: maintain some connections for reuse
+	db.SetMaxIdleConns(10)
+	// ConnMaxLifetime: recycle connections to pick up config changes
 	db.SetConnMaxLifetime(5 * time.Minute)
+	// ConnMaxIdleTime: close idle connections to free resources
+	db.SetConnMaxIdleTime(1 * time.Minute)

 	// Verify connection
 	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
@ -102,7 +107,7 @@ func (db *DB) migrate() error {
 	if err != nil {
 		return fmt.Errorf("query migrations: %w", err)
 	}
-	defer rows.Close()
+	defer func() { _ = rows.Close() }()

 	for rows.Next() {
 		var version string
@ -147,12 +152,12 @@ func (db *DB) migrate() error {
 		}

 		if _, err := tx.Exec(string(content)); err != nil {
-			tx.Rollback()
+			_ = tx.Rollback()
 			return fmt.Errorf("exec migration %s: %w", version, err)
 		}

 		if _, err := tx.Exec("INSERT INTO schema_migrations (version) VALUES ($1)", version); err != nil {
-			tx.Rollback()
+			_ = tx.Rollback()
 			return fmt.Errorf("record migration %s: %w", version, err)
 		}

--- a/internal/domain/apikey.go
+++ b/internal/domain/apikey.go
@ -1,6 +1,9 @@
 package domain

-import "time"
+import (
+	"net"
+	"time"
+)

 // APIKeyID is a strongly-typed identifier for API keys.
 type APIKeyID string
@ -22,6 +25,7 @@ type APIKey struct {
 	KeyPrefix  string // First 8 chars of key for identification
 	Scopes     []Scope
 	ProjectIDs []ProjectID // nil = access to all projects
+	AllowedIPs []string    // CIDR notation, e.g., ["192.168.1.0/24", "10.0.0.0/8"]; nil = no restriction
 	CreatedAt  time.Time
 	ExpiresAt  *time.Time
 	LastUsedAt *time.Time
@ -81,3 +85,33 @@ func (k *APIKey) HasProjectAccess(projectID ProjectID) bool {
 	}
 	return false
 }
+
+// IsIPAllowed checks if the given IP address is allowed by the key's IP restrictions.
+// Returns true if no IP restrictions are set or if the IP matches any allowed CIDR.
+func (k *APIKey) IsIPAllowed(clientIP string) bool {
+	// No restrictions means all IPs are allowed
+	if len(k.AllowedIPs) == 0 {
+		return true
+	}
+
+	ip := net.ParseIP(clientIP)
+	if ip == nil {
+		return false
+	}
+
+	for _, cidr := range k.AllowedIPs {
+		_, network, err := net.ParseCIDR(cidr)
+		if err != nil {
+			// If not a CIDR, try parsing as single IP
+			allowedIP := net.ParseIP(cidr)
+			if allowedIP != nil && allowedIP.Equal(ip) {
+				return true
+			}
+			continue
+		}
+		if network.Contains(ip) {
+			return true
+		}
+	}
+	return false
+}
--- a/internal/domain/audit.go
+++ b/internal/domain/audit.go
@ -0,0 +1,88 @@
+package domain
+
+import (
+	"time"
+)
+
+// AuditStatus represents the status of a command execution.
+type AuditStatus string
+
+const (
+	AuditStatusRunning   AuditStatus = "running"
+	AuditStatusSuccess   AuditStatus = "success"
+	AuditStatusError     AuditStatus = "error"
+	AuditStatusCancelled AuditStatus = "cancelled"
+)
+
+// IsValid checks if the audit status is a valid value.
+func (s AuditStatus) IsValid() bool {
+	switch s {
+	case AuditStatusRunning, AuditStatusSuccess, AuditStatusError, AuditStatusCancelled:
+		return true
+	}
+	return false
+}
+
+// AuditLogEntry represents a single audit log entry for command execution.
+type AuditLogEntry struct {
+	ID              string      `json:"id"`
+	APIKeyID        string      `json:"api_key_id"`
+	CommandID       string      `json:"command_id"`
+	ProjectID       string      `json:"project_id"`
+	CommandType     CommandType `json:"command_type"`
+	Args            string      `json:"args,omitempty"` // JSON-encoded args
+	ClientIP        string      `json:"client_ip,omitempty"`
+	UserAgent       string      `json:"user_agent,omitempty"`
+	StartedAt       time.Time   `json:"started_at"`
+	CompletedAt     *time.Time  `json:"completed_at,omitempty"`
+	ExitCode        *int        `json:"exit_code,omitempty"`
+	DurationMs      *int64      `json:"duration_ms,omitempty"`
+	Status          AuditStatus `json:"status"`
+	ErrorMessage    string      `json:"error_message,omitempty"`
+	OutputSizeBytes int64       `json:"output_size_bytes"`
+	CreatedAt       time.Time   `json:"created_at"`
+}
+
+// AuditResult contains the result of a completed command for audit logging.
+type AuditResult struct {
+	ExitCode        int
+	DurationMs      int64
+	Status          AuditStatus
+	ErrorMessage    string
+	OutputSizeBytes int64
+}
+
+// AuditFilters defines the filters for querying audit logs.
+type AuditFilters struct {
+	// ProjectID filters by project ID.
+	ProjectID string
+
+	// APIKeyID filters by API key ID.
+	APIKeyID string
+
+	// CommandType filters by command type (claude, shell, git).
+	CommandType CommandType
+
+	// Status filters by audit status.
+	Status AuditStatus
+
+	// StartTime filters entries created at or after this time.
+	StartTime *time.Time
+
+	// EndTime filters entries created before this time.
+	EndTime *time.Time
+
+	// Limit is the maximum number of entries to return.
+	Limit int
+
+	// Offset is the number of entries to skip (for pagination).
+	Offset int
+}
+
+// DefaultAuditFilters returns default filter values.
+func DefaultAuditFilters() AuditFilters {
+	return AuditFilters{
+		Limit:  100,
+		Offset: 0,
+	}
+}
--- a/internal/domain/domain_test.go
+++ b/internal/domain/domain_test.go
@ -0,0 +1,663 @@
+package domain_test
+
+import (
+	"errors"
+	"testing"
+	"time"
+
+	"github.com/orchard9/rdev/internal/domain"
+)
+
+// =============================================================================
+// APIKey Tests
+// =============================================================================
+
+func TestAPIKey_IsExpired(t *testing.T) {
+	tests := []struct {
+		name      string
+		expiresAt *time.Time
+		want      bool
+	}{
+		{
+			name:      "nil expiration never expires",
+			expiresAt: nil,
+			want:      false,
+		},
+		{
+			name:      "future expiration not expired",
+			expiresAt: timePtr(time.Now().Add(time.Hour)),
+			want:      false,
+		},
+		{
+			name:      "past expiration is expired",
+			expiresAt: timePtr(time.Now().Add(-time.Hour)),
+			want:      true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			key := &domain.APIKey{ExpiresAt: tt.expiresAt}
+			if got := key.IsExpired(); got != tt.want {
+				t.Errorf("IsExpired() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}
+
+func TestAPIKey_IsRevoked(t *testing.T) {
+	tests := []struct {
+		name      string
+		revokedAt *time.Time
+		want      bool
+	}{
+		{
+			name:      "nil revocation not revoked",
+			revokedAt: nil,
+			want:      false,
+		},
+		{
+			name:      "set revocation is revoked",
+			revokedAt: timePtr(time.Now()),
+			want:      true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			key := &domain.APIKey{RevokedAt: tt.revokedAt}
+			if got := key.IsRevoked(); got != tt.want {
+				t.Errorf("IsRevoked() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}
+
+func TestAPIKey_IsActive(t *testing.T) {
+	now := time.Now()
+	tests := []struct {
+		name      string
+		expiresAt *time.Time
+		revokedAt *time.Time
+		want      bool
+	}{
+		{
+			name:      "active when no expiration and not revoked",
+			expiresAt: nil,
+			revokedAt: nil,
+			want:      true,
+		},
+		{
+			name:      "active when future expiration and not revoked",
+			expiresAt: timePtr(now.Add(time.Hour)),
+			revokedAt: nil,
+			want:      true,
+		},
+		{
+			name:      "inactive when expired",
+			expiresAt: timePtr(now.Add(-time.Hour)),
+			revokedAt: nil,
+			want:      false,
+		},
+		{
+			name:      "inactive when revoked",
+			expiresAt: nil,
+			revokedAt: timePtr(now),
+			want:      false,
+		},
+		{
+			name:      "inactive when both expired and revoked",
+			expiresAt: timePtr(now.Add(-time.Hour)),
+			revokedAt: timePtr(now),
+			want:      false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			key := &domain.APIKey{
+				ExpiresAt: tt.expiresAt,
+				RevokedAt: tt.revokedAt,
+			}
+			if got := key.IsActive(); got != tt.want {
+				t.Errorf("IsActive() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}
+
+func TestAPIKey_HasScope(t *testing.T) {
+	tests := []struct {
+		name   string
+		scopes []domain.Scope
+		check  domain.Scope
+		want   bool
+	}{
+		{
+			name:   "empty scopes has nothing",
+			scopes: []domain.Scope{},
+			check:  domain.ScopeProjectsRead,
+			want:   false,
+		},
+		{
+			name:   "exact match",
+			scopes: []domain.Scope{domain.ScopeProjectsRead},
+			check:  domain.ScopeProjectsRead,
+			want:   true,
+		},
+		{
+			name:   "no match",
+			scopes: []domain.Scope{domain.ScopeProjectsRead},
+			check:  domain.ScopeProjectsExecute,
+			want:   false,
+		},
+		{
+			name:   "admin grants any scope",
+			scopes: []domain.Scope{domain.ScopeAdmin},
+			check:  domain.ScopeProjectsExecute,
+			want:   true,
+		},
+		{
+			name:   "multiple scopes match",
+			scopes: []domain.Scope{domain.ScopeProjectsRead, domain.ScopeProjectsExecute},
+			check:  domain.ScopeProjectsExecute,
+			want:   true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			key := &domain.APIKey{Scopes: tt.scopes}
+			if got := key.HasScope(tt.check); got != tt.want {
+				t.Errorf("HasScope(%v) = %v, want %v", tt.check, got, tt.want)
+			}
+		})
+	}
+}
+
+func TestAPIKey_HasAnyScope(t *testing.T) {
+	tests := []struct {
+		name   string
+		scopes []domain.Scope
+		check  []domain.Scope
+		want   bool
+	}{
+		{
+			name:   "empty check returns false",
+			scopes: []domain.Scope{domain.ScopeProjectsRead},
+			check:  []domain.Scope{},
+			want:   false,
+		},
+		{
+			name:   "matches first scope",
+			scopes: []domain.Scope{domain.ScopeProjectsRead},
+			check:  []domain.Scope{domain.ScopeProjectsRead, domain.ScopeProjectsExecute},
+			want:   true,
+		},
+		{
+			name:   "matches second scope",
+			scopes: []domain.Scope{domain.ScopeProjectsExecute},
+			check:  []domain.Scope{domain.ScopeProjectsRead, domain.ScopeProjectsExecute},
+			want:   true,
+		},
+		{
+			name:   "no match",
+			scopes: []domain.Scope{domain.ScopeKeysManage},
+			check:  []domain.Scope{domain.ScopeProjectsRead, domain.ScopeProjectsExecute},
+			want:   false,
+		},
+		{
+			name:   "admin matches any",
+			scopes: []domain.Scope{domain.ScopeAdmin},
+			check:  []domain.Scope{domain.ScopeProjectsRead},
+			want:   true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			key := &domain.APIKey{Scopes: tt.scopes}
+			if got := key.HasAnyScope(tt.check...); got != tt.want {
+				t.Errorf("HasAnyScope(%v) = %v, want %v", tt.check, got, tt.want)
+			}
+		})
+	}
+}
+
+func TestAPIKey_HasProjectAccess(t *testing.T) {
+	tests := []struct {
+		name       string
+		scopes     []domain.Scope
+		projectIDs []domain.ProjectID
+		checkID    domain.ProjectID
+		want       bool
+	}{
+		{
+			name:       "nil project list grants all access",
+			scopes:     []domain.Scope{domain.ScopeProjectsRead},
+			projectIDs: nil,
+			checkID:    "proj-1",
+			want:       true,
+		},
+		{
+			name:       "admin grants all access",
+			scopes:     []domain.Scope{domain.ScopeAdmin},
+			projectIDs: []domain.ProjectID{"proj-1"},
+			checkID:    "proj-2",
+			want:       true,
+		},
+		{
+			name:       "explicit project in list",
+			scopes:     []domain.Scope{domain.ScopeProjectsRead},
+			projectIDs: []domain.ProjectID{"proj-1", "proj-2"},
+			checkID:    "proj-1",
+			want:       true,
+		},
+		{
+			name:       "project not in list",
+			scopes:     []domain.Scope{domain.ScopeProjectsRead},
+			projectIDs: []domain.ProjectID{"proj-1", "proj-2"},
+			checkID:    "proj-3",
+			want:       false,
+		},
+		{
+			name:       "empty project list denies all",
+			scopes:     []domain.Scope{domain.ScopeProjectsRead},
+			projectIDs: []domain.ProjectID{},
+			checkID:    "proj-1",
+			want:       false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			key := &domain.APIKey{
+				Scopes:     tt.scopes,
+				ProjectIDs: tt.projectIDs,
+			}
+			if got := key.HasProjectAccess(tt.checkID); got != tt.want {
+				t.Errorf("HasProjectAccess(%v) = %v, want %v", tt.checkID, got, tt.want)
+			}
+		})
+	}
+}
+
+func TestAPIKey_IsIPAllowed(t *testing.T) {
+	tests := []struct {
+		name       string
+		allowedIPs []string
+		clientIP   string
+		want       bool
+	}{
+		{
+			name:       "nil allowed IPs allows all",
+			allowedIPs: nil,
+			clientIP:   "192.168.1.100",
+			want:       true,
+		},
+		{
+			name:       "empty allowed IPs allows all",
+			allowedIPs: []string{},
+			clientIP:   "192.168.1.100",
+			want:       true,
+		},
+		{
+			name:       "exact IP match",
+			allowedIPs: []string{"192.168.1.100"},
+			clientIP:   "192.168.1.100",
+			want:       true,
+		},
+		{
+			name:       "exact IP no match",
+			allowedIPs: []string{"192.168.1.100"},
+			clientIP:   "192.168.1.101",
+			want:       false,
+		},
+		{
+			name:       "CIDR match",
+			allowedIPs: []string{"192.168.1.0/24"},
+			clientIP:   "192.168.1.100",
+			want:       true,
+		},
+		{
+			name:       "CIDR no match",
+			allowedIPs: []string{"192.168.1.0/24"},
+			clientIP:   "192.168.2.100",
+			want:       false,
+		},
+		{
+			name:       "multiple CIDRs first match",
+			allowedIPs: []string{"10.0.0.0/8", "192.168.0.0/16"},
+			clientIP:   "10.1.2.3",
+			want:       true,
+		},
+		{
+			name:       "multiple CIDRs second match",
+			allowedIPs: []string{"10.0.0.0/8", "192.168.0.0/16"},
+			clientIP:   "192.168.5.10",
+			want:       true,
+		},
+		{
+			name:       "multiple CIDRs no match",
+			allowedIPs: []string{"10.0.0.0/8", "192.168.0.0/16"},
+			clientIP:   "172.16.0.1",
+			want:       false,
+		},
+		{
+			name:       "IPv6 CIDR match",
+			allowedIPs: []string{"2001:db8::/32"},
+			clientIP:   "2001:db8::1",
+			want:       true,
+		},
+		{
+			name:       "IPv6 CIDR no match",
+			allowedIPs: []string{"2001:db8::/32"},
+			clientIP:   "2001:db9::1",
+			want:       false,
+		},
+		{
+			name:       "mixed IPv4 and IPv6",
+			allowedIPs: []string{"192.168.1.0/24", "2001:db8::/32"},
+			clientIP:   "2001:db8::100",
+			want:       true,
+		},
+		{
+			name:       "invalid client IP",
+			allowedIPs: []string{"192.168.1.0/24"},
+			clientIP:   "not-an-ip",
+			want:       false,
+		},
+		{
+			name:       "single IP with /32 CIDR",
+			allowedIPs: []string{"192.168.1.100/32"},
+			clientIP:   "192.168.1.100",
+			want:       true,
+		},
+		{
+			name:       "single IP with /32 CIDR no match",
+			allowedIPs: []string{"192.168.1.100/32"},
+			clientIP:   "192.168.1.101",
+			want:       false,
+		},
+		{
+			name:       "localhost IPv4",
+			allowedIPs: []string{"127.0.0.1"},
+			clientIP:   "127.0.0.1",
+			want:       true,
+		},
+		{
+			name:       "localhost IPv6",
+			allowedIPs: []string{"::1"},
+			clientIP:   "::1",
+			want:       true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			key := &domain.APIKey{AllowedIPs: tt.allowedIPs}
+			if got := key.IsIPAllowed(tt.clientIP); got != tt.want {
+				t.Errorf("IsIPAllowed(%q) = %v, want %v", tt.clientIP, got, tt.want)
+			}
+		})
+	}
+}
+
+// =============================================================================
+// CommandResult Tests
+// =============================================================================
+
+func TestCommandResult_Success(t *testing.T) {
+	tests := []struct {
+		name     string
+		exitCode int
+		err      error
+		want     bool
+	}{
+		{
+			name:     "success with zero exit code and no error",
+			exitCode: 0,
+			err:      nil,
+			want:     true,
+		},
+		{
+			name:     "failure with non-zero exit code",
+			exitCode: 1,
+			err:      nil,
+			want:     false,
+		},
+		{
+			name:     "failure with error",
+			exitCode: 0,
+			err:      errors.New("execution failed"),
+			want:     false,
+		},
+		{
+			name:     "failure with both error and non-zero exit",
+			exitCode: 127,
+			err:      errors.New("command not found"),
+			want:     false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := &domain.CommandResult{
+				ExitCode: tt.exitCode,
+				Error:    tt.err,
+			}
+			if got := result.Success(); got != tt.want {
+				t.Errorf("Success() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}
+
+// =============================================================================
+// ProjectStatus Tests
+// =============================================================================
+
+func TestProjectStatus_IsAvailable(t *testing.T) {
+	tests := []struct {
+		status domain.ProjectStatus
+		want   bool
+	}{
+		{domain.ProjectStatusRunning, true},
+		{domain.ProjectStatusPending, false},
+		{domain.ProjectStatusFailed, false},
+		{domain.ProjectStatusNotFound, false},
+		{domain.ProjectStatusUnknown, false},
+		{domain.ProjectStatusError, false},
+	}
+
+	for _, tt := range tests {
+		t.Run(string(tt.status), func(t *testing.T) {
+			if got := tt.status.IsAvailable(); got != tt.want {
+				t.Errorf("ProjectStatus(%q).IsAvailable() = %v, want %v", tt.status, got, tt.want)
+			}
+		})
+	}
+}
+
+func TestProjectStatus_IsTerminal(t *testing.T) {
+	tests := []struct {
+		status domain.ProjectStatus
+		want   bool
+	}{
+		{domain.ProjectStatusRunning, false},
+		{domain.ProjectStatusPending, false},
+		{domain.ProjectStatusFailed, true},
+		{domain.ProjectStatusNotFound, true},
+		{domain.ProjectStatusUnknown, false},
+		{domain.ProjectStatusError, false},
+	}
+
+	for _, tt := range tests {
+		t.Run(string(tt.status), func(t *testing.T) {
+			if got := tt.status.IsTerminal(); got != tt.want {
+				t.Errorf("ProjectStatus(%q).IsTerminal() = %v, want %v", tt.status, got, tt.want)
+			}
+		})
+	}
+}
+
+// =============================================================================
+// Error Variables Tests
+// =============================================================================
+
+func TestErrorVariables_AreDistinct(t *testing.T) {
+	// Verify all domain errors are distinct and can be matched with errors.Is
+	allErrors := []error{
+		domain.ErrProjectNotFound,
+		domain.ErrProjectNotRunning,
+		domain.ErrCommandNotFound,
+		domain.ErrCommandTimeout,
+		domain.ErrCommandCancelled,
+		domain.ErrLimitExceeded,
+		domain.ErrInvalidCommand,
+		domain.ErrCommandSanitization,
+		domain.ErrKeyNotFound,
+		domain.ErrKeyRevoked,
+		domain.ErrKeyExpired,
+		domain.ErrKeyInvalid,
+		domain.ErrUnauthorized,
+		domain.ErrForbidden,
+		domain.ErrInsufficientScope,
+		domain.ErrRateLimited,
+		domain.ErrDatabaseConnection,
+		domain.ErrKubernetesError,
+	}
+
+	// Each error should only match itself
+	for i, err1 := range allErrors {
+		for j, err2 := range allErrors {
+			if i == j {
+				if !errors.Is(err1, err2) {
+					t.Errorf("error %v should match itself", err1)
+				}
+			} else {
+				if errors.Is(err1, err2) {
+					t.Errorf("error %v should not match %v", err1, err2)
+				}
+			}
+		}
+	}
+}
+
+func TestErrorVariables_CanBeWrapped(t *testing.T) {
+	// Domain errors should be usable as base errors for wrapping
+	wrapped := errors.Join(domain.ErrProjectNotFound, errors.New("additional context"))
+
+	if !errors.Is(wrapped, domain.ErrProjectNotFound) {
+		t.Error("wrapped error should match base domain error")
+	}
+}
+
+// =============================================================================
+// Type Constants Tests
+// =============================================================================
+
+func TestScopeConstants(t *testing.T) {
+	// Verify scope constants have expected values (for documentation)
+	expectedScopes := map[domain.Scope]string{
+		domain.ScopeAdmin:           "admin",
+		domain.ScopeProjectsRead:    "projects:read",
+		domain.ScopeProjectsExecute: "projects:execute",
+		domain.ScopeKeysManage:      "keys:manage",
+	}
+
+	for scope, expected := range expectedScopes {
+		if string(scope) != expected {
+			t.Errorf("Scope %v = %q, want %q", scope, string(scope), expected)
+		}
+	}
+}
+
+func TestCommandTypeConstants(t *testing.T) {
+	expectedTypes := map[domain.CommandType]string{
+		domain.CommandTypeClaude: "claude",
+		domain.CommandTypeShell:  "shell",
+		domain.CommandTypeGit:    "git",
+	}
+
+	for cmdType, expected := range expectedTypes {
+		if string(cmdType) != expected {
+			t.Errorf("CommandType %v = %q, want %q", cmdType, string(cmdType), expected)
+		}
+	}
+}
+
+func TestProjectStatusConstants(t *testing.T) {
+	expectedStatuses := map[domain.ProjectStatus]string{
+		domain.ProjectStatusRunning:  "running",
+		domain.ProjectStatusPending:  "pending",
+		domain.ProjectStatusFailed:   "failed",
+		domain.ProjectStatusNotFound: "not_found",
+		domain.ProjectStatusUnknown:  "unknown",
+		domain.ProjectStatusError:    "error",
+	}
+
+	for status, expected := range expectedStatuses {
+		if string(status) != expected {
+			t.Errorf("ProjectStatus %v = %q, want %q", status, string(status), expected)
+		}
+	}
+}
+
+// =============================================================================
+// Type Instantiation Tests
+// =============================================================================
+
+func TestProject_CanBeInstantiated(t *testing.T) {
+	proj := domain.Project{
+		ID:          "test-project",
+		Name:        "Test Project",
+		Description: "A test project",
+		PodName:     "test-pod",
+		Status:      domain.ProjectStatusRunning,
+		Workspace:   "/workspace",
+	}
+
+	if proj.ID != "test-project" {
+		t.Errorf("Project.ID = %q, want %q", proj.ID, "test-project")
+	}
+}
+
+func TestCommand_CanBeInstantiated(t *testing.T) {
+	now := time.Now()
+	cmd := domain.Command{
+		ID:        "cmd-1",
+		ProjectID: "proj-1",
+		Type:      domain.CommandTypeClaude,
+		Args:      []string{"--version"},
+		StartedAt: now,
+	}
+
+	if cmd.ID != "cmd-1" {
+		t.Errorf("Command.ID = %q, want %q", cmd.ID, "cmd-1")
+	}
+	if len(cmd.Args) != 1 {
+		t.Errorf("Command.Args length = %d, want 1", len(cmd.Args))
+	}
+}
+
+func TestOutputLine_CanBeInstantiated(t *testing.T) {
+	now := time.Now()
+	line := domain.OutputLine{
+		Stream:    "stdout",
+		Line:      "Hello, world!",
+		Timestamp: now,
+	}
+
+	if line.Stream != "stdout" {
+		t.Errorf("OutputLine.Stream = %q, want %q", line.Stream, "stdout")
+	}
+}
+
+// =============================================================================
+// Helpers
+// =============================================================================
+
+func timePtr(t time.Time) *time.Time {
+	return &t
+}
--- a/internal/domain/errors.go
+++ b/internal/domain/errors.go
@ -31,6 +31,9 @@ var (
 	// Rate limiting errors
 	ErrRateLimited = errors.New("rate limit exceeded")

+	// Audit errors
+	ErrAuditNotFound = errors.New("audit log entry not found")
+
 	// Infrastructure errors (should typically be wrapped)
 	ErrDatabaseConnection = errors.New("database connection error")
 	ErrKubernetesError    = errors.New("kubernetes error")
--- a/internal/domain/project.go
+++ b/internal/domain/project.go
@ -36,3 +36,19 @@ func (s ProjectStatus) IsAvailable() bool {
 func (s ProjectStatus) IsTerminal() bool {
 	return s == ProjectStatusFailed || s == ProjectStatusNotFound
 }
+
+// K8s label and annotation constants for project discovery.
+// Pods with these labels are discovered as rdev projects.
+const (
+	// LabelProject marks a pod as an rdev project when set to "true".
+	LabelProject = "rdev.orchard9.ai/project"
+
+	// LabelName specifies the project name (used as project ID).
+	LabelName = "rdev.orchard9.ai/name"
+
+	// LabelWorkspace specifies the workspace path inside the pod.
+	LabelWorkspace = "rdev.orchard9.ai/workspace"
+
+	// AnnotDescription provides a human-readable description of the project.
+	AnnotDescription = "rdev.orchard9.ai/description"
+)
--- a/internal/domain/queue.go
+++ b/internal/domain/queue.go
@ -0,0 +1,79 @@
+package domain
+
+import "time"
+
+// QueuedCommandID is a strongly-typed identifier for queued commands.
+type QueuedCommandID string
+
+// QueueStatus represents the status of a queued command.
+type QueueStatus string
+
+// Available queue statuses.
+const (
+	QueueStatusPending   QueueStatus = "pending"
+	QueueStatusRunning   QueueStatus = "running"
+	QueueStatusCompleted QueueStatus = "completed"
+	QueueStatusFailed    QueueStatus = "failed"
+	QueueStatusCancelled QueueStatus = "cancelled"
+)
+
+// IsTerminal returns true if the status represents a final state.
+func (s QueueStatus) IsTerminal() bool {
+	return s == QueueStatusCompleted || s == QueueStatusFailed || s == QueueStatusCancelled
+}
+
+// String returns the status as a string.
+func (s QueueStatus) String() string {
+	return string(s)
+}
+
+// QueuedCommand represents a command waiting to be executed.
+type QueuedCommand struct {
+	ID          QueuedCommandID `json:"id"`
+	ProjectID   string          `json:"project_id"`
+	Command     string          `json:"command"`      // Prompt for claude, command for shell, JSON-encoded args for git
+	CommandType CommandType     `json:"command_type"` // claude, shell, git
+	WorkingDir  string          `json:"working_dir,omitempty"`
+	Status      QueueStatus     `json:"status"`
+	Priority    int             `json:"priority"`
+	CreatedAt   time.Time       `json:"created_at"`
+	StartedAt   *time.Time      `json:"started_at,omitempty"`
+	CompletedAt *time.Time      `json:"completed_at,omitempty"`
+	ExitCode    *int            `json:"exit_code,omitempty"`
+	Output      string          `json:"output,omitempty"`
+	Error       string          `json:"error,omitempty"`
+	APIKeyID    string          `json:"api_key_id,omitempty"` // For audit trail
+}
+
+// CommandResult holds the result of executing a queued command.
+type QueuedCommandResult struct {
+	ExitCode int
+	Output   string
+	Error    string
+}
+
+// QueueFilters contains filter options for listing queued commands.
+type QueueFilters struct {
+	Status    *QueueStatus // Filter by status
+	Limit     int          // Max results (default 100)
+	Offset    int          // For pagination
+	SortOrder string       // "asc" or "desc" by created_at (default "desc")
+}
+
+// DefaultQueueFilters returns sensible defaults for queue listing.
+func DefaultQueueFilters() *QueueFilters {
+	return &QueueFilters{
+		Limit:     100,
+		Offset:    0,
+		SortOrder: "desc",
+	}
+}
+
+// QueueStats holds statistics about the command queue.
+type QueueStats struct {
+	TotalPending   int `json:"total_pending"`
+	TotalRunning   int `json:"total_running"`
+	TotalCompleted int `json:"total_completed"`
+	TotalFailed    int `json:"total_failed"`
+	TotalCancelled int `json:"total_cancelled"`
+}
--- a/internal/domain/rate_limit.go
+++ b/internal/domain/rate_limit.go
@ -0,0 +1,88 @@
+package domain
+
+import "time"
+
+// Default rate limit values. These must match the defaults in
+// internal/db/migrations/005_rate_limiting.sql
+const (
+	DefaultRateLimitPerMinute = 60
+	DefaultRateLimitPerHour   = 1000
+)
+
+// RateLimitConfig holds the rate limit configuration for an API key.
+type RateLimitConfig struct {
+	// PerMinute is the maximum number of requests allowed per minute.
+	PerMinute int
+
+	// PerHour is the maximum number of requests allowed per hour.
+	PerHour int
+}
+
+// DefaultRateLimitConfig returns the default rate limit configuration.
+func DefaultRateLimitConfig() RateLimitConfig {
+	return RateLimitConfig{
+		PerMinute: DefaultRateLimitPerMinute,
+		PerHour:   DefaultRateLimitPerHour,
+	}
+}
+
+// RateLimitState tracks the current usage within a time window.
+type RateLimitState struct {
+	// APIKeyID is the identifier of the API key.
+	APIKeyID string
+
+	// WindowStart is the beginning of the current time window.
+	WindowStart time.Time
+
+	// WindowType indicates the type of window ("minute" or "hour").
+	WindowType string
+
+	// RequestCount is the number of requests made in this window.
+	RequestCount int
+
+	// UpdatedAt is when this state was last updated.
+	UpdatedAt time.Time
+}
+
+// WindowTypeMinute is the constant for minute-based windows.
+const WindowTypeMinute = "minute"
+
+// WindowTypeHour is the constant for hour-based windows.
+const WindowTypeHour = "hour"
+
+// RateLimitResult contains the result of a rate limit check.
+type RateLimitResult struct {
+	// Allowed indicates whether the request is allowed.
+	Allowed bool
+
+	// RetryAfter is the duration to wait before retrying (when not allowed).
+	RetryAfter time.Duration
+
+	// RemainingMinute is the number of requests remaining in the current minute.
+	RemainingMinute int
+
+	// RemainingHour is the number of requests remaining in the current hour.
+	RemainingHour int
+
+	// LimitMinute is the per-minute limit for this key.
+	LimitMinute int
+
+	// LimitHour is the per-hour limit for this key.
+	LimitHour int
+
+	// ResetMinute is when the minute window resets.
+	ResetMinute time.Time
+
+	// ResetHour is when the hour window resets.
+	ResetHour time.Time
+}
+
+// TruncateToMinute truncates a time to the start of the minute.
+func TruncateToMinute(t time.Time) time.Time {
+	return t.Truncate(time.Minute)
+}
+
+// TruncateToHour truncates a time to the start of the hour.
+func TruncateToHour(t time.Time) time.Time {
+	return t.Truncate(time.Hour)
+}
--- a/internal/domain/webhook.go
+++ b/internal/domain/webhook.go
@ -0,0 +1,160 @@
+package domain
+
+import (
+	"errors"
+	"time"
+)
+
+// WebhookID is a strongly-typed identifier for webhooks.
+type WebhookID string
+
+// String returns the webhook ID as a string.
+func (id WebhookID) String() string {
+	return string(id)
+}
+
+// WebhookEventType represents the type of event that triggers a webhook.
+type WebhookEventType string
+
+// Available webhook event types.
+const (
+	WebhookEventCommandStarted   WebhookEventType = "command.started"
+	WebhookEventCommandCompleted WebhookEventType = "command.completed"
+	WebhookEventCommandFailed    WebhookEventType = "command.failed"
+	WebhookEventPodReady         WebhookEventType = "pod.ready"
+	WebhookEventPodFailed        WebhookEventType = "pod.failed"
+)
+
+// AllWebhookEventTypes lists all valid webhook event types.
+var AllWebhookEventTypes = []WebhookEventType{
+	WebhookEventCommandStarted,
+	WebhookEventCommandCompleted,
+	WebhookEventCommandFailed,
+	WebhookEventPodReady,
+	WebhookEventPodFailed,
+}
+
+// IsValid checks if a webhook event type is valid.
+func (t WebhookEventType) IsValid() bool {
+	for _, valid := range AllWebhookEventTypes {
+		if t == valid {
+			return true
+		}
+	}
+	return false
+}
+
+// String returns the event type as a string.
+func (t WebhookEventType) String() string {
+	return string(t)
+}
+
+// Webhook represents a webhook subscription for a project.
+type Webhook struct {
+	ID        WebhookID          `json:"id"`
+	ProjectID string             `json:"project_id"`
+	URL       string             `json:"url"`
+	Secret    string             `json:"-"` // Never expose secret in JSON responses
+	Events    []WebhookEventType `json:"events"`
+	Enabled   bool               `json:"enabled"`
+	CreatedAt time.Time          `json:"created_at"`
+	UpdatedAt time.Time          `json:"updated_at"`
+}
+
+// HasSecret returns true if the webhook has a signing secret configured.
+func (w *Webhook) HasSecret() bool {
+	return w.Secret != ""
+}
+
+// SubscribesToEvent checks if the webhook subscribes to the given event type.
+func (w *Webhook) SubscribesToEvent(eventType WebhookEventType) bool {
+	for _, e := range w.Events {
+		if e == eventType {
+			return true
+		}
+	}
+	return false
+}
+
+// WebhookDeliveryID is a strongly-typed identifier for webhook deliveries.
+type WebhookDeliveryID string
+
+// String returns the delivery ID as a string.
+func (id WebhookDeliveryID) String() string {
+	return string(id)
+}
+
+// WebhookDelivery represents a single webhook delivery attempt.
+type WebhookDelivery struct {
+	ID             WebhookDeliveryID `json:"id"`
+	WebhookID      WebhookID         `json:"webhook_id"`
+	EventType      WebhookEventType  `json:"event_type"`
+	Payload        string            `json:"payload"` // JSON payload that was sent
+	ResponseStatus int               `json:"response_status,omitempty"`
+	ResponseBody   string            `json:"response_body,omitempty"`
+	DeliveredAt    time.Time         `json:"delivered_at"`
+	Success        bool              `json:"success"`
+	RetryCount     int               `json:"retry_count"`
+	ErrorMessage   string            `json:"error_message,omitempty"`
+}
+
+// WebhookEvent represents an event to be dispatched to webhooks.
+type WebhookEvent struct {
+	Type      WebhookEventType `json:"type"`
+	Timestamp time.Time        `json:"timestamp"`
+	ProjectID string           `json:"project_id"`
+	Data      any              `json:"data"`
+}
+
+// WebhookPayload is the structure sent to webhook endpoints.
+type WebhookPayload struct {
+	ID        string           `json:"id"`         // Unique delivery ID
+	Event     WebhookEventType `json:"event"`      // Event type
+	Timestamp time.Time        `json:"timestamp"`  // When the event occurred
+	ProjectID string           `json:"project_id"` // Project this event relates to
+	Data      any              `json:"data"`       // Event-specific data
+}
+
+// CommandEventData is the data structure for command-related webhook events.
+type CommandEventData struct {
+	CommandID   string      `json:"command_id"`
+	CommandType CommandType `json:"command_type"`
+	ProjectID   string      `json:"project_id"`
+	StartedAt   time.Time   `json:"started_at,omitempty"`
+	CompletedAt time.Time   `json:"completed_at,omitempty"`
+	ExitCode    int         `json:"exit_code,omitempty"`
+	DurationMs  int64       `json:"duration_ms,omitempty"`
+	Error       string      `json:"error,omitempty"`
+}
+
+// PodEventData is the data structure for pod-related webhook events.
+type PodEventData struct {
+	PodName   string    `json:"pod_name"`
+	ProjectID string    `json:"project_id"`
+	Status    string    `json:"status"`
+	Reason    string    `json:"reason,omitempty"`
+	Message   string    `json:"message,omitempty"`
+	Timestamp time.Time `json:"timestamp"`
+}
+
+// WebhookFilters contains filter options for listing webhook deliveries.
+type WebhookDeliveryFilters struct {
+	EventType *WebhookEventType // Filter by event type
+	Success   *bool             // Filter by success status
+	Limit     int               // Max results (default 100)
+	Offset    int               // For pagination
+}
+
+// DefaultWebhookDeliveryFilters returns sensible defaults.
+func DefaultWebhookDeliveryFilters() *WebhookDeliveryFilters {
+	return &WebhookDeliveryFilters{
+		Limit:  100,
+		Offset: 0,
+	}
+}
+
+// Webhook-related errors.
+var (
+	ErrWebhookNotFound = errors.New("webhook not found")
+	ErrInvalidWebhook  = errors.New("invalid webhook configuration")
+)
--- a/internal/executor/executor.go
+++ b/internal/executor/executor.go
@ -1,218 +0,0 @@
-// Package executor provides kubectl exec functionality for running commands in pods.
-package executor
-
-import (
-	"bufio"
-	"context"
-	"fmt"
-	"io"
-	"os/exec"
-	"sync"
-	"time"
-)
-
-// Executor runs commands in Kubernetes pods via kubectl exec.
-type Executor struct {
-	namespace string
-	mu        sync.RWMutex
-}
-
-// New creates a new Executor for the given namespace.
-func New(namespace string) *Executor {
-	return &Executor{
-		namespace: namespace,
-	}
-}
-
-// CommandType represents the type of command being executed.
-type CommandType string
-
-const (
-	CommandTypeClaude CommandType = "claude"
-	CommandTypeShell  CommandType = "shell"
-	CommandTypeGit    CommandType = "git"
-)
-
-// Command represents a command to execute in a pod.
-type Command struct {
-	ID        string
-	PodName   string
-	Type      CommandType
-	Args      []string
-	StartedAt time.Time
-}
-
-// Result represents the result of command execution.
-type Result struct {
-	ExitCode   int
-	DurationMs int64
-	Error      error
-}
-
-// OutputHandler is called for each line of output from the command.
-type OutputHandler func(stream string, line string)
-
-// CommandExecutor defines the interface for executing commands in pods.
-// This interface enables testing with mock implementations.
-type CommandExecutor interface {
-	Exec(ctx context.Context, cmd *Command, handler OutputHandler) Result
-	PodExists(ctx context.Context, podName string) (bool, error)
-	CheckConnection(ctx context.Context) error
-}
-
-// Ensure Executor implements CommandExecutor at compile time.
-var _ CommandExecutor = (*Executor)(nil)
-
-// Exec executes a command in the specified pod.
-// It streams output to the provided handler and returns when complete.
-func (e *Executor) Exec(ctx context.Context, cmd *Command, handler OutputHandler) Result {
-	e.mu.RLock()
-	namespace := e.namespace
-	e.mu.RUnlock()
-
-	startTime := time.Now()
-	var args []string
-
-	switch cmd.Type {
-	case CommandTypeClaude:
-		// claude "prompt"
-		args = []string{
-			"exec", "-n", namespace, cmd.PodName, "--",
-			"claude", cmd.Args[0], // prompt is first arg
-		}
-	case CommandTypeShell:
-		// bash -c "command"
-		args = []string{
-			"exec", "-n", namespace, cmd.PodName, "--",
-			"bash", "-c", cmd.Args[0], // command is first arg
-		}
-	case CommandTypeGit:
-		// git <args...>
-		args = append([]string{
-			"exec", "-n", namespace, cmd.PodName, "--",
-			"git", "-C", "/workspace",
-		}, cmd.Args...)
-	default:
-		return Result{
-			ExitCode: 1,
-			Error:    fmt.Errorf("unknown command type: %s", cmd.Type),
-		}
-	}
-
-	// Create the kubectl command
-	kubectl := exec.CommandContext(ctx, "kubectl", args...)
-
-	// Get stdout and stderr pipes
-	stdout, err := kubectl.StdoutPipe()
-	if err != nil {
-		return Result{ExitCode: 1, Error: fmt.Errorf("stdout pipe: %w", err)}
-	}
-	stderr, err := kubectl.StderrPipe()
-	if err != nil {
-		return Result{ExitCode: 1, Error: fmt.Errorf("stderr pipe: %w", err)}
-	}
-
-	// Start the command
-	if err := kubectl.Start(); err != nil {
-		return Result{ExitCode: 1, Error: fmt.Errorf("start: %w", err)}
-	}
-
-	// Stream output concurrently
-	var wg sync.WaitGroup
-	wg.Add(2)
-
-	go func() {
-		defer wg.Done()
-		streamOutput(stdout, "stdout", handler)
-	}()
-
-	go func() {
-		defer wg.Done()
-		streamOutput(stderr, "stderr", handler)
-	}()
-
-	// Wait for output to be consumed
-	wg.Wait()
-
-	// Wait for command to complete
-	err = kubectl.Wait()
-	duration := time.Since(startTime)
-
-	result := Result{
-		DurationMs: duration.Milliseconds(),
-	}
-
-	if err != nil {
-		if exitError, ok := err.(*exec.ExitError); ok {
-			result.ExitCode = exitError.ExitCode()
-		} else {
-			result.ExitCode = 1
-			result.Error = err
-		}
-	}
-
-	return result
-}
-
-// streamOutput reads from a reader and sends each line to the handler.
-func streamOutput(r io.Reader, stream string, handler OutputHandler) {
-	scanner := bufio.NewScanner(r)
-	// Increase buffer size for long lines
-	buf := make([]byte, 0, 64*1024)
-	scanner.Buffer(buf, 1024*1024)
-
-	for scanner.Scan() {
-		handler(stream, scanner.Text())
-	}
-}
-
-// CheckConnection verifies kubectl can connect to the cluster.
-func (e *Executor) CheckConnection(ctx context.Context) error {
-	cmd := exec.CommandContext(ctx, "kubectl", "cluster-info", "--request-timeout=5s")
-	return cmd.Run()
-}
-
-// ExecSimple executes a shell command and returns the output as a string.
-// This is a convenience method for simple commands that don't need streaming.
-func (e *Executor) ExecSimple(podName, command string) (string, error) {
-	e.mu.RLock()
-	namespace := e.namespace
-	e.mu.RUnlock()
-
-	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
-	defer cancel()
-
-	args := []string{
-		"exec", "-n", namespace, podName, "-c", "claudebox", "--",
-		"bash", "-c", command,
-	}
-
-	cmd := exec.CommandContext(ctx, "kubectl", args...)
-	output, err := cmd.CombinedOutput()
-	if err != nil {
-		return string(output), err
-	}
-
-	return string(output), nil
-}
-
-// PodExists checks if a pod exists and is running.
-func (e *Executor) PodExists(ctx context.Context, podName string) (bool, error) {
-	e.mu.RLock()
-	namespace := e.namespace
-	e.mu.RUnlock()
-
-	cmd := exec.CommandContext(ctx, "kubectl",
-		"get", "pod", podName,
-		"-n", namespace,
-		"-o", "jsonpath={.status.phase}",
-	)
-
-	output, err := cmd.Output()
-	if err != nil {
-		// Pod doesn't exist or error
-		return false, nil
-	}
-
-	return string(output) == "Running", nil
-}
--- a/internal/executor/executor_test.go
+++ b/internal/executor/executor_test.go
@ -1,359 +0,0 @@
-package executor
-
-import (
-	"context"
-	"os/exec"
-	"strings"
-	"sync"
-	"testing"
-	"time"
-)
-
-func TestNew(t *testing.T) {
-	e := New("test-namespace")
-	if e.namespace != "test-namespace" {
-		t.Errorf("namespace = %q, want %q", e.namespace, "test-namespace")
-	}
-}
-
-func TestCommand_Types(t *testing.T) {
-	tests := []struct {
-		cmdType CommandType
-		want    string
-	}{
-		{CommandTypeClaude, "claude"},
-		{CommandTypeShell, "shell"},
-		{CommandTypeGit, "git"},
-	}
-
-	for _, tt := range tests {
-		t.Run(string(tt.cmdType), func(t *testing.T) {
-			if string(tt.cmdType) != tt.want {
-				t.Errorf("CommandType = %q, want %q", tt.cmdType, tt.want)
-			}
-		})
-	}
-}
-
-func TestExecutor_buildArgs(t *testing.T) {
-	e := New("apps")
-
-	tests := []struct {
-		name     string
-		cmd      *Command
-		wantArgs []string
-		wantErr  bool
-	}{
-		{
-			name: "claude command",
-			cmd: &Command{
-				ID:      "cmd-1",
-				PodName: "claudebox-test",
-				Type:    CommandTypeClaude,
-				Args:    []string{"Write a hello world"},
-			},
-			wantArgs: []string{
-				"exec", "-n", "apps", "claudebox-test", "--",
-				"claude", "Write a hello world",
-			},
-		},
-		{
-			name: "shell command",
-			cmd: &Command{
-				ID:      "cmd-2",
-				PodName: "claudebox-test",
-				Type:    CommandTypeShell,
-				Args:    []string{"ls -la /workspace"},
-			},
-			wantArgs: []string{
-				"exec", "-n", "apps", "claudebox-test", "--",
-				"bash", "-c", "ls -la /workspace",
-			},
-		},
-		{
-			name: "git command",
-			cmd: &Command{
-				ID:      "cmd-3",
-				PodName: "claudebox-test",
-				Type:    CommandTypeGit,
-				Args:    []string{"status"},
-			},
-			wantArgs: []string{
-				"exec", "-n", "apps", "claudebox-test", "--",
-				"git", "-C", "/workspace", "status",
-			},
-		},
-		{
-			name: "git command with multiple args",
-			cmd: &Command{
-				ID:      "cmd-4",
-				PodName: "claudebox-test",
-				Type:    CommandTypeGit,
-				Args:    []string{"commit", "-m", "test message"},
-			},
-			wantArgs: []string{
-				"exec", "-n", "apps", "claudebox-test", "--",
-				"git", "-C", "/workspace", "commit", "-m", "test message",
-			},
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			// We can't directly test buildArgs since it's internal to Exec,
-			// but we can verify the command construction by checking what would be built
-			var args []string
-
-			switch tt.cmd.Type {
-			case CommandTypeClaude:
-				args = []string{
-					"exec", "-n", e.namespace, tt.cmd.PodName, "--",
-					"claude", tt.cmd.Args[0],
-				}
-			case CommandTypeShell:
-				args = []string{
-					"exec", "-n", e.namespace, tt.cmd.PodName, "--",
-					"bash", "-c", tt.cmd.Args[0],
-				}
-			case CommandTypeGit:
-				args = append([]string{
-					"exec", "-n", e.namespace, tt.cmd.PodName, "--",
-					"git", "-C", "/workspace",
-				}, tt.cmd.Args...)
-			}
-
-			if len(args) != len(tt.wantArgs) {
-				t.Errorf("args length = %d, want %d", len(args), len(tt.wantArgs))
-				return
-			}
-
-			for i, arg := range args {
-				if arg != tt.wantArgs[i] {
-					t.Errorf("args[%d] = %q, want %q", i, arg, tt.wantArgs[i])
-				}
-			}
-		})
-	}
-}
-
-func TestExecutor_Exec_UnknownType(t *testing.T) {
-	e := New("test")
-
-	var output []string
-	handler := func(stream, line string) {
-		output = append(output, line)
-	}
-
-	result := e.Exec(context.Background(), &Command{
-		Type: CommandType("unknown"),
-	}, handler)
-
-	if result.ExitCode != 1 {
-		t.Errorf("ExitCode = %d, want 1", result.ExitCode)
-	}
-
-	if result.Error == nil {
-		t.Error("Error should not be nil for unknown command type")
-	}
-
-	if !strings.Contains(result.Error.Error(), "unknown command type") {
-		t.Errorf("Error = %v, want to contain 'unknown command type'", result.Error)
-	}
-}
-
-func TestExecutor_Exec_ContextCancellation(t *testing.T) {
-	// Skip if kubectl is not available
-	if _, err := exec.LookPath("kubectl"); err != nil {
-		t.Skip("kubectl not available")
-	}
-
-	e := New("default")
-
-	ctx, cancel := context.WithCancel(context.Background())
-
-	var wg sync.WaitGroup
-	var result Result
-
-	wg.Add(1)
-	go func() {
-		defer wg.Done()
-		result = e.Exec(ctx, &Command{
-			ID:      "test-cancel",
-			PodName: "nonexistent-pod",
-			Type:    CommandTypeShell,
-			Args:    []string{"sleep 10"},
-		}, func(stream, line string) {})
-	}()
-
-	// Cancel immediately
-	cancel()
-
-	wg.Wait()
-
-	// The command should either fail due to context cancellation or pod not found
-	// Either way it shouldn't hang
-	if result.ExitCode == 0 && result.Error == nil {
-		t.Error("Expected command to fail due to cancellation or pod not found")
-	}
-}
-
-func TestExecutor_CheckConnection(t *testing.T) {
-	// This test requires kubectl to be configured
-	if _, err := exec.LookPath("kubectl"); err != nil {
-		t.Skip("kubectl not available")
-	}
-
-	e := New("default")
-	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
-	defer cancel()
-
-	// CheckConnection will succeed if kubectl is configured, fail otherwise
-	// We just verify it doesn't panic
-	_ = e.CheckConnection(ctx)
-}
-
-func TestExecutor_PodExists(t *testing.T) {
-	// Skip if kubectl is not available
-	if _, err := exec.LookPath("kubectl"); err != nil {
-		t.Skip("kubectl not available")
-	}
-
-	e := New("default")
-	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
-	defer cancel()
-
-	// Check for a pod that definitely doesn't exist
-	exists, err := e.PodExists(ctx, "definitely-nonexistent-pod-12345")
-
-	// Should return false without error (or skip if cluster not available)
-	if err != nil {
-		t.Skipf("cluster not available: %v", err)
-	}
-
-	if exists {
-		t.Error("Expected pod to not exist")
-	}
-}
-
-// TestStreamOutput tests the streamOutput function behavior
-func TestStreamOutput(t *testing.T) {
-	tests := []struct {
-		name   string
-		input  string
-		want   []string
-	}{
-		{
-			name:  "single line",
-			input: "hello world",
-			want:  []string{"hello world"},
-		},
-		{
-			name:  "multiple lines",
-			input: "line1\nline2\nline3",
-			want:  []string{"line1", "line2", "line3"},
-		},
-		{
-			name:  "empty input",
-			input: "",
-			want:  nil,
-		},
-		{
-			name:  "trailing newline",
-			input: "hello\n",
-			want:  []string{"hello"},
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			var got []string
-			handler := func(stream, line string) {
-				if stream != "stdout" {
-					t.Errorf("stream = %q, want %q", stream, "stdout")
-				}
-				got = append(got, line)
-			}
-
-			r := strings.NewReader(tt.input)
-			streamOutput(r, "stdout", handler)
-
-			if len(got) != len(tt.want) {
-				t.Errorf("got %d lines, want %d", len(got), len(tt.want))
-				return
-			}
-
-			for i, line := range got {
-				if line != tt.want[i] {
-					t.Errorf("line[%d] = %q, want %q", i, line, tt.want[i])
-				}
-			}
-		})
-	}
-}
-
-// TestResult verifies Result struct behavior
-func TestResult(t *testing.T) {
-	t.Run("successful result", func(t *testing.T) {
-		r := Result{
-			ExitCode:   0,
-			DurationMs: 1500,
-			Error:      nil,
-		}
-
-		if r.ExitCode != 0 {
-			t.Errorf("ExitCode = %d, want 0", r.ExitCode)
-		}
-
-		if r.DurationMs != 1500 {
-			t.Errorf("DurationMs = %d, want 1500", r.DurationMs)
-		}
-
-		if r.Error != nil {
-			t.Errorf("Error = %v, want nil", r.Error)
-		}
-	})
-
-	t.Run("failed result", func(t *testing.T) {
-		r := Result{
-			ExitCode:   1,
-			DurationMs: 500,
-			Error:      nil,
-		}
-
-		if r.ExitCode != 1 {
-			t.Errorf("ExitCode = %d, want 1", r.ExitCode)
-		}
-	})
-}
-
-// TestCommand verifies Command struct
-func TestCommand(t *testing.T) {
-	now := time.Now()
-	cmd := Command{
-		ID:        "cmd-123",
-		PodName:   "test-pod",
-		Type:      CommandTypeClaude,
-		Args:      []string{"prompt here"},
-		StartedAt: now,
-	}
-
-	if cmd.ID != "cmd-123" {
-		t.Errorf("ID = %q, want %q", cmd.ID, "cmd-123")
-	}
-
-	if cmd.PodName != "test-pod" {
-		t.Errorf("PodName = %q, want %q", cmd.PodName, "test-pod")
-	}
-
-	if cmd.Type != CommandTypeClaude {
-		t.Errorf("Type = %q, want %q", cmd.Type, CommandTypeClaude)
-	}
-
-	if len(cmd.Args) != 1 || cmd.Args[0] != "prompt here" {
-		t.Errorf("Args = %v, want [\"prompt here\"]", cmd.Args)
-	}
-
-	if !cmd.StartedAt.Equal(now) {
-		t.Errorf("StartedAt = %v, want %v", cmd.StartedAt, now)
-	}
-}
--- a/internal/handlers/audit.go
+++ b/internal/handlers/audit.go
@ -0,0 +1,225 @@
+package handlers
+
+import (
+	"errors"
+	"net/http"
+	"strconv"
+	"time"
+
+	"github.com/go-chi/chi/v5"
+	"github.com/orchard9/rdev/internal/auth"
+	"github.com/orchard9/rdev/internal/domain"
+	"github.com/orchard9/rdev/internal/port"
+	"github.com/orchard9/rdev/pkg/api"
+)
+
+// AuditHandler handles audit log endpoints.
+type AuditHandler struct {
+	auditLogger port.AuditLogger
+}
+
+// NewAuditHandler creates a new audit handler.
+func NewAuditHandler(auditLogger port.AuditLogger) *AuditHandler {
+	return &AuditHandler{auditLogger: auditLogger}
+}
+
+// Mount registers the audit routes.
+func (h *AuditHandler) Mount(r api.Router) {
+	r.Route("/audit-log", func(r chi.Router) {
+		// All audit endpoints require authentication with audit:read scope
+		r.With(auth.RequireScope(auth.ScopeAuditRead, auth.ScopeAdmin)).Get("/", h.List)
+		r.With(auth.RequireScope(auth.ScopeAuditRead, auth.ScopeAdmin)).Get("/{command_id}", h.Get)
+	})
+}
+
+// AuditLogResponse is the JSON response for an audit log entry.
+type AuditLogResponse struct {
+	ID              string  `json:"id"`
+	APIKeyID        string  `json:"api_key_id"`
+	CommandID       string  `json:"command_id"`
+	ProjectID       string  `json:"project_id"`
+	CommandType     string  `json:"command_type"`
+	Args            string  `json:"args,omitempty"`
+	ClientIP        string  `json:"client_ip,omitempty"`
+	UserAgent       string  `json:"user_agent,omitempty"`
+	StartedAt       string  `json:"started_at"`
+	CompletedAt     *string `json:"completed_at,omitempty"`
+	ExitCode        *int    `json:"exit_code,omitempty"`
+	DurationMs      *int64  `json:"duration_ms,omitempty"`
+	Status          string  `json:"status"`
+	ErrorMessage    string  `json:"error_message,omitempty"`
+	OutputSizeBytes int64   `json:"output_size_bytes"`
+	CreatedAt       string  `json:"created_at"`
+}
+
+// auditLogToResponse converts an AuditLogEntry to a JSON response.
+func auditLogToResponse(entry *domain.AuditLogEntry) AuditLogResponse {
+	resp := AuditLogResponse{
+		ID:              entry.ID,
+		APIKeyID:        entry.APIKeyID,
+		CommandID:       entry.CommandID,
+		ProjectID:       entry.ProjectID,
+		CommandType:     string(entry.CommandType),
+		Args:            entry.Args,
+		ClientIP:        entry.ClientIP,
+		UserAgent:       entry.UserAgent,
+		StartedAt:       entry.StartedAt.Format(time.RFC3339),
+		Status:          string(entry.Status),
+		ErrorMessage:    entry.ErrorMessage,
+		OutputSizeBytes: entry.OutputSizeBytes,
+		CreatedAt:       entry.CreatedAt.Format(time.RFC3339),
+	}
+
+	if entry.CompletedAt != nil {
+		s := entry.CompletedAt.Format(time.RFC3339)
+		resp.CompletedAt = &s
+	}
+
+	if entry.ExitCode != nil {
+		resp.ExitCode = entry.ExitCode
+	}
+
+	if entry.DurationMs != nil {
+		resp.DurationMs = entry.DurationMs
+	}
+
+	return resp
+}
+
+// ListAuditLogResponse is the JSON response for listing audit logs.
+type ListAuditLogResponse struct {
+	Entries []AuditLogResponse `json:"entries"`
+	Total   int                `json:"total"`
+	Limit   int                `json:"limit"`
+	Offset  int                `json:"offset"`
+}
+
+// List returns audit log entries with optional filters.
+// GET /audit-log
+// Query parameters:
+//   - project: filter by project ID
+//   - api_key: filter by API key ID
+//   - command_type: filter by command type (claude, shell, git)
+//   - status: filter by status (running, success, error, cancelled)
+//   - start: filter by start time (RFC3339 format)
+//   - end: filter by end time (RFC3339 format)
+//   - limit: maximum number of entries (default 100, max 1000)
+//   - offset: number of entries to skip (for pagination)
+func (h *AuditHandler) List(w http.ResponseWriter, r *http.Request) {
+	filters := domain.DefaultAuditFilters()
+
+	// Parse project filter
+	if project := r.URL.Query().Get("project"); project != "" {
+		filters.ProjectID = project
+	}
+
+	// Parse api_key filter
+	if apiKey := r.URL.Query().Get("api_key"); apiKey != "" {
+		filters.APIKeyID = apiKey
+	}
+
+	// Parse command_type filter
+	if cmdType := r.URL.Query().Get("command_type"); cmdType != "" {
+		ct := domain.CommandType(cmdType)
+		switch ct {
+		case domain.CommandTypeClaude, domain.CommandTypeShell, domain.CommandTypeGit:
+			filters.CommandType = ct
+		default:
+			api.WriteBadRequest(w, r, "invalid command_type: must be claude, shell, or git")
+			return
+		}
+	}
+
+	// Parse status filter
+	if status := r.URL.Query().Get("status"); status != "" {
+		s := domain.AuditStatus(status)
+		if !s.IsValid() {
+			api.WriteBadRequest(w, r, "invalid status: must be running, success, error, or cancelled")
+			return
+		}
+		filters.Status = s
+	}
+
+	// Parse start time filter
+	if startStr := r.URL.Query().Get("start"); startStr != "" {
+		start, err := time.Parse(time.RFC3339, startStr)
+		if err != nil {
+			api.WriteBadRequest(w, r, "invalid start time: must be RFC3339 format")
+			return
+		}
+		filters.StartTime = &start
+	}
+
+	// Parse end time filter
+	if endStr := r.URL.Query().Get("end"); endStr != "" {
+		end, err := time.Parse(time.RFC3339, endStr)
+		if err != nil {
+			api.WriteBadRequest(w, r, "invalid end time: must be RFC3339 format")
+			return
+		}
+		filters.EndTime = &end
+	}
+
+	// Parse limit
+	if limitStr := r.URL.Query().Get("limit"); limitStr != "" {
+		limit, err := strconv.Atoi(limitStr)
+		if err != nil || limit < 1 {
+			api.WriteBadRequest(w, r, "invalid limit: must be a positive integer")
+			return
+		}
+		if limit > 1000 {
+			limit = 1000 // Cap at 1000
+		}
+		filters.Limit = limit
+	}
+
+	// Parse offset
+	if offsetStr := r.URL.Query().Get("offset"); offsetStr != "" {
+		offset, err := strconv.Atoi(offsetStr)
+		if err != nil || offset < 0 {
+			api.WriteBadRequest(w, r, "invalid offset: must be a non-negative integer")
+			return
+		}
+		filters.Offset = offset
+	}
+
+	entries, err := h.auditLogger.List(r.Context(), filters)
+	if err != nil {
+		api.WriteInternalError(w, r, "Failed to list audit logs")
+		return
+	}
+
+	resp := make([]AuditLogResponse, len(entries))
+	for i, entry := range entries {
+		resp[i] = auditLogToResponse(&entry)
+	}
+
+	api.WriteSuccess(w, r, ListAuditLogResponse{
+		Entries: resp,
+		Total:   len(resp),
+		Limit:   filters.Limit,
+		Offset:  filters.Offset,
+	})
+}
+
+// Get returns a single audit log entry by command ID.
+// GET /audit-log/{command_id}
+func (h *AuditHandler) Get(w http.ResponseWriter, r *http.Request) {
+	commandID := chi.URLParam(r, "command_id")
+	if commandID == "" {
+		api.WriteBadRequest(w, r, "command_id is required")
+		return
+	}
+
+	entry, err := h.auditLogger.Get(r.Context(), commandID)
+	if err != nil {
+		if errors.Is(err, domain.ErrAuditNotFound) {
+			api.WriteNotFound(w, r, "audit log entry not found")
+			return
+		}
+		api.WriteInternalError(w, r, "Failed to get audit log entry")
+		return
+	}
+
+	api.WriteSuccess(w, r, auditLogToResponse(entry))
+}
--- a/internal/handlers/audit_test.go
+++ b/internal/handlers/audit_test.go
@ -0,0 +1,274 @@
+package handlers
+
+import (
+	"context"
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+	"time"
+
+	"github.com/go-chi/chi/v5"
+	"github.com/orchard9/rdev/internal/domain"
+)
+
+// mockAuditLogger implements port.AuditLogger for testing.
+type mockAuditLogger struct {
+	entries []domain.AuditLogEntry
+	err     error
+}
+
+func (m *mockAuditLogger) LogCommandStart(ctx context.Context, entry *domain.AuditLogEntry) error {
+	return m.err
+}
+
+func (m *mockAuditLogger) LogCommandEnd(ctx context.Context, commandID string, result *domain.AuditResult) error {
+	return m.err
+}
+
+func (m *mockAuditLogger) List(ctx context.Context, filters domain.AuditFilters) ([]domain.AuditLogEntry, error) {
+	if m.err != nil {
+		return nil, m.err
+	}
+	return m.entries, nil
+}
+
+func (m *mockAuditLogger) Get(ctx context.Context, commandID string) (*domain.AuditLogEntry, error) {
+	if m.err != nil {
+		return nil, m.err
+	}
+	for _, e := range m.entries {
+		if e.CommandID == commandID {
+			return &e, nil
+		}
+	}
+	return nil, domain.ErrAuditNotFound
+}
+
+func TestAuditHandler_List(t *testing.T) {
+	now := time.Now()
+	entries := []domain.AuditLogEntry{
+		{
+			ID:          "audit-1",
+			CommandID:   "cmd-1",
+			ProjectID:   "proj-1",
+			CommandType: domain.CommandTypeClaude,
+			Status:      domain.AuditStatusSuccess,
+			StartedAt:   now,
+			CreatedAt:   now,
+		},
+		{
+			ID:          "audit-2",
+			CommandID:   "cmd-2",
+			ProjectID:   "proj-1",
+			CommandType: domain.CommandTypeShell,
+			Status:      domain.AuditStatusRunning,
+			StartedAt:   now,
+			CreatedAt:   now,
+		},
+	}
+
+	tests := []struct {
+		name       string
+		query      string
+		mock       *mockAuditLogger
+		wantStatus int
+		wantCount  int
+	}{
+		{
+			name:       "list all entries",
+			query:      "",
+			mock:       &mockAuditLogger{entries: entries},
+			wantStatus: http.StatusOK,
+			wantCount:  2,
+		},
+		{
+			name:       "filter by project",
+			query:      "?project=proj-1",
+			mock:       &mockAuditLogger{entries: entries},
+			wantStatus: http.StatusOK,
+			wantCount:  2,
+		},
+		{
+			name:       "invalid command_type",
+			query:      "?command_type=invalid",
+			mock:       &mockAuditLogger{entries: entries},
+			wantStatus: http.StatusBadRequest,
+		},
+		{
+			name:       "invalid status",
+			query:      "?status=invalid",
+			mock:       &mockAuditLogger{entries: entries},
+			wantStatus: http.StatusBadRequest,
+		},
+		{
+			name:       "invalid start time",
+			query:      "?start=invalid",
+			mock:       &mockAuditLogger{entries: entries},
+			wantStatus: http.StatusBadRequest,
+		},
+		{
+			name:       "invalid limit",
+			query:      "?limit=-1",
+			mock:       &mockAuditLogger{entries: entries},
+			wantStatus: http.StatusBadRequest,
+		},
+		{
+			name:       "invalid offset",
+			query:      "?offset=-1",
+			mock:       &mockAuditLogger{entries: entries},
+			wantStatus: http.StatusBadRequest,
+		},
+		{
+			name:       "valid limit and offset",
+			query:      "?limit=10&offset=0",
+			mock:       &mockAuditLogger{entries: entries},
+			wantStatus: http.StatusOK,
+			wantCount:  2,
+		},
+		{
+			name:       "empty result",
+			query:      "",
+			mock:       &mockAuditLogger{entries: nil},
+			wantStatus: http.StatusOK,
+			wantCount:  0,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			h := NewAuditHandler(tt.mock)
+
+			req := httptest.NewRequest(http.MethodGet, "/audit-log"+tt.query, nil)
+			w := httptest.NewRecorder()
+
+			h.List(w, req)
+
+			if w.Code != tt.wantStatus {
+				t.Errorf("List() status = %d, want %d", w.Code, tt.wantStatus)
+			}
+
+			if tt.wantStatus == http.StatusOK {
+				var resp struct {
+					Data ListAuditLogResponse `json:"data"`
+				}
+				if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
+					t.Fatalf("failed to unmarshal response: %v", err)
+				}
+				if len(resp.Data.Entries) != tt.wantCount {
+					t.Errorf("List() count = %d, want %d", len(resp.Data.Entries), tt.wantCount)
+				}
+			}
+		})
+	}
+}
+
+func TestAuditHandler_Get(t *testing.T) {
+	now := time.Now()
+	entries := []domain.AuditLogEntry{
+		{
+			ID:          "audit-1",
+			CommandID:   "cmd-123",
+			ProjectID:   "proj-1",
+			CommandType: domain.CommandTypeClaude,
+			Status:      domain.AuditStatusSuccess,
+			StartedAt:   now,
+			CreatedAt:   now,
+		},
+	}
+
+	tests := []struct {
+		name       string
+		commandID  string
+		mock       *mockAuditLogger
+		wantStatus int
+	}{
+		{
+			name:       "existing entry",
+			commandID:  "cmd-123",
+			mock:       &mockAuditLogger{entries: entries},
+			wantStatus: http.StatusOK,
+		},
+		{
+			name:       "non-existent entry",
+			commandID:  "cmd-unknown",
+			mock:       &mockAuditLogger{entries: entries},
+			wantStatus: http.StatusNotFound,
+		},
+		{
+			name:       "empty command_id",
+			commandID:  "",
+			mock:       &mockAuditLogger{entries: entries},
+			wantStatus: http.StatusBadRequest,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			h := NewAuditHandler(tt.mock)
+
+			r := chi.NewRouter()
+			r.Get("/audit-log/{command_id}", h.Get)
+
+			path := "/audit-log/" + tt.commandID
+			if tt.commandID == "" {
+				// Test with empty path param
+				r.Get("/audit-log/", h.Get)
+				path = "/audit-log/"
+			}
+
+			req := httptest.NewRequest(http.MethodGet, path, nil)
+			w := httptest.NewRecorder()
+
+			r.ServeHTTP(w, req)
+
+			if w.Code != tt.wantStatus {
+				t.Errorf("Get() status = %d, want %d", w.Code, tt.wantStatus)
+			}
+		})
+	}
+}
+
+func TestAuditLogToResponse(t *testing.T) {
+	now := time.Now()
+	completedAt := now.Add(time.Second)
+	exitCode := 0
+	durationMs := int64(1000)
+
+	entry := &domain.AuditLogEntry{
+		ID:              "audit-1",
+		APIKeyID:        "key-1",
+		CommandID:       "cmd-1",
+		ProjectID:       "proj-1",
+		CommandType:     domain.CommandTypeClaude,
+		Args:            "some args",
+		ClientIP:        "127.0.0.1",
+		UserAgent:       "test-agent",
+		StartedAt:       now,
+		CompletedAt:     &completedAt,
+		ExitCode:        &exitCode,
+		DurationMs:      &durationMs,
+		Status:          domain.AuditStatusSuccess,
+		ErrorMessage:    "",
+		OutputSizeBytes: 1024,
+		CreatedAt:       now,
+	}
+
+	resp := auditLogToResponse(entry)
+
+	if resp.ID != entry.ID {
+		t.Errorf("ID = %s, want %s", resp.ID, entry.ID)
+	}
+	if resp.CommandID != entry.CommandID {
+		t.Errorf("CommandID = %s, want %s", resp.CommandID, entry.CommandID)
+	}
+	if resp.ExitCode == nil || *resp.ExitCode != exitCode {
+		t.Errorf("ExitCode = %v, want %d", resp.ExitCode, exitCode)
+	}
+	if resp.DurationMs == nil || *resp.DurationMs != durationMs {
+		t.Errorf("DurationMs = %v, want %d", resp.DurationMs, durationMs)
+	}
+	if resp.CompletedAt == nil {
+		t.Error("CompletedAt should not be nil")
+	}
+}
--- a/internal/handlers/claude_config.go
+++ b/internal/handlers/claude_config.go
@ -2,22 +2,23 @@
 package handlers

 import (
+	"context"
 	"encoding/base64"
 	"encoding/json"
+	"errors"
 	"fmt"
 	"net/http"
-	"regexp"
 	"strings"
+	"time"

 	"github.com/go-chi/chi/v5"
-	"github.com/orchard9/rdev/internal/executor"
-	"github.com/orchard9/rdev/internal/projects"
+	"github.com/orchard9/rdev/internal/adapter/kubernetes"
+	"github.com/orchard9/rdev/internal/domain"
+	"github.com/orchard9/rdev/internal/service"
+	"github.com/orchard9/rdev/internal/validate"
 	"github.com/orchard9/rdev/pkg/api"
 )

-// Package-level compiled regex for name validation (performance optimization).
-var validNameRegex = regexp.MustCompile(`^[a-zA-Z0-9_-]+$`)
-
 // maxContentSize limits the size of content that can be written (1MB).
 const maxContentSize = 1 << 20

@ -25,14 +26,29 @@ const maxContentSize = 1 << 20
 // Commands, skills, and agents live in /workspace/.claude/ (per-project, in git).
 // Credentials live in /root/.claude/ (shared PVC).
 type ClaudeConfigHandler struct {
-	registry *projects.Registry
-	executor *executor.Executor
+	projectRepo    *kubernetes.ProjectRepository
+	executor       *kubernetes.Executor
+	projectService *service.ProjectService
 }

-// NewClaudeConfigHandler creates a new claude config handler.
-func NewClaudeConfigHandler(registry *projects.Registry, exec *executor.Executor) *ClaudeConfigHandler {
+// NewClaudeConfigHandler creates a new claude config handler with injected dependencies.
+func NewClaudeConfigHandler(projectRepo *kubernetes.ProjectRepository, exec *kubernetes.Executor) *ClaudeConfigHandler {
 	return &ClaudeConfigHandler{
-		registry: registry,
+		projectRepo: projectRepo,
+		executor:    exec,
+	}
+}
+
+// NewClaudeConfigHandlerWithService creates a new claude config handler with injected dependencies.
+// This maintains proper DI by receiving all dependencies from the caller.
+func NewClaudeConfigHandlerWithService(
+	projectService *service.ProjectService,
+	projectRepo *kubernetes.ProjectRepository,
+	exec *kubernetes.Executor,
+) *ClaudeConfigHandler {
+	return &ClaudeConfigHandler{
+		projectService: projectService,
+		projectRepo:    projectRepo,
 		executor:       exec,
 	}
 }
@ -80,11 +96,15 @@ type ConfigOverview struct {
 func (h *ClaudeConfigHandler) Overview(w http.ResponseWriter, r *http.Request) {
 	id := chi.URLParam(r, "id")

-	project, ok := h.registry.Get(id)
-	if !ok {
+	project, err := h.getProject(r.Context(), domain.ProjectID(id))
+	if err != nil {
+		if errors.Is(err, domain.ErrProjectNotFound) {
 			api.WriteNotFound(w, r, fmt.Sprintf("project not found: %s", id))
 			return
 		}
+		api.WriteInternalError(w, r, "failed to get project")
+		return
+	}

 	overview := ConfigOverview{
 		Project:  id,
@ -229,11 +249,15 @@ func (h *ClaudeConfigHandler) listItems(pod, itemType string) []string {
 func (h *ClaudeConfigHandler) listType(w http.ResponseWriter, r *http.Request, itemType string) {
 	id := chi.URLParam(r, "id")

-	project, ok := h.registry.Get(id)
-	if !ok {
+	project, err := h.getProject(r.Context(), domain.ProjectID(id))
+	if err != nil {
+		if errors.Is(err, domain.ErrProjectNotFound) {
 			api.WriteNotFound(w, r, fmt.Sprintf("project not found: %s", id))
 			return
 		}
+		api.WriteInternalError(w, r, "failed to get project")
+		return
+	}

 	items := h.listItems(project.PodName, itemType)
 	api.WriteSuccess(w, r, items)
@ -243,11 +267,15 @@ func (h *ClaudeConfigHandler) listType(w http.ResponseWriter, r *http.Request, i
 func (h *ClaudeConfigHandler) createItem(w http.ResponseWriter, r *http.Request, itemType string) {
 	id := chi.URLParam(r, "id")

-	project, ok := h.registry.Get(id)
-	if !ok {
+	project, err := h.getProject(r.Context(), domain.ProjectID(id))
+	if err != nil {
+		if errors.Is(err, domain.ErrProjectNotFound) {
 			api.WriteNotFound(w, r, fmt.Sprintf("project not found: %s", id))
 			return
 		}
+		api.WriteInternalError(w, r, "failed to get project")
+		return
+	}

 	// Limit request body size to prevent DoS
 	r.Body = http.MaxBytesReader(w, r.Body, maxContentSize)
@ -258,19 +286,17 @@ func (h *ClaudeConfigHandler) createItem(w http.ResponseWriter, r *http.Request,
 		return
 	}

-	if req.Name == "" {
-		api.WriteBadRequest(w, r, "name is required")
+	v := validate.New()
+	v.Required(req.Name, "name")
+	v.Required(req.Content, "content")
+	if err := v.Error(); err != nil {
+		api.WriteBadRequest(w, r, err.Error())
 		return
 	}

-	if req.Content == "" {
-		api.WriteBadRequest(w, r, "content is required")
-		return
-	}
-
-	// Validate name (alphanumeric, dashes, underscores only)
-	if !isValidName(req.Name) {
-		api.WriteBadRequest(w, r, "name must be alphanumeric with dashes or underscores")
+	// Validate name (alphanumeric, dashes, underscores only, 1-64 chars)
+	if err := validate.Name(req.Name, "name"); err != nil {
+		api.WriteBadRequest(w, r, err.Error())
 		return
 	}

@ -305,14 +331,18 @@ func (h *ClaudeConfigHandler) getItem(w http.ResponseWriter, r *http.Request, it
 	id := chi.URLParam(r, "id")
 	name := chi.URLParam(r, "name")

-	project, ok := h.registry.Get(id)
-	if !ok {
+	project, err := h.getProject(r.Context(), domain.ProjectID(id))
+	if err != nil {
+		if errors.Is(err, domain.ErrProjectNotFound) {
 			api.WriteNotFound(w, r, fmt.Sprintf("project not found: %s", id))
 			return
 		}
+		api.WriteInternalError(w, r, "failed to get project")
+		return
+	}

-	if !isValidName(name) {
-		api.WriteBadRequest(w, r, "invalid name")
+	if err := validate.Name(name, "name"); err != nil {
+		api.WriteBadRequest(w, r, err.Error())
 		return
 	}

@ -338,14 +368,18 @@ func (h *ClaudeConfigHandler) updateItem(w http.ResponseWriter, r *http.Request,
 	id := chi.URLParam(r, "id")
 	name := chi.URLParam(r, "name")

-	project, ok := h.registry.Get(id)
-	if !ok {
+	project, err := h.getProject(r.Context(), domain.ProjectID(id))
+	if err != nil {
+		if errors.Is(err, domain.ErrProjectNotFound) {
 			api.WriteNotFound(w, r, fmt.Sprintf("project not found: %s", id))
 			return
 		}
+		api.WriteInternalError(w, r, "failed to get project")
+		return
+	}

-	if !isValidName(name) {
-		api.WriteBadRequest(w, r, "invalid name")
+	if err := validate.Name(name, "name"); err != nil {
+		api.WriteBadRequest(w, r, err.Error())
 		return
 	}

@ -358,8 +392,8 @@ func (h *ClaudeConfigHandler) updateItem(w http.ResponseWriter, r *http.Request,
 		return
 	}

-	if req.Content == "" {
-		api.WriteBadRequest(w, r, "content is required")
+	if err := validate.Required(req.Content, "content"); err != nil {
+		api.WriteBadRequest(w, r, err.Error())
 		return
 	}

@ -394,14 +428,18 @@ func (h *ClaudeConfigHandler) deleteItem(w http.ResponseWriter, r *http.Request,
 	id := chi.URLParam(r, "id")
 	name := chi.URLParam(r, "name")

-	project, ok := h.registry.Get(id)
-	if !ok {
+	project, err := h.getProject(r.Context(), domain.ProjectID(id))
+	if err != nil {
+		if errors.Is(err, domain.ErrProjectNotFound) {
 			api.WriteNotFound(w, r, fmt.Sprintf("project not found: %s", id))
 			return
 		}
+		api.WriteInternalError(w, r, "failed to get project")
+		return
+	}

-	if !isValidName(name) {
-		api.WriteBadRequest(w, r, "invalid name")
+	if err := validate.Name(name, "name"); err != nil {
+		api.WriteBadRequest(w, r, err.Error())
 		return
 	}

@ -425,12 +463,22 @@ func (h *ClaudeConfigHandler) deleteItem(w http.ResponseWriter, r *http.Request,
 	api.WriteSuccess(w, r, map[string]string{"deleted": name})
 }

-// isValidName checks if a name is safe for use in file paths.
-func isValidName(name string) bool {
-	if name == "" || len(name) > 64 {
-		return false
+// getProject retrieves a project by ID using available methods.
+// It prefers the project service if available, otherwise falls back to the project repository.
+func (h *ClaudeConfigHandler) getProject(ctx context.Context, id domain.ProjectID) (*domain.Project, error) {
+	// Add timeout for project lookup
+	ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
+	defer cancel()
+
+	// Use service if available
+	if h.projectService != nil {
+		return h.projectService.Get(ctx, id)
 	}
-	// Only allow alphanumeric, dashes, and underscores
-	// Uses package-level compiled regex for performance
-	return validNameRegex.MatchString(name)
+
+	// Fall back to direct repository access
+	if h.projectRepo != nil {
+		return h.projectRepo.Get(ctx, id)
+	}
+
+	return nil, domain.ErrProjectNotFound
 }
--- a/internal/handlers/claude_config_test.go
+++ b/internal/handlers/claude_config_test.go
@ -2,6 +2,7 @@ package handlers

 import (
 	"bytes"
+	"context"
 	"encoding/base64"
 	"encoding/json"
 	"errors"
@ -12,8 +13,9 @@ import (
 	"testing"

 	"github.com/go-chi/chi/v5"
-	"github.com/orchard9/rdev/internal/executor"
-	"github.com/orchard9/rdev/internal/projects"
+	"github.com/orchard9/rdev/internal/adapter/kubernetes"
+	"github.com/orchard9/rdev/internal/domain"
+	"github.com/orchard9/rdev/internal/validate"
 )

 // MockSimpleExecutor mocks the executor for testing ClaudeConfigHandler.
@ -94,77 +96,55 @@ func (m *MockSimpleExecutor) Reset() {
 	m.defaultResult = ExecSimpleResult{}
 }

-// mockExecutorWrapper wraps MockSimpleExecutor to implement the full Executor interface.
-// This is needed because ClaudeConfigHandler expects *executor.Executor.
-// We'll use a test-specific approach instead.
+// --- Tests for validate.Name ---

-// testClaudeConfigHandler creates a handler with mock capabilities for testing.
-type testClaudeConfigHandler struct {
-	registry *projects.Registry
-	mock     *MockSimpleExecutor
-}
-
-func newTestClaudeConfigHandler() *testClaudeConfigHandler {
-	reg := projects.NewRegistry("test-namespace")
-	mock := NewMockSimpleExecutor()
-	return &testClaudeConfigHandler{
-		registry: reg,
-		mock:     mock,
-	}
-}
-
-// Since ClaudeConfigHandler uses *executor.Executor directly, we need to refactor
-// the test approach. Let's create tests that work with the actual handler structure
-// but use dependency injection for the executor calls.
-
-// --- Tests for isValidName ---
-
-func TestIsValidName(t *testing.T) {
+func TestValidateName(t *testing.T) {
 	tests := []struct {
 		name    string
 		input   string
-		want  bool
+		wantErr bool
 	}{
 		// Valid names
-		{"simple lowercase", "mycommand", true},
-		{"with dashes", "my-command", true},
-		{"with underscores", "my_command", true},
-		{"with numbers", "command123", true},
-		{"mixed case", "MyCommand", true},
-		{"complex valid", "My-Command_123", true},
-		{"single char", "a", true},
-		{"numbers only", "123", true},
-		{"64 chars", strings.Repeat("a", 64), true},
+		{"simple lowercase", "mycommand", false},
+		{"with dashes", "my-command", false},
+		{"with underscores", "my_command", false},
+		{"with numbers", "command123", false},
+		{"mixed case", "MyCommand", false},
+		{"complex valid", "My-Command_123", false},
+		{"single char", "a", false},
+		{"numbers only", "123", false},
+		{"64 chars", strings.Repeat("a", 64), false},

 		// Invalid names
-		{"empty string", "", false},
-		{"65 chars", strings.Repeat("a", 65), false},
-		{"100 chars", strings.Repeat("a", 100), false},
-		{"with spaces", "my command", false},
-		{"with dots", "my.command", false},
-		{"path traversal", "../etc", false},
-		{"double path traversal", "../../etc", false},
-		{"with slash", "path/to/file", false},
-		{"with backslash", "path\\to\\file", false},
-		{"with semicolon", "cmd;rm", false},
-		{"with pipe", "cmd|cat", false},
-		{"with backtick", "cmd`whoami`", false},
-		{"with dollar", "$HOME", false},
-		{"with ampersand", "cmd&cmd", false},
-		{"with newline", "cmd\ncmd", false},
-		{"with tab", "cmd\tcmd", false},
-		{"with null byte", "cmd\x00cmd", false},
-		{"unicode chars", "command\u00e9", false},
-		{"emoji", "command\U0001F600", false},
-		{"leading dash", "-command", true}, // Actually valid per regex
-		{"leading underscore", "_command", true},
+		{"empty string", "", true},
+		{"65 chars", strings.Repeat("a", 65), true},
+		{"100 chars", strings.Repeat("a", 100), true},
+		{"with spaces", "my command", true},
+		{"with dots", "my.command", true},
+		{"path traversal", "../etc", true},
+		{"double path traversal", "../../etc", true},
+		{"with slash", "path/to/file", true},
+		{"with backslash", "path\\to\\file", true},
+		{"with semicolon", "cmd;rm", true},
+		{"with pipe", "cmd|cat", true},
+		{"with backtick", "cmd`whoami`", true},
+		{"with dollar", "$HOME", true},
+		{"with ampersand", "cmd&cmd", true},
+		{"with newline", "cmd\ncmd", true},
+		{"with tab", "cmd\tcmd", true},
+		{"with null byte", "cmd\x00cmd", true},
+		{"unicode chars", "command\u00e9", true},
+		{"emoji", "command\U0001F600", true},
+		{"leading dash", "-command", false}, // Actually valid per regex
+		{"leading underscore", "_command", false},
 	}

 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			got := isValidName(tt.input)
-			if got != tt.want {
-				t.Errorf("isValidName(%q) = %v, want %v", tt.input, got, tt.want)
+			err := validate.Name(tt.input, "name")
+			gotErr := err != nil
+			if gotErr != tt.wantErr {
+				t.Errorf("validate.Name(%q) error = %v, wantErr %v", tt.input, err, tt.wantErr)
 			}
 		})
 	}
@ -175,29 +155,29 @@ func TestIsValidName(t *testing.T) {
 // setupTestRouter creates a chi router with the handler mounted.
 // Since we can't easily mock the executor in the current design,
 // these tests will verify the validation and error handling paths.
-func setupTestRouter(t *testing.T) (*chi.Mux, *projects.Registry) {
+func setupTestRouter(t *testing.T) (*chi.Mux, *kubernetes.ProjectRepository) {
 	t.Helper()

-	// Create a registry with test projects
-	reg := projects.NewRegistry("test-namespace")
-	reg.Register(&projects.Project{
+	// Create a repository with test projects
+	repo := kubernetes.NewProjectRepository("test-namespace")
+	_ = repo.Register(context.Background(), &domain.Project{
 		ID:          "test-project",
 		Name:        "Test Project",
 		Description: "A test project",
 		PodName:     "test-pod-0",
-		Status:      "running",
+		Status:      domain.ProjectStatusRunning,
 		Workspace:   "/workspace",
 	})

 	// Create executor (will fail on actual kubectl calls in tests, but
 	// we can test validation logic that happens before executor calls)
-	exec := executor.New("test-namespace")
+	exec := kubernetes.NewExecutor("test-namespace")

-	handler := NewClaudeConfigHandler(reg, exec)
+	handler := NewClaudeConfigHandler(repo, exec)
 	router := chi.NewRouter()
 	handler.Mount(router)

-	return router, reg
+	return router, repo
 }

 // --- Tests for project not found scenarios ---
@ -304,8 +284,9 @@ func TestClaudeConfigHandler_InvalidName(t *testing.T) {
 					t.Errorf("Status = %d, want 400. Body: %s", rec.Code, rec.Body.String())
 				}

-				if !strings.Contains(rec.Body.String(), "invalid name") {
-					t.Errorf("Body = %q, want to contain 'invalid name'", rec.Body.String())
+				// validate.Name returns errors like "name: must be at most 64 characters" or "name: must be alphanumeric..."
+				if !strings.Contains(rec.Body.String(), "name:") {
+					t.Errorf("Body = %q, want to contain 'name:'", rec.Body.String())
 				}
 			})
 		}
@ -356,25 +337,25 @@ func TestClaudeConfigHandler_CreateValidation(t *testing.T) {
 			name:       "missing name",
 			body:       `{"content":"test content"}`,
 			wantStatus: http.StatusBadRequest,
-			wantErr:    "name is required",
+			wantErr:    "name: is required",
 		},
 		{
 			name:       "empty name",
 			body:       `{"name":"","content":"test content"}`,
 			wantStatus: http.StatusBadRequest,
-			wantErr:    "name is required",
+			wantErr:    "name: is required",
 		},
 		{
 			name:       "missing content",
 			body:       `{"name":"test-command"}`,
 			wantStatus: http.StatusBadRequest,
-			wantErr:    "content is required",
+			wantErr:    "content: is required",
 		},
 		{
 			name:       "empty content",
 			body:       `{"name":"test-command","content":""}`,
 			wantStatus: http.StatusBadRequest,
-			wantErr:    "content is required",
+			wantErr:    "content: is required",
 		},
 		{
 			name:       "invalid name characters",
@ -386,7 +367,7 @@ func TestClaudeConfigHandler_CreateValidation(t *testing.T) {
 			name:       "name too long",
 			body:       fmt.Sprintf(`{"name":"%s","content":"test"}`, strings.Repeat("a", 65)),
 			wantStatus: http.StatusBadRequest,
-			wantErr:    "alphanumeric",
+			wantErr:    "must be at most 64 characters",
 		},
 		{
 			name:       "invalid JSON",
@ -438,14 +419,14 @@ func TestClaudeConfigHandler_UpdateValidation(t *testing.T) {
 			itemName:   "valid-name",
 			body:       `{}`,
 			wantStatus: http.StatusBadRequest,
-			wantErr:    "content is required",
+			wantErr:    "content: is required",
 		},
 		{
 			name:       "empty content",
 			itemName:   "valid-name",
 			body:       `{"content":""}`,
 			wantStatus: http.StatusBadRequest,
-			wantErr:    "content is required",
+			wantErr:    "content: is required",
 		},
 		{
 			name:       "invalid JSON",
@ -546,8 +527,8 @@ func TestClaudeConfigHandler_ValidNames(t *testing.T) {

 	for _, name := range validNames {
 		t.Run("valid: "+name, func(t *testing.T) {
-			if !isValidName(name) {
-				t.Errorf("isValidName(%q) = false, want true", name)
+			if err := validate.Name(name, "name"); err != nil {
+				t.Errorf("validate.Name(%q) returned error: %v, want nil", name, err)
 			}
 		})
 	}
@ -712,16 +693,16 @@ func TestConfigOverview_JSON(t *testing.T) {
 // --- Tests for NewClaudeConfigHandler ---

 func TestNewClaudeConfigHandler(t *testing.T) {
-	reg := projects.NewRegistry("test-namespace")
-	exec := executor.New("test-namespace")
+	repo := kubernetes.NewProjectRepository("test-namespace")
+	exec := kubernetes.NewExecutor("test-namespace")

-	handler := NewClaudeConfigHandler(reg, exec)
+	handler := NewClaudeConfigHandler(repo, exec)

 	if handler == nil {
 		t.Fatal("NewClaudeConfigHandler returned nil")
 	}
-	if handler.registry != reg {
-		t.Error("Handler registry not set correctly")
+	if handler.projectRepo != repo {
+		t.Error("Handler projectRepo not set correctly")
 	}
 	if handler.executor != exec {
 		t.Error("Handler executor not set correctly")
@ -796,12 +777,12 @@ func TestMaxContentSize(t *testing.T) {
 	}
 }

-// --- Tests for validNameRegex pattern ---
+// --- Tests for validate.AlphanumericDashUnderscore pattern ---

-func TestValidNameRegex(t *testing.T) {
-	// Test that the regex is compiled and available
-	if validNameRegex == nil {
-		t.Fatal("validNameRegex is nil")
+func TestAlphanumericDashUnderscorePattern(t *testing.T) {
+	// Test that the regex is compiled and available in validate package
+	if validate.AlphanumericDashUnderscore == nil {
+		t.Fatal("validate.AlphanumericDashUnderscore is nil")
 	}

 	// Test pattern matching directly
@ -821,16 +802,16 @@ func TestValidNameRegex(t *testing.T) {
 	}

 	for _, tt := range tests {
-		got := validNameRegex.MatchString(tt.input)
+		got := validate.AlphanumericDashUnderscore.MatchString(tt.input)
 		if got != tt.want {
-			t.Errorf("validNameRegex.MatchString(%q) = %v, want %v", tt.input, got, tt.want)
+			t.Errorf("validate.AlphanumericDashUnderscore.MatchString(%q) = %v, want %v", tt.input, got, tt.want)
 		}
 	}
 }

 // --- Benchmark tests ---

-func BenchmarkIsValidName(b *testing.B) {
+func BenchmarkValidateName(b *testing.B) {
 	names := []string{
 		"my-command",
 		"skill_123",
@ -842,7 +823,7 @@ func BenchmarkIsValidName(b *testing.B) {
 	b.ResetTimer()
 	for i := 0; i < b.N; i++ {
 		for _, name := range names {
-			isValidName(name)
+			_ = validate.Name(name, "name")
 		}
 	}
 }
@ -934,20 +915,20 @@ func TestClaudeConfigHandler_ErrorMessages(t *testing.T) {
 			method:      "POST",
 			path:        "/projects/test-project/claude-config/commands",
 			body:        `{"content":"test"}`,
-			wantMessage: "name is required",
+			wantMessage: "name: is required",
 		},
 		{
 			name:        "content required message",
 			method:      "POST",
 			path:        "/projects/test-project/claude-config/commands",
 			body:        `{"name":"test"}`,
-			wantMessage: "content is required",
+			wantMessage: "content: is required",
 		},
 		{
 			name:        "invalid name message",
 			method:      "GET",
 			path:        "/projects/test-project/claude-config/commands/" + strings.Repeat("x", 65),
-			wantMessage: "invalid name",
+			wantMessage: "name: must be at most 64 characters",
 		},
 	}

@ -990,9 +971,9 @@ func TestClaudeConfigHandler_Security(t *testing.T) {
 		}

 		for _, attack := range attacks {
-			// isValidName should reject all of these
-			if isValidName(attack) {
-				t.Errorf("isValidName accepted path traversal: %q", attack)
+			// validate.Name should reject all of these
+			if err := validate.Name(attack, "name"); err == nil {
+				t.Errorf("validate.Name accepted path traversal: %q", attack)
 			}
 		}
 	})
@ -1009,8 +990,8 @@ func TestClaudeConfigHandler_Security(t *testing.T) {
 		}

 		for _, attack := range attacks {
-			if isValidName(attack) {
-				t.Errorf("isValidName accepted command injection: %q", attack)
+			if err := validate.Name(attack, "name"); err == nil {
+				t.Errorf("validate.Name accepted command injection: %q", attack)
 			}
 		}
 	})
@ -1034,7 +1015,7 @@ EOF`

 // --- MockableClaudeConfigHandler for testing with mock executor ---

-// Since the actual handler uses *executor.Executor which calls kubectl,
+// Since the actual handler uses *kubernetes.Executor which calls kubectl,
 // we create a version that can use a mock for comprehensive testing.

 // MockExecSimpler is an interface for the ExecSimple method only.
@ -1044,7 +1025,7 @@ type MockExecSimpler interface {

 // testableClaudeConfigHandler wraps the logic for testing with a mock.
 type testableClaudeConfigHandler struct {
-	registry *projects.Registry
+	projectRepo *kubernetes.ProjectRepository
 	execFn      func(podName, command string) (string, error)
 }

@ -1065,8 +1046,8 @@ func (h *testableClaudeConfigHandler) listItems(pod, itemType string) []string {
 }

 func TestListItems_WithMock(t *testing.T) {
-	reg := projects.NewRegistry("test")
-	reg.Register(&projects.Project{ID: "test", PodName: "test-pod"})
+	repo := kubernetes.NewProjectRepository("test")
+	_ = repo.Register(context.Background(), &domain.Project{ID: "test", PodName: "test-pod"})

 	tests := []struct {
 		name     string
@ -1109,7 +1090,7 @@ func TestListItems_WithMock(t *testing.T) {
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			h := &testableClaudeConfigHandler{
-				registry: reg,
+				projectRepo: repo,
 				execFn: func(podName, command string) (string, error) {
 					return tt.output, tt.err
 				},
--- a/internal/handlers/health.go
+++ b/internal/handlers/health.go
@ -0,0 +1,155 @@
+// Package handlers provides HTTP handlers for the rdev API.
+package handlers
+
+import (
+	"context"
+	"database/sql"
+	"net/http"
+	"strings"
+	"time"
+
+	"github.com/orchard9/rdev/pkg/api"
+	k8sclient "k8s.io/client-go/kubernetes"
+)
+
+// HealthHandler handles health and readiness checks.
+type HealthHandler struct {
+	serviceName string
+	db          *sql.DB
+	k8sClient   *k8sclient.Clientset
+}
+
+// NewHealthHandler creates a new health handler with dependencies.
+func NewHealthHandler(serviceName string, db *sql.DB, k8sClient *k8sclient.Clientset) *HealthHandler {
+	return &HealthHandler{
+		serviceName: serviceName,
+		db:          db,
+		k8sClient:   k8sClient,
+	}
+}
+
+// Health returns a simple liveness check.
+// This should be lightweight and only fail if the process is unhealthy.
+// GET /health
+func (h *HealthHandler) Health(w http.ResponseWriter, r *http.Request) {
+	api.WriteSuccess(w, r, map[string]string{
+		"status":  "ok",
+		"service": h.serviceName,
+	})
+}
+
+// Ready returns a readiness check with dependency health.
+// This checks all required dependencies (database, k8s) and returns
+// 503 if any are unhealthy.
+// GET /ready
+func (h *HealthHandler) Ready(w http.ResponseWriter, r *http.Request) {
+	ctx, cancel := context.WithTimeout(r.Context(), 5*time.Second)
+	defer cancel()
+
+	checks := make(map[string]CheckResult)
+	allHealthy := true
+
+	// Database check
+	if h.db != nil {
+		dbCheck := h.checkDatabase(ctx)
+		checks["database"] = dbCheck
+		if !dbCheck.Healthy {
+			allHealthy = false
+		}
+	}
+
+	// Kubernetes check
+	if h.k8sClient != nil {
+		k8sCheck := h.checkKubernetes(ctx)
+		checks["kubernetes"] = k8sCheck
+		if !k8sCheck.Healthy {
+			allHealthy = false
+		}
+	}
+
+	response := ReadinessResponse{
+		Status:  "ready",
+		Service: h.serviceName,
+		Checks:  checks,
+	}
+
+	if !allHealthy {
+		response.Status = "not_ready"
+		api.WriteError(w, r, http.StatusServiceUnavailable, "NOT_READY",
+			"Service not ready - one or more checks failed")
+		return
+	}
+
+	api.WriteSuccess(w, r, response)
+}
+
+// checkDatabase performs a database health check.
+func (h *HealthHandler) checkDatabase(ctx context.Context) CheckResult {
+	start := time.Now()
+	err := h.db.PingContext(ctx)
+	latency := time.Since(start)
+
+	if err != nil {
+		return CheckResult{
+			Healthy:   false,
+			Message:   "connection failed: " + err.Error(),
+			Latency:   latency.String(),
+			LastCheck: time.Now().UTC(),
+		}
+	}
+
+	return CheckResult{
+		Healthy:   true,
+		Message:   "connected",
+		Latency:   latency.String(),
+		LastCheck: time.Now().UTC(),
+	}
+}
+
+// checkKubernetes performs a Kubernetes API health check.
+func (h *HealthHandler) checkKubernetes(ctx context.Context) CheckResult {
+	start := time.Now()
+
+	// Try to get server version - lightweight API call
+	_, err := h.k8sClient.Discovery().ServerVersion()
+	latency := time.Since(start)
+
+	if err != nil {
+		// Check if it's a timeout or connection error
+		msg := err.Error()
+		if strings.Contains(msg, "timeout") || strings.Contains(msg, "deadline") {
+			msg = "connection timeout"
+		} else if strings.Contains(msg, "refused") {
+			msg = "connection refused"
+		}
+
+		return CheckResult{
+			Healthy:   false,
+			Message:   msg,
+			Latency:   latency.String(),
+			LastCheck: time.Now().UTC(),
+		}
+	}
+
+	return CheckResult{
+		Healthy:   true,
+		Message:   "connected",
+		Latency:   latency.String(),
+		LastCheck: time.Now().UTC(),
+	}
+}
+
+// CheckResult represents the result of a health check.
+type CheckResult struct {
+	Healthy   bool      `json:"healthy"`
+	Message   string    `json:"message"`
+	Latency   string    `json:"latency,omitempty"`
+	LastCheck time.Time `json:"last_check"`
+}
+
+// ReadinessResponse is the response for the /ready endpoint.
+type ReadinessResponse struct {
+	Status  string                 `json:"status"`
+	Service string                 `json:"service"`
+	Checks  map[string]CheckResult `json:"checks,omitempty"`
+}
--- a/internal/handlers/health_test.go
+++ b/internal/handlers/health_test.go
@ -0,0 +1,91 @@
+package handlers
+
+import (
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+)
+
+func TestHealthHandler_Health(t *testing.T) {
+	h := NewHealthHandler("test-service", nil, nil)
+
+	req := httptest.NewRequest("GET", "/health", nil)
+	rec := httptest.NewRecorder()
+
+	h.Health(rec, req)
+
+	if rec.Code != http.StatusOK {
+		t.Errorf("Health() status = %d, want %d", rec.Code, http.StatusOK)
+	}
+
+	var resp map[string]any
+	if err := json.NewDecoder(rec.Body).Decode(&resp); err != nil {
+		t.Fatalf("failed to decode response: %v", err)
+	}
+
+	data, ok := resp["data"].(map[string]any)
+	if !ok {
+		t.Fatalf("response missing data field")
+	}
+
+	if data["status"] != "ok" {
+		t.Errorf("status = %q, want %q", data["status"], "ok")
+	}
+	if data["service"] != "test-service" {
+		t.Errorf("service = %q, want %q", data["service"], "test-service")
+	}
+}
+
+func TestHealthHandler_Ready_NoDependencies(t *testing.T) {
+	// Handler with no dependencies should always be ready
+	h := NewHealthHandler("test-service", nil, nil)
+
+	req := httptest.NewRequest("GET", "/ready", nil)
+	rec := httptest.NewRecorder()
+
+	h.Ready(rec, req)
+
+	if rec.Code != http.StatusOK {
+		t.Errorf("Ready() status = %d, want %d", rec.Code, http.StatusOK)
+	}
+
+	var resp map[string]any
+	if err := json.NewDecoder(rec.Body).Decode(&resp); err != nil {
+		t.Fatalf("failed to decode response: %v", err)
+	}
+
+	data, ok := resp["data"].(map[string]any)
+	if !ok {
+		t.Fatalf("response missing data field")
+	}
+
+	if data["status"] != "ready" {
+		t.Errorf("status = %q, want %q", data["status"], "ready")
+	}
+}
+
+func TestCheckResult_JSON(t *testing.T) {
+	result := CheckResult{
+		Healthy: true,
+		Message: "connected",
+		Latency: "1ms",
+	}
+
+	data, err := json.Marshal(result)
+	if err != nil {
+		t.Fatalf("failed to marshal: %v", err)
+	}
+
+	var decoded CheckResult
+	if err := json.Unmarshal(data, &decoded); err != nil {
+		t.Fatalf("failed to unmarshal: %v", err)
+	}
+
+	if decoded.Healthy != result.Healthy {
+		t.Errorf("Healthy = %v, want %v", decoded.Healthy, result.Healthy)
+	}
+	if decoded.Message != result.Message {
+		t.Errorf("Message = %q, want %q", decoded.Message, result.Message)
+	}
+}
--- a/internal/handlers/keys.go
+++ b/internal/handlers/keys.go
@ -2,10 +2,12 @@ package handlers

 import (
 	"encoding/json"
+	"net"
 	"net/http"

 	"github.com/go-chi/chi/v5"
 	"github.com/orchard9/rdev/internal/auth"
+	"github.com/orchard9/rdev/internal/validate"
 	"github.com/orchard9/rdev/pkg/api"
 )

@ -36,6 +38,7 @@ type CreateKeyRequest struct {
 	Scopes     []string `json:"scopes"`
 	ProjectIDs []string `json:"project_ids,omitempty"` // null = all projects
 	ExpiresIn  string   `json:"expires_in,omitempty"`  // "30d", "60d", "90d", "1y", "never"
+	AllowedIPs []string `json:"allowed_ips,omitempty"` // CIDR notation, e.g., ["192.168.1.0/24"]; null = no restriction
 }

 // KeyResponse is the JSON response for a key (without secret).
@ -45,6 +48,7 @@ type KeyResponse struct {
 	KeyPrefix  string   `json:"key_prefix"`
 	Scopes     []string `json:"scopes"`
 	ProjectIDs []string `json:"project_ids,omitempty"`
+	AllowedIPs []string `json:"allowed_ips,omitempty"`
 	CreatedAt  string   `json:"created_at"`
 	ExpiresAt  *string  `json:"expires_at,omitempty"`
 	LastUsedAt *string  `json:"last_used_at,omitempty"`
@ -75,6 +79,10 @@ func apiKeyToResponse(k *auth.APIKey) KeyResponse {
 		resp.ProjectIDs = k.ProjectIDs
 	}

+	if k.AllowedIPs != nil {
+		resp.AllowedIPs = k.AllowedIPs
+	}
+
 	if k.ExpiresAt != nil {
 		s := k.ExpiresAt.Format("2006-01-02T15:04:05Z07:00")
 		resp.ExpiresAt = &s
@ -119,13 +127,11 @@ func (h *KeysHandler) Create(w http.ResponseWriter, r *http.Request) {
 		return
 	}

-	if req.Name == "" {
-		api.WriteBadRequest(w, r, "name is required")
-		return
-	}
-
-	if len(req.Scopes) == 0 {
-		api.WriteBadRequest(w, r, "scopes is required")
+	v := validate.New()
+	v.Required(req.Name, "name")
+	v.RequiredSlice(req.Scopes, "scopes")
+	if err := v.Error(); err != nil {
+		api.WriteBadRequest(w, r, err.Error())
 		return
 	}

@ -143,6 +149,14 @@ func (h *KeysHandler) Create(w http.ResponseWriter, r *http.Request) {
 		return
 	}

+	// Validate allowed_ips CIDR format
+	for _, cidr := range req.AllowedIPs {
+		if err := validateCIDROrIP(cidr); err != nil {
+			api.WriteBadRequest(w, r, "invalid allowed_ips: "+cidr+" is not a valid CIDR or IP address")
+			return
+		}
+	}
+
 	// Get creator from authenticated key
 	creator := "admin"
 	if apiKey := auth.GetAPIKey(r.Context()); apiKey != nil && apiKey.ID != "admin" {
@ -153,6 +167,7 @@ func (h *KeysHandler) Create(w http.ResponseWriter, r *http.Request) {
 		Name:       req.Name,
 		Scopes:     scopes,
 		ProjectIDs: req.ProjectIDs,
+		AllowedIPs: req.AllowedIPs,
 		ExpiresIn:  expiresIn,
 		CreatedBy:  creator,
 	})
@ -167,6 +182,23 @@ func (h *KeysHandler) Create(w http.ResponseWriter, r *http.Request) {
 	})
 }

+// validateCIDROrIP validates that a string is either a valid CIDR notation or a valid IP address.
+func validateCIDROrIP(cidr string) error {
+	// Try parsing as CIDR first
+	_, _, err := net.ParseCIDR(cidr)
+	if err == nil {
+		return nil
+	}
+
+	// Try parsing as a single IP address
+	ip := net.ParseIP(cidr)
+	if ip != nil {
+		return nil
+	}
+
+	return err
+}
+
 // Get returns a single API key.
 // GET /keys/{id}
 func (h *KeysHandler) Get(w http.ResponseWriter, r *http.Request) {
--- a/internal/handlers/keys_test.go
+++ b/internal/handlers/keys_test.go
@ -103,7 +103,7 @@ func TestKeysHandler_Create(t *testing.T) {
 				Scopes: []string{"projects:read"},
 			},
 			wantStatus: http.StatusBadRequest,
-			wantErr:    "name is required",
+			wantErr:    "name: is required",
 		},
 		{
 			name: "missing scopes",
@ -111,7 +111,7 @@ func TestKeysHandler_Create(t *testing.T) {
 				Name: "test-no-scopes",
 			},
 			wantStatus: http.StatusBadRequest,
-			wantErr:    "scopes is required",
+			wantErr:    "scopes: is required",
 		},
 		{
 			name: "invalid scope",
--- a/internal/handlers/projects.go
+++ b/internal/handlers/projects.go
@ -4,36 +4,53 @@ package handlers
 import (
 	"context"
 	"encoding/json"
+	"errors"
 	"fmt"
 	"net/http"
+	"strings"
 	"sync"
 	"sync/atomic"
 	"time"

 	"github.com/go-chi/chi/v5"
-	"github.com/orchard9/rdev/internal/executor"
-	"github.com/orchard9/rdev/internal/projects"
+	"github.com/orchard9/rdev/internal/adapter/kubernetes"
+	"github.com/orchard9/rdev/internal/auth"
+	"github.com/orchard9/rdev/internal/domain"
+	"github.com/orchard9/rdev/internal/port"
 	"github.com/orchard9/rdev/internal/sanitize"
+	"github.com/orchard9/rdev/internal/service"
+	"github.com/orchard9/rdev/internal/validate"
 	"github.com/orchard9/rdev/pkg/api"
 )

 // ProjectsHandler handles project-related endpoints.
 type ProjectsHandler struct {
-	registry *projects.Registry
-	executor *executor.Executor
+	// Legacy dependencies (for backward compatibility)
+	projectRepo *kubernetes.ProjectRepository
+	executor    *kubernetes.Executor
 	streams     *streamManager
 	cmdID       atomic.Uint64
+
+	// New hexagonal architecture dependencies
+	projectService *service.ProjectService
 }

-// NewProjectsHandler creates a new projects handler.
-func NewProjectsHandler() *ProjectsHandler {
+// NewProjectsHandler creates a new projects handler with injected dependencies.
+func NewProjectsHandler(projectRepo *kubernetes.ProjectRepository, executor *kubernetes.Executor) *ProjectsHandler {
 	return &ProjectsHandler{
-		registry: projects.NewRegistry("rdev"),
-		executor: executor.New("rdev"),
+		projectRepo: projectRepo,
+		executor:    executor,
 		streams:     newStreamManager(),
 	}
 }

+// NewProjectsHandlerWithService creates a new projects handler with injected service.
+func NewProjectsHandlerWithService(projectService *service.ProjectService) *ProjectsHandler {
+	return &ProjectsHandler{
+		projectService: projectService,
+	}
+}
+
 // Mount registers the projects routes.
 func (h *ProjectsHandler) Mount(r api.Router) {
 	r.Route("/projects", func(r chi.Router) {
@ -46,35 +63,123 @@ func (h *ProjectsHandler) Mount(r api.Router) {
 	})
 }

+// getAuditContext extracts audit-related information from the HTTP request.
+func getAuditContext(r *http.Request) *service.AuditContext {
+	apiKey := auth.GetAPIKey(r.Context())
+	if apiKey == nil {
+		return nil
+	}
+
+	return &service.AuditContext{
+		APIKeyID:  apiKey.ID,
+		ClientIP:  getClientIP(r),
+		UserAgent: r.UserAgent(),
+	}
+}
+
+// getClientIP extracts the client IP from the request.
+func getClientIP(r *http.Request) string {
+	// Check X-Forwarded-For header (set by proxies/load balancers)
+	if xff := r.Header.Get("X-Forwarded-For"); xff != "" {
+		// Take the first IP in the chain
+		if idx := strings.Index(xff, ","); idx != -1 {
+			return strings.TrimSpace(xff[:idx])
+		}
+		return strings.TrimSpace(xff)
+	}
+
+	// Check X-Real-IP header
+	if xri := r.Header.Get("X-Real-IP"); xri != "" {
+		return strings.TrimSpace(xri)
+	}
+
+	// Fall back to RemoteAddr
+	addr := r.RemoteAddr
+	// Handle IPv6 addresses like "[::1]:8080"
+	if strings.HasPrefix(addr, "[") {
+		if idx := strings.LastIndex(addr, "]:"); idx != -1 {
+			return addr[1:idx]
+		}
+		return strings.Trim(addr, "[]")
+	}
+	// Handle IPv4 addresses like "192.168.1.1:8080"
+	if idx := strings.LastIndex(addr, ":"); idx != -1 {
+		return addr[:idx]
+	}
+	return addr
+}
+
 // List returns all available projects.
 // GET /projects
 func (h *ProjectsHandler) List(w http.ResponseWriter, r *http.Request) {
-	// Refresh status from K8s
 	ctx, cancel := context.WithTimeout(r.Context(), 5*time.Second)
 	defer cancel()
-	h.registry.RefreshStatus(ctx)

-	projects := h.registry.List()
+	// Use new service if available
+	if h.projectService != nil {
+		projects, err := h.projectService.List(ctx)
+		if err != nil {
+			api.WriteInternalError(w, r, "failed to list projects")
+			return
+		}
 		api.WriteSuccess(w, r, projects)
+		return
+	}
+
+	// Legacy path using hexagonal types
+	if h.projectRepo != nil {
+		_ = h.projectRepo.RefreshStatus(ctx)
+		projects, err := h.projectRepo.List(ctx)
+		if err != nil {
+			api.WriteInternalError(w, r, "failed to list projects")
+			return
+		}
+		api.WriteSuccess(w, r, projects)
+		return
+	}
+
+	api.WriteInternalError(w, r, "no project service configured")
 }

 // Get returns a specific project by ID.
 // GET /projects/{id}
 func (h *ProjectsHandler) Get(w http.ResponseWriter, r *http.Request) {
 	id := chi.URLParam(r, "id")
+	ctx, cancel := context.WithTimeout(r.Context(), 5*time.Second)
+	defer cancel()

-	project, ok := h.registry.Get(id)
-	if !ok {
+	// Use new service if available
+	if h.projectService != nil {
+		project, err := h.projectService.Get(ctx, domain.ProjectID(id))
+		if err != nil {
+			if errors.Is(err, domain.ErrProjectNotFound) {
 				api.WriteNotFound(w, r, fmt.Sprintf("project not found: %s", id))
 				return
 			}
-
-	// Refresh this project's status
-	ctx, cancel := context.WithTimeout(r.Context(), 5*time.Second)
-	defer cancel()
-	h.registry.RefreshStatus(ctx)
-
+			api.WriteInternalError(w, r, "failed to get project")
+			return
+		}
 		api.WriteSuccess(w, r, project)
+		return
+	}
+
+	// Legacy path using hexagonal types
+	if h.projectRepo != nil {
+		_ = h.projectRepo.RefreshStatus(ctx)
+		project, err := h.projectRepo.Get(ctx, domain.ProjectID(id))
+		if err != nil {
+			if errors.Is(err, domain.ErrProjectNotFound) {
+				api.WriteNotFound(w, r, fmt.Sprintf("project not found: %s", id))
+				return
+			}
+			api.WriteInternalError(w, r, "failed to get project")
+			return
+		}
+		api.WriteSuccess(w, r, project)
+		return
+	}
+
+	api.WriteNotFound(w, r, fmt.Sprintf("project not found: %s", id))
 }

 // ClaudeRequest is the request body for POST /projects/{id}/claude.
@ -88,20 +193,60 @@ type ClaudeRequest struct {
 func (h *ProjectsHandler) RunClaude(w http.ResponseWriter, r *http.Request) {
 	id := chi.URLParam(r, "id")

-	project, ok := h.registry.Get(id)
-	if !ok {
-		api.WriteNotFound(w, r, fmt.Sprintf("project not found: %s", id))
-		return
-	}
-
 	var req ClaudeRequest
 	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
 		api.WriteBadRequest(w, r, "invalid request body")
 		return
 	}

-	if req.Prompt == "" {
-		api.WriteBadRequest(w, r, "prompt is required")
+	// Use new service if available
+	if h.projectService != nil {
+		result, err := h.projectService.ExecuteClaude(r.Context(), service.ExecuteClaudeRequest{
+			ProjectID: domain.ProjectID(id),
+			Prompt:    req.Prompt,
+			StreamID:  req.StreamID,
+			Audit:     getAuditContext(r),
+		})
+		if err != nil {
+			if errors.Is(err, domain.ErrProjectNotFound) {
+				api.WriteNotFound(w, r, fmt.Sprintf("project not found: %s", id))
+				return
+			}
+			if errors.Is(err, domain.ErrInvalidCommand) || errors.Is(err, domain.ErrCommandSanitization) {
+				api.WriteBadRequest(w, r, err.Error())
+				return
+			}
+			api.WriteInternalError(w, r, "failed to execute command")
+			return
+		}
+		api.WriteCreated(w, r, map[string]any{
+			"id":         result.CommandID,
+			"project":    id,
+			"type":       "claude",
+			"status":     "running",
+			"stream_url": result.StreamURL,
+		})
+		return
+	}
+
+	// Legacy path using hexagonal types
+	if h.projectRepo == nil || h.executor == nil {
+		api.WriteInternalError(w, r, "no project service configured")
+		return
+	}
+
+	project, err := h.projectRepo.Get(r.Context(), domain.ProjectID(id))
+	if err != nil {
+		if errors.Is(err, domain.ErrProjectNotFound) {
+			api.WriteNotFound(w, r, fmt.Sprintf("project not found: %s", id))
+			return
+		}
+		api.WriteInternalError(w, r, "failed to get project")
+		return
+	}
+
+	if err := validate.Required(req.Prompt, "prompt"); err != nil {
+		api.WriteBadRequest(w, r, err.Error())
 		return
 	}

@ -124,27 +269,25 @@ func (h *ProjectsHandler) RunClaude(w http.ResponseWriter, r *http.Request) {
 		cmdID = req.StreamID
 	}

-	// Create the command
-	cmd := &executor.Command{
-		ID:        cmdID,
-		PodName:   project.PodName,
-		Type:      executor.CommandTypeClaude,
+	// Create the command using domain types
+	cmd := &domain.Command{
+		ID:        domain.CommandID(cmdID),
+		ProjectID: domain.ProjectID(id),
+		Type:      domain.CommandTypeClaude,
 		Args:      []string{req.Prompt},
 		StartedAt: time.Now(),
 	}

 	// Execute in background
-	go h.executeCommand(cmd)
+	go h.executeCommand(cmd, project.PodName)

-	result := map[string]any{
+	api.WriteCreated(w, r, map[string]any{
 		"id":         cmdID,
 		"project":    id,
 		"type":       "claude",
 		"status":     "running",
 		"stream_url": fmt.Sprintf("/projects/%s/events?stream_id=%s", id, cmdID),
-	}
-
-	api.WriteCreated(w, r, result)
+	})
 }

 // ShellRequest is the request body for POST /projects/{id}/shell.
@ -158,20 +301,60 @@ type ShellRequest struct {
 func (h *ProjectsHandler) RunShell(w http.ResponseWriter, r *http.Request) {
 	id := chi.URLParam(r, "id")

-	project, ok := h.registry.Get(id)
-	if !ok {
-		api.WriteNotFound(w, r, fmt.Sprintf("project not found: %s", id))
-		return
-	}
-
 	var req ShellRequest
 	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
 		api.WriteBadRequest(w, r, "invalid request body")
 		return
 	}

-	if req.Command == "" {
-		api.WriteBadRequest(w, r, "command is required")
+	// Use new service if available
+	if h.projectService != nil {
+		result, err := h.projectService.ExecuteShell(r.Context(), service.ExecuteShellRequest{
+			ProjectID: domain.ProjectID(id),
+			Command:   req.Command,
+			StreamID:  req.StreamID,
+			Audit:     getAuditContext(r),
+		})
+		if err != nil {
+			if errors.Is(err, domain.ErrProjectNotFound) {
+				api.WriteNotFound(w, r, fmt.Sprintf("project not found: %s", id))
+				return
+			}
+			if errors.Is(err, domain.ErrInvalidCommand) || errors.Is(err, domain.ErrCommandSanitization) {
+				api.WriteBadRequest(w, r, err.Error())
+				return
+			}
+			api.WriteInternalError(w, r, "failed to execute command")
+			return
+		}
+		api.WriteCreated(w, r, map[string]any{
+			"id":         result.CommandID,
+			"project":    id,
+			"type":       "shell",
+			"status":     "running",
+			"stream_url": result.StreamURL,
+		})
+		return
+	}
+
+	// Legacy path using hexagonal types
+	if h.projectRepo == nil || h.executor == nil {
+		api.WriteInternalError(w, r, "no project service configured")
+		return
+	}
+
+	project, err := h.projectRepo.Get(r.Context(), domain.ProjectID(id))
+	if err != nil {
+		if errors.Is(err, domain.ErrProjectNotFound) {
+			api.WriteNotFound(w, r, fmt.Sprintf("project not found: %s", id))
+			return
+		}
+		api.WriteInternalError(w, r, "failed to get project")
+		return
+	}
+
+	if err := validate.Required(req.Command, "command"); err != nil {
+		api.WriteBadRequest(w, r, err.Error())
 		return
 	}

@ -194,27 +377,25 @@ func (h *ProjectsHandler) RunShell(w http.ResponseWriter, r *http.Request) {
 		cmdID = req.StreamID
 	}

-	// Create the command
-	cmd := &executor.Command{
-		ID:        cmdID,
-		PodName:   project.PodName,
-		Type:      executor.CommandTypeShell,
+	// Create the command using domain types
+	cmd := &domain.Command{
+		ID:        domain.CommandID(cmdID),
+		ProjectID: domain.ProjectID(id),
+		Type:      domain.CommandTypeShell,
 		Args:      []string{req.Command},
 		StartedAt: time.Now(),
 	}

 	// Execute in background
-	go h.executeCommand(cmd)
+	go h.executeCommand(cmd, project.PodName)

-	result := map[string]any{
+	api.WriteCreated(w, r, map[string]any{
 		"id":         cmdID,
 		"project":    id,
 		"type":       "shell",
 		"status":     "running",
 		"stream_url": fmt.Sprintf("/projects/%s/events?stream_id=%s", id, cmdID),
-	}
-
-	api.WriteCreated(w, r, result)
+	})
 }

 // GitRequest is the request body for POST /projects/{id}/git.
@ -228,20 +409,60 @@ type GitRequest struct {
 func (h *ProjectsHandler) RunGit(w http.ResponseWriter, r *http.Request) {
 	id := chi.URLParam(r, "id")

-	project, ok := h.registry.Get(id)
-	if !ok {
-		api.WriteNotFound(w, r, fmt.Sprintf("project not found: %s", id))
-		return
-	}
-
 	var req GitRequest
 	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
 		api.WriteBadRequest(w, r, "invalid request body")
 		return
 	}

-	if len(req.Args) == 0 {
-		api.WriteBadRequest(w, r, "args is required")
+	// Use new service if available
+	if h.projectService != nil {
+		result, err := h.projectService.ExecuteGit(r.Context(), service.ExecuteGitRequest{
+			ProjectID: domain.ProjectID(id),
+			Args:      req.Args,
+			StreamID:  req.StreamID,
+			Audit:     getAuditContext(r),
+		})
+		if err != nil {
+			if errors.Is(err, domain.ErrProjectNotFound) {
+				api.WriteNotFound(w, r, fmt.Sprintf("project not found: %s", id))
+				return
+			}
+			if errors.Is(err, domain.ErrInvalidCommand) || errors.Is(err, domain.ErrCommandSanitization) {
+				api.WriteBadRequest(w, r, err.Error())
+				return
+			}
+			api.WriteInternalError(w, r, "failed to execute command")
+			return
+		}
+		api.WriteCreated(w, r, map[string]any{
+			"id":         result.CommandID,
+			"project":    id,
+			"type":       "git",
+			"status":     "running",
+			"stream_url": result.StreamURL,
+		})
+		return
+	}
+
+	// Legacy path using hexagonal types
+	if h.projectRepo == nil || h.executor == nil {
+		api.WriteInternalError(w, r, "no project service configured")
+		return
+	}
+
+	project, err := h.projectRepo.Get(r.Context(), domain.ProjectID(id))
+	if err != nil {
+		if errors.Is(err, domain.ErrProjectNotFound) {
+			api.WriteNotFound(w, r, fmt.Sprintf("project not found: %s", id))
+			return
+		}
+		api.WriteInternalError(w, r, "failed to get project")
+		return
+	}
+
+	if err := validate.RequiredSlice(req.Args, "args"); err != nil {
+		api.WriteBadRequest(w, r, err.Error())
 		return
 	}

@ -264,43 +485,42 @@ func (h *ProjectsHandler) RunGit(w http.ResponseWriter, r *http.Request) {
 		cmdID = req.StreamID
 	}

-	// Create the command
-	cmd := &executor.Command{
-		ID:        cmdID,
-		PodName:   project.PodName,
-		Type:      executor.CommandTypeGit,
+	// Create the command using domain types
+	cmd := &domain.Command{
+		ID:        domain.CommandID(cmdID),
+		ProjectID: domain.ProjectID(id),
+		Type:      domain.CommandTypeGit,
 		Args:      req.Args,
 		StartedAt: time.Now(),
 	}

 	// Execute in background
-	go h.executeCommand(cmd)
+	go h.executeCommand(cmd, project.PodName)

-	result := map[string]any{
+	api.WriteCreated(w, r, map[string]any{
 		"id":         cmdID,
 		"project":    id,
 		"type":       "git",
 		"status":     "running",
 		"stream_url": fmt.Sprintf("/projects/%s/events?stream_id=%s", id, cmdID),
-	}
-
-	api.WriteCreated(w, r, result)
+	})
 }

 // executeCommand runs a command and streams output to subscribers.
-func (h *ProjectsHandler) executeCommand(cmd *executor.Command) {
+func (h *ProjectsHandler) executeCommand(cmd *domain.Command, podName string) {
 	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
 	defer cancel()

-	result := h.executor.Exec(ctx, cmd, func(stream, line string) {
-		h.streams.Send(cmd.ID, "output", map[string]any{
-			"line":   line,
-			"stream": stream,
+	cmdID := string(cmd.ID)
+	result, _ := h.executor.Execute(ctx, cmd, podName, func(line domain.OutputLine) {
+		h.streams.Send(cmdID, "output", map[string]any{
+			"line":   line.Line,
+			"stream": line.Stream,
 		})
 	})

 	// Send completion event
-	h.streams.Send(cmd.ID, "complete", map[string]any{
+	h.streams.Send(cmdID, "complete", map[string]any{
 		"exit_code":   result.ExitCode,
 		"duration_ms": result.DurationMs,
 	})
@ -308,17 +528,32 @@ func (h *ProjectsHandler) executeCommand(cmd *executor.Command) {
 	// Clean up stream after a delay
 	go func() {
 		time.Sleep(30 * time.Second)
-		h.streams.Close(cmd.ID)
+		h.streams.Close(cmdID)
 	}()
 }

 // Events streams command output via Server-Sent Events.
 // GET /projects/{id}/events
+// Supports Last-Event-ID header for reconnection with event replay.
 func (h *ProjectsHandler) Events(w http.ResponseWriter, r *http.Request) {
 	id := chi.URLParam(r, "id")
 	streamID := r.URL.Query().Get("stream_id")
+	lastEventID := r.Header.Get("Last-Event-ID")

-	if !h.registry.Exists(id) {
+	// Check project exists
+	if h.projectService != nil {
+		exists, err := h.projectService.Exists(r.Context(), domain.ProjectID(id))
+		if err != nil || !exists {
+			api.WriteNotFound(w, r, fmt.Sprintf("project not found: %s", id))
+			return
+		}
+	} else if h.projectRepo != nil {
+		exists, err := h.projectRepo.Exists(r.Context(), domain.ProjectID(id))
+		if err != nil || !exists {
+			api.WriteNotFound(w, r, fmt.Sprintf("project not found: %s", id))
+			return
+		}
+	} else {
 		api.WriteNotFound(w, r, fmt.Sprintf("project not found: %s", id))
 		return
 	}
@ -331,18 +566,55 @@ func (h *ProjectsHandler) Events(w http.ResponseWriter, r *http.Request) {

 	flusher, ok := w.(http.Flusher)
 	if !ok {
-		http.Error(w, "SSE not supported", http.StatusInternalServerError)
+		api.WriteInternalError(w, r, "SSE not supported")
 		return
 	}

-	// Subscribe to events
-	events := h.streams.Subscribe(streamID)
-	defer h.streams.Unsubscribe(streamID, events)
+	// Subscribe to events - use service if available, with Last-Event-ID support
+	var events <-chan port.StreamEvent
+	var cleanup func()
+	if h.projectService != nil {
+		if lastEventID != "" {
+			events, cleanup = h.projectService.SubscribeFromID(streamID, lastEventID)
+		} else {
+			events, cleanup = h.projectService.Subscribe(streamID)
+		}
+	} else {
+		legacyEvents := h.streams.Subscribe(streamID)
+		// Create adapter from legacy to port.StreamEvent with context cancellation
+		portEvents := make(chan port.StreamEvent, 100)
+		adapterCtx, adapterCancel := context.WithCancel(r.Context())
+		go func() {
+			defer close(portEvents)
+			for {
+				select {
+				case ev, ok := <-legacyEvents:
+					if !ok {
+						return
+					}
+					select {
+					case portEvents <- port.StreamEvent{Type: ev.Type, Data: ev.Data}:
+					case <-adapterCtx.Done():
+						return
+					}
+				case <-adapterCtx.Done():
+					return
+				}
+			}
+		}()
+		events = portEvents
+		cleanup = func() {
+			adapterCancel()
+			h.streams.Unsubscribe(streamID, legacyEvents)
+		}
+	}
+	defer cleanup()

 	// Send initial connected event
 	writeSSE(w, flusher, "connected", map[string]any{
 		"project":      id,
 		"stream_id":    streamID,
+		"reconnecting": lastEventID != "",
 	})

 	// Stream events until client disconnects or stream closes
@ -358,7 +630,8 @@ func (h *ProjectsHandler) Events(w http.ResponseWriter, r *http.Request) {
 			if !ok {
 				return
 			}
-			writeSSE(w, flusher, event.Type, event.Data)
+			// Include event ID in SSE output for reconnection support
+			writeSSEWithID(w, flusher, event.ID, event.Type, event.Data)
 			if event.Type == "complete" {
 				return
 			}
@ -372,9 +645,17 @@ func (h *ProjectsHandler) Events(w http.ResponseWriter, r *http.Request) {

 // writeSSE writes a Server-Sent Event.
 func writeSSE(w http.ResponseWriter, flusher http.Flusher, event string, data map[string]any) {
+	writeSSEWithID(w, flusher, "", event, data)
+}
+
+// writeSSEWithID writes a Server-Sent Event with an optional event ID.
+func writeSSEWithID(w http.ResponseWriter, flusher http.Flusher, id, event string, data map[string]any) {
 	dataBytes, _ := json.Marshal(data)
-	fmt.Fprintf(w, "event: %s\n", event)
-	fmt.Fprintf(w, "data: %s\n\n", dataBytes)
+	if id != "" {
+		_, _ = fmt.Fprintf(w, "id: %s\n", id)
+	}
+	_, _ = fmt.Fprintf(w, "event: %s\n", event)
+	_, _ = fmt.Fprintf(w, "data: %s\n\n", dataBytes)
 	flusher.Flush()
 }

@ -441,12 +722,12 @@ func (sm *streamManager) Close(streamID string) {
 	delete(sm.streams, streamID)
 }

-// Registry returns the project registry for use by other handlers.
-func (h *ProjectsHandler) Registry() *projects.Registry {
-	return h.registry
+// ProjectRepository returns the project repository for use by other handlers.
+func (h *ProjectsHandler) ProjectRepository() *kubernetes.ProjectRepository {
+	return h.projectRepo
 }

 // Executor returns the executor for use by other handlers.
-func (h *ProjectsHandler) Executor() *executor.Executor {
+func (h *ProjectsHandler) Executor() *kubernetes.Executor {
 	return h.executor
 }
--- a/internal/handlers/projects_bench_test.go
+++ b/internal/handlers/projects_bench_test.go
@ -0,0 +1,281 @@
+package handlers
+
+import (
+	"bytes"
+	"encoding/json"
+	"net/http/httptest"
+	"sync"
+	"testing"
+
+	"github.com/go-chi/chi/v5"
+	"github.com/orchard9/rdev/internal/adapter/kubernetes"
+)
+
+// setupBenchHandler creates a handler for benchmarking.
+func setupBenchHandler() (*ProjectsHandler, chi.Router) {
+	repo := kubernetes.NewProjectRepository("test-namespace")
+	exec := kubernetes.NewExecutor("test-namespace")
+	h := NewProjectsHandler(repo, exec)
+
+	router := chi.NewRouter()
+	h.Mount(router)
+
+	return h, router
+}
+
+// BenchmarkRunClaude benchmarks the RunClaude endpoint.
+// This measures the handler overhead excluding actual command execution.
+func BenchmarkRunClaude(b *testing.B) {
+	_, router := setupBenchHandler()
+
+	body := ClaudeRequest{Prompt: "test prompt"}
+	bodyBytes, _ := json.Marshal(body)
+
+	b.ResetTimer()
+	b.ReportAllocs()
+
+	for i := 0; i < b.N; i++ {
+		req := httptest.NewRequest("POST", "/projects/pantheon/claude",
+			bytes.NewReader(bodyBytes))
+		req.Header.Set("Content-Type", "application/json")
+		rec := httptest.NewRecorder()
+		router.ServeHTTP(rec, req)
+	}
+}
+
+// BenchmarkRunShell benchmarks the RunShell endpoint.
+func BenchmarkRunShell(b *testing.B) {
+	_, router := setupBenchHandler()
+
+	body := ShellRequest{Command: "ls -la"}
+	bodyBytes, _ := json.Marshal(body)
+
+	b.ResetTimer()
+	b.ReportAllocs()
+
+	for i := 0; i < b.N; i++ {
+		req := httptest.NewRequest("POST", "/projects/pantheon/shell",
+			bytes.NewReader(bodyBytes))
+		req.Header.Set("Content-Type", "application/json")
+		rec := httptest.NewRecorder()
+		router.ServeHTTP(rec, req)
+	}
+}
+
+// BenchmarkRunGit benchmarks the RunGit endpoint.
+func BenchmarkRunGit(b *testing.B) {
+	_, router := setupBenchHandler()
+
+	body := GitRequest{Args: []string{"status"}}
+	bodyBytes, _ := json.Marshal(body)
+
+	b.ResetTimer()
+	b.ReportAllocs()
+
+	for i := 0; i < b.N; i++ {
+		req := httptest.NewRequest("POST", "/projects/pantheon/git",
+			bytes.NewReader(bodyBytes))
+		req.Header.Set("Content-Type", "application/json")
+		rec := httptest.NewRecorder()
+		router.ServeHTTP(rec, req)
+	}
+}
+
+// BenchmarkList benchmarks the List endpoint.
+func BenchmarkList(b *testing.B) {
+	_, router := setupBenchHandler()
+
+	b.ResetTimer()
+	b.ReportAllocs()
+
+	for i := 0; i < b.N; i++ {
+		req := httptest.NewRequest("GET", "/projects", nil)
+		rec := httptest.NewRecorder()
+		router.ServeHTTP(rec, req)
+	}
+}
+
+// BenchmarkGet benchmarks the Get endpoint.
+func BenchmarkGet(b *testing.B) {
+	_, router := setupBenchHandler()
+
+	b.ResetTimer()
+	b.ReportAllocs()
+
+	for i := 0; i < b.N; i++ {
+		req := httptest.NewRequest("GET", "/projects/pantheon", nil)
+		rec := httptest.NewRecorder()
+		router.ServeHTTP(rec, req)
+	}
+}
+
+// BenchmarkSSEStreaming benchmarks the SSE event throughput.
+// This measures how fast events can be written through the stream manager.
+func BenchmarkSSEStreaming(b *testing.B) {
+	h, _ := setupBenchHandler()
+
+	// Subscribe to a stream
+	streamID := "bench-stream"
+	events := h.streams.Subscribe(streamID)
+	defer h.streams.Unsubscribe(streamID, events)
+
+	// Drain events in background
+	done := make(chan struct{})
+	go func() {
+		for range events {
+		}
+		close(done)
+	}()
+
+	eventData := map[string]any{
+		"line":   "benchmark output line",
+		"stream": "stdout",
+	}
+
+	b.ResetTimer()
+	b.ReportAllocs()
+
+	for i := 0; i < b.N; i++ {
+		h.streams.Send(streamID, "output", eventData)
+	}
+
+	// Cleanup
+	h.streams.Close(streamID)
+	<-done
+}
+
+// BenchmarkSSEParallelStreaming benchmarks concurrent SSE event throughput.
+func BenchmarkSSEParallelStreaming(b *testing.B) {
+	h, _ := setupBenchHandler()
+
+	streamID := "bench-parallel-stream"
+	events := h.streams.Subscribe(streamID)
+	defer h.streams.Unsubscribe(streamID, events)
+
+	// Drain events in background
+	done := make(chan struct{})
+	go func() {
+		for range events {
+		}
+		close(done)
+	}()
+
+	eventData := map[string]any{
+		"line":   "benchmark output line",
+		"stream": "stdout",
+	}
+
+	b.ResetTimer()
+	b.ReportAllocs()
+
+	b.RunParallel(func(pb *testing.PB) {
+		for pb.Next() {
+			h.streams.Send(streamID, "output", eventData)
+		}
+	})
+
+	// Cleanup
+	h.streams.Close(streamID)
+	<-done
+}
+
+// BenchmarkJSONSerialization benchmarks response JSON serialization.
+func BenchmarkJSONSerialization(b *testing.B) {
+	response := map[string]any{
+		"id":         "cmd-test-001",
+		"project":    "pantheon",
+		"type":       "claude",
+		"status":     "running",
+		"stream_url": "/projects/pantheon/events?stream_id=cmd-test-001",
+	}
+
+	b.ResetTimer()
+	b.ReportAllocs()
+
+	for i := 0; i < b.N; i++ {
+		_, _ = json.Marshal(response)
+	}
+}
+
+// BenchmarkConcurrentRequests benchmarks concurrent request handling.
+func BenchmarkConcurrentRequests(b *testing.B) {
+	_, router := setupBenchHandler()
+
+	body := ClaudeRequest{Prompt: "test"}
+	bodyBytes, _ := json.Marshal(body)
+
+	b.ResetTimer()
+	b.ReportAllocs()
+
+	b.RunParallel(func(pb *testing.PB) {
+		for pb.Next() {
+			req := httptest.NewRequest("POST", "/projects/pantheon/claude",
+				bytes.NewReader(bodyBytes))
+			req.Header.Set("Content-Type", "application/json")
+			rec := httptest.NewRecorder()
+			router.ServeHTTP(rec, req)
+		}
+	})
+}
+
+// BenchmarkRouteMatching benchmarks chi router pattern matching.
+func BenchmarkRouteMatching(b *testing.B) {
+	_, router := setupBenchHandler()
+
+	paths := []string{
+		"/projects",
+		"/projects/pantheon",
+		"/projects/pantheon/claude",
+		"/projects/pantheon/shell",
+		"/projects/pantheon/git",
+		"/projects/pantheon/events",
+	}
+
+	b.ResetTimer()
+	b.ReportAllocs()
+
+	for i := 0; i < b.N; i++ {
+		path := paths[i%len(paths)]
+		req := httptest.NewRequest("GET", path, nil)
+		rec := httptest.NewRecorder()
+		router.ServeHTTP(rec, req)
+	}
+}
+
+// BenchmarkMultipleSubscribers benchmarks event fanout to multiple subscribers.
+func BenchmarkMultipleSubscribers(b *testing.B) {
+	h, _ := setupBenchHandler()
+
+	streamID := "bench-multi-stream"
+	const numSubscribers = 10
+
+	// Create multiple subscribers
+	subscribers := make([]chan streamEvent, numSubscribers)
+	var wg sync.WaitGroup
+
+	for i := 0; i < numSubscribers; i++ {
+		subscribers[i] = h.streams.Subscribe(streamID)
+		wg.Add(1)
+		go func(ch chan streamEvent) {
+			defer wg.Done()
+			for range ch {
+			}
+		}(subscribers[i])
+	}
+
+	eventData := map[string]any{
+		"line":   "benchmark output line",
+		"stream": "stdout",
+	}
+
+	b.ResetTimer()
+	b.ReportAllocs()
+
+	for i := 0; i < b.N; i++ {
+		h.streams.Send(streamID, "output", eventData)
+	}
+
+	// Cleanup
+	h.streams.Close(streamID)
+	wg.Wait()
+}
--- a/internal/handlers/projects_test.go
+++ b/internal/handlers/projects_test.go
@ -9,11 +9,19 @@ import (
 	"testing"

 	"github.com/go-chi/chi/v5"
+	"github.com/orchard9/rdev/internal/adapter/kubernetes"
 )

+// newTestProjectsHandler creates a ProjectsHandler for testing.
+func newTestProjectsHandler() *ProjectsHandler {
+	repo := kubernetes.NewProjectRepository("test-namespace")
+	exec := kubernetes.NewExecutor("test-namespace")
+	return NewProjectsHandler(repo, exec)
+}
+
 // TestProjectsHandler_List tests the List endpoint.
 func TestProjectsHandler_List(t *testing.T) {
-	h := NewProjectsHandler()
+	h := newTestProjectsHandler()
 	router := chi.NewRouter()
 	h.Mount(router)

@ -42,7 +50,7 @@ func TestProjectsHandler_List(t *testing.T) {

 // TestProjectsHandler_Get tests the Get endpoint.
 func TestProjectsHandler_Get(t *testing.T) {
-	h := NewProjectsHandler()
+	h := newTestProjectsHandler()
 	router := chi.NewRouter()
 	h.Mount(router)

@ -71,7 +79,7 @@ func TestProjectsHandler_Get(t *testing.T) {

 // TestProjectsHandler_RunClaude tests the RunClaude endpoint.
 func TestProjectsHandler_RunClaude(t *testing.T) {
-	h := NewProjectsHandler()
+	h := newTestProjectsHandler()
 	router := chi.NewRouter()
 	h.Mount(router)

@ -97,7 +105,7 @@ func TestProjectsHandler_RunClaude(t *testing.T) {
 				Prompt: "",
 			},
 			wantStatus: http.StatusBadRequest,
-			wantErr:    "prompt is required",
+			wantErr:    "prompt: is required",
 		},
 		{
 			name:       "project not found",
@ -150,7 +158,7 @@ func TestProjectsHandler_RunClaude(t *testing.T) {

 // TestProjectsHandler_RunShell tests the RunShell endpoint.
 func TestProjectsHandler_RunShell(t *testing.T) {
-	h := NewProjectsHandler()
+	h := newTestProjectsHandler()
 	router := chi.NewRouter()
 	h.Mount(router)

@ -176,7 +184,7 @@ func TestProjectsHandler_RunShell(t *testing.T) {
 				Command: "",
 			},
 			wantStatus: http.StatusBadRequest,
-			wantErr:    "command is required",
+			wantErr:    "command: is required",
 		},
 		{
 			name:      "dangerous command with semicolon",
@ -255,7 +263,7 @@ func TestProjectsHandler_RunShell(t *testing.T) {

 // TestProjectsHandler_RunGit tests the RunGit endpoint.
 func TestProjectsHandler_RunGit(t *testing.T) {
-	h := NewProjectsHandler()
+	h := newTestProjectsHandler()
 	router := chi.NewRouter()
 	h.Mount(router)

@ -289,7 +297,7 @@ func TestProjectsHandler_RunGit(t *testing.T) {
 				Args: []string{},
 			},
 			wantStatus: http.StatusBadRequest,
-			wantErr:    "args is required",
+			wantErr:    "args: is required",
 		},
 		{
 			name:      "git config blocked",
@ -350,7 +358,7 @@ func TestProjectsHandler_RunGit(t *testing.T) {

 // TestProjectsHandler_Events tests the Events SSE endpoint.
 func TestProjectsHandler_Events(t *testing.T) {
-	h := NewProjectsHandler()
+	h := newTestProjectsHandler()
 	router := chi.NewRouter()
 	h.Mount(router)

@ -371,7 +379,7 @@ func TestProjectsHandler_Events(t *testing.T) {

 // TestProjectsHandler_InvalidJSON tests handling of invalid JSON bodies.
 func TestProjectsHandler_InvalidJSON(t *testing.T) {
-	h := NewProjectsHandler()
+	h := newTestProjectsHandler()
 	router := chi.NewRouter()
 	h.Mount(router)

@ -405,7 +413,7 @@ func TestProjectsHandler_InvalidJSON(t *testing.T) {

 // TestCommandIDGeneration tests that command IDs are generated correctly.
 func TestCommandIDGeneration(t *testing.T) {
-	h := NewProjectsHandler()
+	h := newTestProjectsHandler()
 	router := chi.NewRouter()
 	h.Mount(router)

@ -438,7 +446,7 @@ func TestCommandIDGeneration(t *testing.T) {

 // TestCustomStreamID tests that custom stream IDs are used when provided.
 func TestCustomStreamID(t *testing.T) {
-	h := NewProjectsHandler()
+	h := newTestProjectsHandler()
 	router := chi.NewRouter()
 	h.Mount(router)

--- a/internal/handlers/queue.go
+++ b/internal/handlers/queue.go
@ -0,0 +1,357 @@
+// Package handlers provides HTTP handlers for the rdev API.
+package handlers
+
+import (
+	"encoding/json"
+	"errors"
+	"fmt"
+	"net/http"
+	"strconv"
+
+	"github.com/go-chi/chi/v5"
+	"github.com/orchard9/rdev/internal/auth"
+	"github.com/orchard9/rdev/internal/domain"
+	"github.com/orchard9/rdev/internal/port"
+	"github.com/orchard9/rdev/internal/sanitize"
+	"github.com/orchard9/rdev/pkg/api"
+)
+
+// QueueHandler handles command queue endpoints.
+type QueueHandler struct {
+	queue    port.CommandQueue
+	projects port.ProjectRepository
+}
+
+// NewQueueHandler creates a new queue handler.
+func NewQueueHandler(queue port.CommandQueue, projects port.ProjectRepository) *QueueHandler {
+	return &QueueHandler{
+		queue:    queue,
+		projects: projects,
+	}
+}
+
+// Mount registers the queue routes.
+func (h *QueueHandler) Mount(r api.Router) {
+	r.Route("/projects/{id}/queue", func(r chi.Router) {
+		r.Post("/", h.Enqueue)
+		r.Get("/", h.List)
+		r.Get("/stats", h.Stats)
+		r.Get("/{cmdId}", h.GetByID)
+		r.Delete("/{cmdId}", h.Cancel)
+	})
+}
+
+// EnqueueRequest is the request body for POST /projects/{id}/queue.
+type EnqueueRequest struct {
+	Command     string `json:"command"`               // Required: the command to execute
+	CommandType string `json:"command_type"`          // Required: claude, shell, or git
+	WorkingDir  string `json:"working_dir,omitempty"` // Optional: working directory
+	Priority    int    `json:"priority,omitempty"`    // Optional: higher = more urgent (default: 0)
+}
+
+// MaxCommandSize is the maximum allowed size for command payloads (10KB).
+const MaxCommandSize = 10 * 1024
+
+// EnqueueResponse is the response for POST /projects/{id}/queue.
+type EnqueueResponse struct {
+	ID        string `json:"id"`
+	ProjectID string `json:"project_id"`
+	Status    string `json:"status"`
+	StreamURL string `json:"stream_url"`
+	Position  int    `json:"position,omitempty"` // Approximate queue position
+}
+
+// Enqueue adds a command to the project's queue.
+// POST /projects/{id}/queue
+func (h *QueueHandler) Enqueue(w http.ResponseWriter, r *http.Request) {
+	projectID := chi.URLParam(r, "id")
+
+	// Check project exists
+	exists, err := h.projects.Exists(r.Context(), domain.ProjectID(projectID))
+	if err != nil {
+		api.WriteInternalError(w, r, "failed to check project")
+		return
+	}
+	if !exists {
+		api.WriteNotFound(w, r, fmt.Sprintf("project not found: %s", projectID))
+		return
+	}
+
+	// Parse request
+	var req EnqueueRequest
+	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
+		api.WriteBadRequest(w, r, "invalid request body")
+		return
+	}
+
+	// Validate command type
+	var cmdType domain.CommandType
+	switch req.CommandType {
+	case "claude":
+		cmdType = domain.CommandTypeClaude
+	case "shell":
+		cmdType = domain.CommandTypeShell
+	case "git":
+		cmdType = domain.CommandTypeGit
+	default:
+		api.WriteBadRequest(w, r, "command_type must be one of: claude, shell, git")
+		return
+	}
+
+	// Validate command
+	if req.Command == "" {
+		api.WriteBadRequest(w, r, "command is required")
+		return
+	}
+
+	// Validate command size to prevent large payloads
+	if len(req.Command) > MaxCommandSize {
+		api.WriteBadRequest(w, r, fmt.Sprintf("command exceeds maximum size of %d bytes", MaxCommandSize))
+		return
+	}
+
+	// Sanitize based on command type
+	switch cmdType {
+	case domain.CommandTypeClaude:
+		if err := sanitize.ClaudePrompt(req.Command); err != nil {
+			api.WriteBadRequest(w, r, err.Error())
+			return
+		}
+	case domain.CommandTypeShell:
+		if err := sanitize.ShellCommand(req.Command); err != nil {
+			api.WriteBadRequest(w, r, err.Error())
+			return
+		}
+	case domain.CommandTypeGit:
+		// For git, the command should be JSON-encoded args
+		var gitArgs []string
+		if err := json.Unmarshal([]byte(req.Command), &gitArgs); err != nil {
+			api.WriteBadRequest(w, r, "git command must be JSON array of args")
+			return
+		}
+		if err := sanitize.GitArgs(gitArgs); err != nil {
+			api.WriteBadRequest(w, r, err.Error())
+			return
+		}
+	}
+
+	// Get API key ID for audit trail
+	var apiKeyID string
+	if apiKey := auth.GetAPIKey(r.Context()); apiKey != nil {
+		apiKeyID = apiKey.ID
+	}
+
+	// Create queued command
+	cmd := &domain.QueuedCommand{
+		ProjectID:   projectID,
+		Command:     req.Command,
+		CommandType: cmdType,
+		WorkingDir:  req.WorkingDir,
+		Status:      domain.QueueStatusPending,
+		Priority:    req.Priority,
+		APIKeyID:    apiKeyID,
+	}
+
+	// Enqueue
+	if err := h.queue.Enqueue(r.Context(), cmd); err != nil {
+		api.WriteInternalError(w, r, "failed to enqueue command")
+		return
+	}
+
+	// Get approximate queue position
+	pendingStatus := domain.QueueStatusPending
+	pending, _ := h.queue.List(r.Context(), projectID, &domain.QueueFilters{
+		Status:    &pendingStatus,
+		Limit:     1000,
+		SortOrder: "asc",
+	})
+	position := len(pending)
+
+	api.WriteCreated(w, r, EnqueueResponse{
+		ID:        string(cmd.ID),
+		ProjectID: projectID,
+		Status:    string(cmd.Status),
+		StreamURL: fmt.Sprintf("/projects/%s/events?stream_id=%s", projectID, cmd.ID),
+		Position:  position,
+	})
+}
+
+// ListResponse is the response for GET /projects/{id}/queue.
+type ListResponse struct {
+	Commands []*domain.QueuedCommand `json:"commands"`
+	Total    int                     `json:"total"`
+	Limit    int                     `json:"limit"`
+	Offset   int                     `json:"offset"`
+}
+
+// List returns queued commands for a project.
+// GET /projects/{id}/queue
+func (h *QueueHandler) List(w http.ResponseWriter, r *http.Request) {
+	projectID := chi.URLParam(r, "id")
+
+	// Check project exists
+	exists, err := h.projects.Exists(r.Context(), domain.ProjectID(projectID))
+	if err != nil {
+		api.WriteInternalError(w, r, "failed to check project")
+		return
+	}
+	if !exists {
+		api.WriteNotFound(w, r, fmt.Sprintf("project not found: %s", projectID))
+		return
+	}
+
+	// Parse query params
+	filters := domain.DefaultQueueFilters()
+
+	if status := r.URL.Query().Get("status"); status != "" {
+		s := domain.QueueStatus(status)
+		filters.Status = &s
+	}
+
+	if limit := r.URL.Query().Get("limit"); limit != "" {
+		if l, err := strconv.Atoi(limit); err == nil && l > 0 && l <= 1000 {
+			filters.Limit = l
+		}
+	}
+
+	if offset := r.URL.Query().Get("offset"); offset != "" {
+		if o, err := strconv.Atoi(offset); err == nil && o >= 0 {
+			filters.Offset = o
+		}
+	}
+
+	if sort := r.URL.Query().Get("sort"); sort == "asc" || sort == "desc" {
+		filters.SortOrder = sort
+	}
+
+	// List commands
+	commands, err := h.queue.List(r.Context(), projectID, filters)
+	if err != nil {
+		api.WriteInternalError(w, r, "failed to list commands")
+		return
+	}
+
+	if commands == nil {
+		commands = []*domain.QueuedCommand{}
+	}
+
+	api.WriteSuccess(w, r, ListResponse{
+		Commands: commands,
+		Total:    len(commands),
+		Limit:    filters.Limit,
+		Offset:   filters.Offset,
+	})
+}
+
+// GetByID returns a specific queued command.
+// GET /projects/{id}/queue/{cmdId}
+func (h *QueueHandler) GetByID(w http.ResponseWriter, r *http.Request) {
+	projectID := chi.URLParam(r, "id")
+	cmdID := chi.URLParam(r, "cmdId")
+
+	// Check project exists
+	exists, err := h.projects.Exists(r.Context(), domain.ProjectID(projectID))
+	if err != nil {
+		api.WriteInternalError(w, r, "failed to check project")
+		return
+	}
+	if !exists {
+		api.WriteNotFound(w, r, fmt.Sprintf("project not found: %s", projectID))
+		return
+	}
+
+	// Get command
+	cmd, err := h.queue.GetByID(r.Context(), domain.QueuedCommandID(cmdID))
+	if err != nil {
+		if errors.Is(err, domain.ErrCommandNotFound) {
+			api.WriteNotFound(w, r, fmt.Sprintf("command not found: %s", cmdID))
+			return
+		}
+		api.WriteInternalError(w, r, "failed to get command")
+		return
+	}
+
+	// Verify command belongs to project
+	if cmd.ProjectID != projectID {
+		api.WriteNotFound(w, r, fmt.Sprintf("command not found: %s", cmdID))
+		return
+	}
+
+	api.WriteSuccess(w, r, cmd)
+}
+
+// Cancel cancels a pending queued command.
+// DELETE /projects/{id}/queue/{cmdId}
+func (h *QueueHandler) Cancel(w http.ResponseWriter, r *http.Request) {
+	projectID := chi.URLParam(r, "id")
+	cmdID := chi.URLParam(r, "cmdId")
+
+	// Check project exists
+	exists, err := h.projects.Exists(r.Context(), domain.ProjectID(projectID))
+	if err != nil {
+		api.WriteInternalError(w, r, "failed to check project")
+		return
+	}
+	if !exists {
+		api.WriteNotFound(w, r, fmt.Sprintf("project not found: %s", projectID))
+		return
+	}
+
+	// Verify command exists and belongs to project
+	cmd, err := h.queue.GetByID(r.Context(), domain.QueuedCommandID(cmdID))
+	if err != nil {
+		if errors.Is(err, domain.ErrCommandNotFound) {
+			api.WriteNotFound(w, r, fmt.Sprintf("command not found: %s", cmdID))
+			return
+		}
+		api.WriteInternalError(w, r, "failed to get command")
+		return
+	}
+
+	if cmd.ProjectID != projectID {
+		api.WriteNotFound(w, r, fmt.Sprintf("command not found: %s", cmdID))
+		return
+	}
+
+	// Cancel command
+	if err := h.queue.Cancel(r.Context(), domain.QueuedCommandID(cmdID)); err != nil {
+		if errors.Is(err, domain.ErrCommandNotFound) {
+			api.WriteNotFound(w, r, fmt.Sprintf("command not found: %s", cmdID))
+			return
+		}
+		api.WriteBadRequest(w, r, err.Error())
+		return
+	}
+
+	api.WriteSuccess(w, r, map[string]any{
+		"id":      cmdID,
+		"status":  "cancelled",
+		"message": "command cancelled successfully",
+	})
+}
+
+// Stats returns queue statistics for a project.
+// GET /projects/{id}/queue/stats
+func (h *QueueHandler) Stats(w http.ResponseWriter, r *http.Request) {
+	projectID := chi.URLParam(r, "id")
+
+	// Check project exists
+	exists, err := h.projects.Exists(r.Context(), domain.ProjectID(projectID))
+	if err != nil {
+		api.WriteInternalError(w, r, "failed to check project")
+		return
+	}
+	if !exists {
+		api.WriteNotFound(w, r, fmt.Sprintf("project not found: %s", projectID))
+		return
+	}
+
+	// Get stats
+	stats, err := h.queue.GetStats(r.Context(), projectID)
+	if err != nil {
+		api.WriteInternalError(w, r, "failed to get queue stats")
+		return
+	}
+
+	api.WriteSuccess(w, r, stats)
+}
--- a/internal/handlers/queue_test.go
+++ b/internal/handlers/queue_test.go
@ -0,0 +1,535 @@
+package handlers
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+	"time"
+
+	"github.com/go-chi/chi/v5"
+	"github.com/orchard9/rdev/internal/domain"
+)
+
+// mockCommandQueue implements port.CommandQueue for testing.
+type mockCommandQueue struct {
+	commands []*domain.QueuedCommand
+	err      error
+}
+
+func (m *mockCommandQueue) Enqueue(ctx context.Context, cmd *domain.QueuedCommand) error {
+	if m.err != nil {
+		return m.err
+	}
+	cmd.ID = domain.QueuedCommandID("queued-cmd-123")
+	m.commands = append(m.commands, cmd)
+	return nil
+}
+
+func (m *mockCommandQueue) Dequeue(ctx context.Context, projectID string) (*domain.QueuedCommand, error) {
+	if m.err != nil {
+		return nil, m.err
+	}
+	for _, cmd := range m.commands {
+		if cmd.ProjectID == projectID && cmd.Status == domain.QueueStatusPending {
+			cmd.Status = domain.QueueStatusRunning
+			return cmd, nil
+		}
+	}
+	return nil, nil
+}
+
+func (m *mockCommandQueue) UpdateStatus(ctx context.Context, cmdID domain.QueuedCommandID, status domain.QueueStatus, result *domain.QueuedCommandResult) error {
+	return m.err
+}
+
+func (m *mockCommandQueue) Cancel(ctx context.Context, cmdID domain.QueuedCommandID) error {
+	if m.err != nil {
+		return m.err
+	}
+	for _, cmd := range m.commands {
+		if cmd.ID == cmdID {
+			if cmd.Status != domain.QueueStatusPending {
+				return domain.ErrCommandNotFound
+			}
+			cmd.Status = domain.QueueStatusCancelled
+			return nil
+		}
+	}
+	return domain.ErrCommandNotFound
+}
+
+func (m *mockCommandQueue) GetByID(ctx context.Context, cmdID domain.QueuedCommandID) (*domain.QueuedCommand, error) {
+	if m.err != nil {
+		return nil, m.err
+	}
+	for _, cmd := range m.commands {
+		if cmd.ID == cmdID {
+			return cmd, nil
+		}
+	}
+	return nil, domain.ErrCommandNotFound
+}
+
+func (m *mockCommandQueue) List(ctx context.Context, projectID string, filters *domain.QueueFilters) ([]*domain.QueuedCommand, error) {
+	if m.err != nil {
+		return nil, m.err
+	}
+	var result []*domain.QueuedCommand
+	for _, cmd := range m.commands {
+		if cmd.ProjectID == projectID {
+			result = append(result, cmd)
+		}
+	}
+	return result, nil
+}
+
+func (m *mockCommandQueue) GetStats(ctx context.Context, projectID string) (*domain.QueueStats, error) {
+	if m.err != nil {
+		return nil, m.err
+	}
+	return &domain.QueueStats{
+		TotalPending:   1,
+		TotalRunning:   0,
+		TotalCompleted: 0,
+		TotalFailed:    0,
+		TotalCancelled: 0,
+	}, nil
+}
+
+func (m *mockCommandQueue) CleanupOld(ctx context.Context, olderThanDays int) (int64, error) {
+	return 0, m.err
+}
+
+// mockProjectRepo implements port.ProjectRepository for queue handler testing.
+type mockProjectRepo struct {
+	projects map[domain.ProjectID]*domain.Project
+}
+
+func newMockProjectRepo() *mockProjectRepo {
+	return &mockProjectRepo{
+		projects: make(map[domain.ProjectID]*domain.Project),
+	}
+}
+
+func (m *mockProjectRepo) List(ctx context.Context) ([]domain.Project, error) {
+	var result []domain.Project
+	for _, p := range m.projects {
+		result = append(result, *p)
+	}
+	return result, nil
+}
+
+func (m *mockProjectRepo) Get(ctx context.Context, id domain.ProjectID) (*domain.Project, error) {
+	if p, ok := m.projects[id]; ok {
+		return p, nil
+	}
+	return nil, domain.ErrProjectNotFound
+}
+
+func (m *mockProjectRepo) Exists(ctx context.Context, id domain.ProjectID) (bool, error) {
+	_, ok := m.projects[id]
+	return ok, nil
+}
+
+func (m *mockProjectRepo) RefreshStatus(ctx context.Context) error {
+	return nil
+}
+
+func (m *mockProjectRepo) Register(ctx context.Context, p *domain.Project) error {
+	m.projects[p.ID] = p
+	return nil
+}
+
+func (m *mockProjectRepo) Unregister(ctx context.Context, id domain.ProjectID) error {
+	delete(m.projects, id)
+	return nil
+}
+
+func TestQueueHandler_Enqueue(t *testing.T) {
+	projectRepo := newMockProjectRepo()
+	projectRepo.Register(context.Background(), &domain.Project{ID: "proj-1", Name: "Test Project"})
+
+	tests := []struct {
+		name       string
+		projectID  string
+		body       EnqueueRequest
+		wantStatus int
+	}{
+		{
+			name:      "valid claude command",
+			projectID: "proj-1",
+			body: EnqueueRequest{
+				Command:     "explain this code",
+				CommandType: "claude",
+			},
+			wantStatus: http.StatusCreated,
+		},
+		{
+			name:      "valid shell command",
+			projectID: "proj-1",
+			body: EnqueueRequest{
+				Command:     "ls -la",
+				CommandType: "shell",
+			},
+			wantStatus: http.StatusCreated,
+		},
+		{
+			name:      "valid git command",
+			projectID: "proj-1",
+			body: EnqueueRequest{
+				Command:     `["status"]`,
+				CommandType: "git",
+			},
+			wantStatus: http.StatusCreated,
+		},
+		{
+			name:      "invalid command type",
+			projectID: "proj-1",
+			body: EnqueueRequest{
+				Command:     "test",
+				CommandType: "invalid",
+			},
+			wantStatus: http.StatusBadRequest,
+		},
+		{
+			name:      "empty command",
+			projectID: "proj-1",
+			body: EnqueueRequest{
+				Command:     "",
+				CommandType: "claude",
+			},
+			wantStatus: http.StatusBadRequest,
+		},
+		{
+			name:      "project not found",
+			projectID: "unknown",
+			body: EnqueueRequest{
+				Command:     "test",
+				CommandType: "claude",
+			},
+			wantStatus: http.StatusNotFound,
+		},
+		{
+			name:      "dangerous shell command",
+			projectID: "proj-1",
+			body: EnqueueRequest{
+				Command:     "rm -rf /",
+				CommandType: "shell",
+			},
+			wantStatus: http.StatusBadRequest,
+		},
+		{
+			name:      "invalid git command format",
+			projectID: "proj-1",
+			body: EnqueueRequest{
+				Command:     "not json array",
+				CommandType: "git",
+			},
+			wantStatus: http.StatusBadRequest,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			queue := &mockCommandQueue{}
+			h := NewQueueHandler(queue, projectRepo)
+
+			r := chi.NewRouter()
+			r.Post("/projects/{id}/queue/", h.Enqueue)
+
+			body, _ := json.Marshal(tt.body)
+			req := httptest.NewRequest(http.MethodPost, "/projects/"+tt.projectID+"/queue/", bytes.NewReader(body))
+			req.Header.Set("Content-Type", "application/json")
+			w := httptest.NewRecorder()
+
+			r.ServeHTTP(w, req)
+
+			if w.Code != tt.wantStatus {
+				t.Errorf("Enqueue() status = %d, want %d, body: %s", w.Code, tt.wantStatus, w.Body.String())
+			}
+		})
+	}
+}
+
+func TestQueueHandler_List(t *testing.T) {
+	projectRepo := newMockProjectRepo()
+	projectRepo.Register(context.Background(), &domain.Project{ID: "proj-1", Name: "Test Project"})
+
+	queue := &mockCommandQueue{
+		commands: []*domain.QueuedCommand{
+			{
+				ID:          "cmd-1",
+				ProjectID:   "proj-1",
+				Command:     "test",
+				CommandType: domain.CommandTypeClaude,
+				Status:      domain.QueueStatusPending,
+				CreatedAt:   time.Now(),
+			},
+		},
+	}
+
+	tests := []struct {
+		name       string
+		projectID  string
+		query      string
+		wantStatus int
+		wantCount  int
+	}{
+		{
+			name:       "list all commands",
+			projectID:  "proj-1",
+			query:      "",
+			wantStatus: http.StatusOK,
+			wantCount:  1,
+		},
+		{
+			name:       "project not found",
+			projectID:  "unknown",
+			query:      "",
+			wantStatus: http.StatusNotFound,
+		},
+		{
+			name:       "with limit",
+			projectID:  "proj-1",
+			query:      "?limit=10",
+			wantStatus: http.StatusOK,
+			wantCount:  1,
+		},
+		{
+			name:       "with offset",
+			projectID:  "proj-1",
+			query:      "?offset=0",
+			wantStatus: http.StatusOK,
+			wantCount:  1,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			h := NewQueueHandler(queue, projectRepo)
+
+			r := chi.NewRouter()
+			r.Get("/projects/{id}/queue/", h.List)
+
+			req := httptest.NewRequest(http.MethodGet, "/projects/"+tt.projectID+"/queue/"+tt.query, nil)
+			w := httptest.NewRecorder()
+
+			r.ServeHTTP(w, req)
+
+			if w.Code != tt.wantStatus {
+				t.Errorf("List() status = %d, want %d", w.Code, tt.wantStatus)
+			}
+
+			if tt.wantStatus == http.StatusOK {
+				var resp struct {
+					Data ListResponse `json:"data"`
+				}
+				if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
+					t.Fatalf("failed to unmarshal response: %v", err)
+				}
+				if len(resp.Data.Commands) != tt.wantCount {
+					t.Errorf("List() count = %d, want %d", len(resp.Data.Commands), tt.wantCount)
+				}
+			}
+		})
+	}
+}
+
+func TestQueueHandler_GetByID(t *testing.T) {
+	projectRepo := newMockProjectRepo()
+	projectRepo.Register(context.Background(), &domain.Project{ID: "proj-1", Name: "Test Project"})
+
+	queue := &mockCommandQueue{
+		commands: []*domain.QueuedCommand{
+			{
+				ID:          "cmd-123",
+				ProjectID:   "proj-1",
+				Command:     "test",
+				CommandType: domain.CommandTypeClaude,
+				Status:      domain.QueueStatusPending,
+				CreatedAt:   time.Now(),
+			},
+		},
+	}
+
+	tests := []struct {
+		name       string
+		projectID  string
+		cmdID      string
+		wantStatus int
+	}{
+		{
+			name:       "existing command",
+			projectID:  "proj-1",
+			cmdID:      "cmd-123",
+			wantStatus: http.StatusOK,
+		},
+		{
+			name:       "non-existent command",
+			projectID:  "proj-1",
+			cmdID:      "cmd-unknown",
+			wantStatus: http.StatusNotFound,
+		},
+		{
+			name:       "project not found",
+			projectID:  "unknown",
+			cmdID:      "cmd-123",
+			wantStatus: http.StatusNotFound,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			h := NewQueueHandler(queue, projectRepo)
+
+			r := chi.NewRouter()
+			r.Get("/projects/{id}/queue/{cmdId}", h.GetByID)
+
+			req := httptest.NewRequest(http.MethodGet, "/projects/"+tt.projectID+"/queue/"+tt.cmdID, nil)
+			w := httptest.NewRecorder()
+
+			r.ServeHTTP(w, req)
+
+			if w.Code != tt.wantStatus {
+				t.Errorf("GetByID() status = %d, want %d", w.Code, tt.wantStatus)
+			}
+		})
+	}
+}
+
+func TestQueueHandler_Cancel(t *testing.T) {
+	projectRepo := newMockProjectRepo()
+	projectRepo.Register(context.Background(), &domain.Project{ID: "proj-1", Name: "Test Project"})
+
+	tests := []struct {
+		name       string
+		projectID  string
+		cmdID      string
+		commands   []*domain.QueuedCommand
+		wantStatus int
+	}{
+		{
+			name:      "cancel pending command",
+			projectID: "proj-1",
+			cmdID:     "cmd-123",
+			commands: []*domain.QueuedCommand{
+				{
+					ID:        "cmd-123",
+					ProjectID: "proj-1",
+					Status:    domain.QueueStatusPending,
+				},
+			},
+			wantStatus: http.StatusOK,
+		},
+		{
+			name:       "command not found",
+			projectID:  "proj-1",
+			cmdID:      "cmd-unknown",
+			commands:   nil,
+			wantStatus: http.StatusNotFound,
+		},
+		{
+			name:       "project not found",
+			projectID:  "unknown",
+			cmdID:      "cmd-123",
+			commands:   nil,
+			wantStatus: http.StatusNotFound,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			queue := &mockCommandQueue{commands: tt.commands}
+			h := NewQueueHandler(queue, projectRepo)
+
+			r := chi.NewRouter()
+			r.Delete("/projects/{id}/queue/{cmdId}", h.Cancel)
+
+			req := httptest.NewRequest(http.MethodDelete, "/projects/"+tt.projectID+"/queue/"+tt.cmdID, nil)
+			w := httptest.NewRecorder()
+
+			r.ServeHTTP(w, req)
+
+			if w.Code != tt.wantStatus {
+				t.Errorf("Cancel() status = %d, want %d, body: %s", w.Code, tt.wantStatus, w.Body.String())
+			}
+		})
+	}
+}
+
+func TestQueueHandler_Stats(t *testing.T) {
+	projectRepo := newMockProjectRepo()
+	projectRepo.Register(context.Background(), &domain.Project{ID: "proj-1", Name: "Test Project"})
+
+	queue := &mockCommandQueue{}
+
+	tests := []struct {
+		name       string
+		projectID  string
+		wantStatus int
+	}{
+		{
+			name:       "get stats",
+			projectID:  "proj-1",
+			wantStatus: http.StatusOK,
+		},
+		{
+			name:       "project not found",
+			projectID:  "unknown",
+			wantStatus: http.StatusNotFound,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			h := NewQueueHandler(queue, projectRepo)
+
+			r := chi.NewRouter()
+			r.Get("/projects/{id}/queue/stats", h.Stats)
+
+			req := httptest.NewRequest(http.MethodGet, "/projects/"+tt.projectID+"/queue/stats", nil)
+			w := httptest.NewRecorder()
+
+			r.ServeHTTP(w, req)
+
+			if w.Code != tt.wantStatus {
+				t.Errorf("Stats() status = %d, want %d", w.Code, tt.wantStatus)
+			}
+		})
+	}
+}
+
+func TestQueueHandler_Enqueue_CommandSizeLimit(t *testing.T) {
+	projectRepo := newMockProjectRepo()
+	projectRepo.Register(context.Background(), &domain.Project{ID: "proj-1", Name: "Test Project"})
+	queue := &mockCommandQueue{}
+	h := NewQueueHandler(queue, projectRepo)
+
+	r := chi.NewRouter()
+	r.Post("/projects/{id}/queue/", h.Enqueue)
+
+	// Create a command that exceeds MaxCommandSize (10KB)
+	largeCommand := make([]byte, MaxCommandSize+1)
+	for i := range largeCommand {
+		largeCommand[i] = 'a'
+	}
+
+	body := EnqueueRequest{
+		Command:     string(largeCommand),
+		CommandType: "claude",
+	}
+
+	bodyBytes, _ := json.Marshal(body)
+	req := httptest.NewRequest(http.MethodPost, "/projects/proj-1/queue/", bytes.NewReader(bodyBytes))
+	req.Header.Set("Content-Type", "application/json")
+	w := httptest.NewRecorder()
+
+	r.ServeHTTP(w, req)
+
+	if w.Code != http.StatusBadRequest {
+		t.Errorf("Enqueue() with large command status = %d, want %d", w.Code, http.StatusBadRequest)
+	}
+}
--- a/internal/handlers/webhooks.go
+++ b/internal/handlers/webhooks.go
@ -0,0 +1,476 @@
+// Package handlers provides HTTP handlers for the rdev API.
+package handlers
+
+import (
+	"crypto/rand"
+	"encoding/hex"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"net/http"
+	"net/url"
+	"strconv"
+
+	"github.com/go-chi/chi/v5"
+	"github.com/google/uuid"
+	"github.com/orchard9/rdev/internal/domain"
+	"github.com/orchard9/rdev/internal/port"
+	"github.com/orchard9/rdev/pkg/api"
+)
+
+// WebhookHandler handles webhook management endpoints.
+type WebhookHandler struct {
+	webhooks port.WebhookRepository
+	projects port.ProjectRepository
+}
+
+// NewWebhookHandler creates a new webhook handler.
+func NewWebhookHandler(webhooks port.WebhookRepository, projects port.ProjectRepository) *WebhookHandler {
+	return &WebhookHandler{
+		webhooks: webhooks,
+		projects: projects,
+	}
+}
+
+// Mount registers the webhook routes.
+func (h *WebhookHandler) Mount(r api.Router) {
+	r.Route("/projects/{id}/webhooks", func(r chi.Router) {
+		r.Post("/", h.Create)
+		r.Get("/", h.List)
+		r.Get("/{webhookId}", h.Get)
+		r.Put("/{webhookId}", h.Update)
+		r.Delete("/{webhookId}", h.Delete)
+		r.Get("/{webhookId}/deliveries", h.GetDeliveries)
+	})
+}
+
+// CreateWebhookRequest is the request body for POST /projects/{id}/webhooks.
+type CreateWebhookRequest struct {
+	URL    string   `json:"url"`
+	Events []string `json:"events"`
+	Secret string   `json:"secret,omitempty"` // If empty, one will be generated
+}
+
+// CreateWebhookResponse is the response for POST /projects/{id}/webhooks.
+type CreateWebhookResponse struct {
+	Webhook *WebhookDTO `json:"webhook"`
+	Secret  string      `json:"secret"` // Only returned on creation
+}
+
+// WebhookDTO is the data transfer object for webhooks.
+type WebhookDTO struct {
+	ID        string   `json:"id"`
+	ProjectID string   `json:"project_id"`
+	URL       string   `json:"url"`
+	Events    []string `json:"events"`
+	Enabled   bool     `json:"enabled"`
+	HasSecret bool     `json:"has_secret"`
+	CreatedAt string   `json:"created_at"`
+	UpdatedAt string   `json:"updated_at"`
+}
+
+// toDTO converts a domain.Webhook to a WebhookDTO.
+func toDTO(w *domain.Webhook) *WebhookDTO {
+	events := make([]string, len(w.Events))
+	for i, e := range w.Events {
+		events[i] = string(e)
+	}
+	return &WebhookDTO{
+		ID:        string(w.ID),
+		ProjectID: w.ProjectID,
+		URL:       w.URL,
+		Events:    events,
+		Enabled:   w.Enabled,
+		HasSecret: w.HasSecret(),
+		CreatedAt: w.CreatedAt.Format("2006-01-02T15:04:05Z07:00"),
+		UpdatedAt: w.UpdatedAt.Format("2006-01-02T15:04:05Z07:00"),
+	}
+}
+
+// Create creates a new webhook.
+// POST /projects/{id}/webhooks
+func (h *WebhookHandler) Create(w http.ResponseWriter, r *http.Request) {
+	projectID := chi.URLParam(r, "id")
+
+	// Check project exists
+	exists, err := h.projects.Exists(r.Context(), domain.ProjectID(projectID))
+	if err != nil {
+		api.WriteInternalError(w, r, "failed to check project")
+		return
+	}
+	if !exists {
+		api.WriteNotFound(w, r, fmt.Sprintf("project not found: %s", projectID))
+		return
+	}
+
+	// Parse request
+	var req CreateWebhookRequest
+	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
+		api.WriteBadRequest(w, r, "invalid request body")
+		return
+	}
+
+	// Validate URL
+	if req.URL == "" {
+		api.WriteBadRequest(w, r, "url is required")
+		return
+	}
+	parsedURL, err := url.Parse(req.URL)
+	if err != nil || (parsedURL.Scheme != "http" && parsedURL.Scheme != "https") {
+		api.WriteBadRequest(w, r, "url must be a valid HTTP or HTTPS URL")
+		return
+	}
+
+	// Validate events
+	if len(req.Events) == 0 {
+		api.WriteBadRequest(w, r, "at least one event type is required")
+		return
+	}
+	events := make([]domain.WebhookEventType, len(req.Events))
+	for i, e := range req.Events {
+		eventType := domain.WebhookEventType(e)
+		if !eventType.IsValid() {
+			api.WriteBadRequest(w, r, fmt.Sprintf("invalid event type: %s", e))
+			return
+		}
+		events[i] = eventType
+	}
+
+	// Generate secret if not provided
+	secret := req.Secret
+	if secret == "" {
+		secretBytes := make([]byte, 32)
+		if _, err := rand.Read(secretBytes); err != nil {
+			api.WriteInternalError(w, r, "failed to generate secret")
+			return
+		}
+		secret = hex.EncodeToString(secretBytes)
+	}
+
+	// Create webhook
+	webhook := &domain.Webhook{
+		ID:        domain.WebhookID(uuid.New().String()),
+		ProjectID: projectID,
+		URL:       req.URL,
+		Secret:    secret,
+		Events:    events,
+		Enabled:   true,
+	}
+
+	if err := h.webhooks.Create(r.Context(), webhook); err != nil {
+		api.WriteInternalError(w, r, "failed to create webhook")
+		return
+	}
+
+	api.WriteCreated(w, r, CreateWebhookResponse{
+		Webhook: toDTO(webhook),
+		Secret:  secret, // Only returned on creation
+	})
+}
+
+// List returns all webhooks for a project.
+// GET /projects/{id}/webhooks
+func (h *WebhookHandler) List(w http.ResponseWriter, r *http.Request) {
+	projectID := chi.URLParam(r, "id")
+
+	// Check project exists
+	exists, err := h.projects.Exists(r.Context(), domain.ProjectID(projectID))
+	if err != nil {
+		api.WriteInternalError(w, r, "failed to check project")
+		return
+	}
+	if !exists {
+		api.WriteNotFound(w, r, fmt.Sprintf("project not found: %s", projectID))
+		return
+	}
+
+	webhooks, err := h.webhooks.ListByProject(r.Context(), projectID)
+	if err != nil {
+		api.WriteInternalError(w, r, "failed to list webhooks")
+		return
+	}
+
+	dtos := make([]*WebhookDTO, len(webhooks))
+	for i, wh := range webhooks {
+		dtos[i] = toDTO(wh)
+	}
+
+	api.WriteSuccess(w, r, map[string]any{
+		"webhooks": dtos,
+		"total":    len(dtos),
+	})
+}
+
+// Get returns a specific webhook.
+// GET /projects/{id}/webhooks/{webhookId}
+func (h *WebhookHandler) Get(w http.ResponseWriter, r *http.Request) {
+	projectID := chi.URLParam(r, "id")
+	webhookID := chi.URLParam(r, "webhookId")
+
+	// Check project exists
+	exists, err := h.projects.Exists(r.Context(), domain.ProjectID(projectID))
+	if err != nil {
+		api.WriteInternalError(w, r, "failed to check project")
+		return
+	}
+	if !exists {
+		api.WriteNotFound(w, r, fmt.Sprintf("project not found: %s", projectID))
+		return
+	}
+
+	webhook, err := h.webhooks.GetByID(r.Context(), domain.WebhookID(webhookID))
+	if err != nil {
+		if errors.Is(err, domain.ErrWebhookNotFound) {
+			api.WriteNotFound(w, r, fmt.Sprintf("webhook not found: %s", webhookID))
+			return
+		}
+		api.WriteInternalError(w, r, "failed to get webhook")
+		return
+	}
+
+	// Verify webhook belongs to project
+	if webhook.ProjectID != projectID {
+		api.WriteNotFound(w, r, fmt.Sprintf("webhook not found: %s", webhookID))
+		return
+	}
+
+	api.WriteSuccess(w, r, toDTO(webhook))
+}
+
+// UpdateWebhookRequest is the request body for PUT /projects/{id}/webhooks/{webhookId}.
+type UpdateWebhookRequest struct {
+	URL     string   `json:"url,omitempty"`
+	Events  []string `json:"events,omitempty"`
+	Secret  string   `json:"secret,omitempty"`
+	Enabled *bool    `json:"enabled,omitempty"`
+}
+
+// Update updates a webhook.
+// PUT /projects/{id}/webhooks/{webhookId}
+func (h *WebhookHandler) Update(w http.ResponseWriter, r *http.Request) {
+	projectID := chi.URLParam(r, "id")
+	webhookID := chi.URLParam(r, "webhookId")
+
+	// Check project exists
+	exists, err := h.projects.Exists(r.Context(), domain.ProjectID(projectID))
+	if err != nil {
+		api.WriteInternalError(w, r, "failed to check project")
+		return
+	}
+	if !exists {
+		api.WriteNotFound(w, r, fmt.Sprintf("project not found: %s", projectID))
+		return
+	}
+
+	// Get existing webhook
+	webhook, err := h.webhooks.GetByID(r.Context(), domain.WebhookID(webhookID))
+	if err != nil {
+		if errors.Is(err, domain.ErrWebhookNotFound) {
+			api.WriteNotFound(w, r, fmt.Sprintf("webhook not found: %s", webhookID))
+			return
+		}
+		api.WriteInternalError(w, r, "failed to get webhook")
+		return
+	}
+
+	// Verify webhook belongs to project
+	if webhook.ProjectID != projectID {
+		api.WriteNotFound(w, r, fmt.Sprintf("webhook not found: %s", webhookID))
+		return
+	}
+
+	// Parse request
+	var req UpdateWebhookRequest
+	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
+		api.WriteBadRequest(w, r, "invalid request body")
+		return
+	}
+
+	// Update fields
+	if req.URL != "" {
+		parsedURL, err := url.Parse(req.URL)
+		if err != nil || (parsedURL.Scheme != "http" && parsedURL.Scheme != "https") {
+			api.WriteBadRequest(w, r, "url must be a valid HTTP or HTTPS URL")
+			return
+		}
+		webhook.URL = req.URL
+	}
+
+	if len(req.Events) > 0 {
+		events := make([]domain.WebhookEventType, len(req.Events))
+		for i, e := range req.Events {
+			eventType := domain.WebhookEventType(e)
+			if !eventType.IsValid() {
+				api.WriteBadRequest(w, r, fmt.Sprintf("invalid event type: %s", e))
+				return
+			}
+			events[i] = eventType
+		}
+		webhook.Events = events
+	}
+
+	if req.Secret != "" {
+		webhook.Secret = req.Secret
+	}
+
+	if req.Enabled != nil {
+		webhook.Enabled = *req.Enabled
+	}
+
+	if err := h.webhooks.Update(r.Context(), webhook); err != nil {
+		api.WriteInternalError(w, r, "failed to update webhook")
+		return
+	}
+
+	api.WriteSuccess(w, r, toDTO(webhook))
+}
+
+// Delete deletes a webhook.
+// DELETE /projects/{id}/webhooks/{webhookId}
+func (h *WebhookHandler) Delete(w http.ResponseWriter, r *http.Request) {
+	projectID := chi.URLParam(r, "id")
+	webhookID := chi.URLParam(r, "webhookId")
+
+	// Check project exists
+	exists, err := h.projects.Exists(r.Context(), domain.ProjectID(projectID))
+	if err != nil {
+		api.WriteInternalError(w, r, "failed to check project")
+		return
+	}
+	if !exists {
+		api.WriteNotFound(w, r, fmt.Sprintf("project not found: %s", projectID))
+		return
+	}
+
+	// Verify webhook belongs to project
+	webhook, err := h.webhooks.GetByID(r.Context(), domain.WebhookID(webhookID))
+	if err != nil {
+		if errors.Is(err, domain.ErrWebhookNotFound) {
+			api.WriteNotFound(w, r, fmt.Sprintf("webhook not found: %s", webhookID))
+			return
+		}
+		api.WriteInternalError(w, r, "failed to get webhook")
+		return
+	}
+
+	if webhook.ProjectID != projectID {
+		api.WriteNotFound(w, r, fmt.Sprintf("webhook not found: %s", webhookID))
+		return
+	}
+
+	if err := h.webhooks.Delete(r.Context(), domain.WebhookID(webhookID)); err != nil {
+		if errors.Is(err, domain.ErrWebhookNotFound) {
+			api.WriteNotFound(w, r, fmt.Sprintf("webhook not found: %s", webhookID))
+			return
+		}
+		api.WriteInternalError(w, r, "failed to delete webhook")
+		return
+	}
+
+	api.WriteSuccess(w, r, map[string]any{
+		"id":      webhookID,
+		"deleted": true,
+	})
+}
+
+// DeliveryDTO is the data transfer object for webhook deliveries.
+type DeliveryDTO struct {
+	ID             string `json:"id"`
+	WebhookID      string `json:"webhook_id"`
+	EventType      string `json:"event_type"`
+	Payload        string `json:"payload"`
+	ResponseStatus int    `json:"response_status,omitempty"`
+	ResponseBody   string `json:"response_body,omitempty"`
+	DeliveredAt    string `json:"delivered_at"`
+	Success        bool   `json:"success"`
+	RetryCount     int    `json:"retry_count"`
+	ErrorMessage   string `json:"error_message,omitempty"`
+}
+
+// GetDeliveries returns delivery history for a webhook.
+// GET /projects/{id}/webhooks/{webhookId}/deliveries
+func (h *WebhookHandler) GetDeliveries(w http.ResponseWriter, r *http.Request) {
+	projectID := chi.URLParam(r, "id")
+	webhookID := chi.URLParam(r, "webhookId")
+
+	// Check project exists
+	exists, err := h.projects.Exists(r.Context(), domain.ProjectID(projectID))
+	if err != nil {
+		api.WriteInternalError(w, r, "failed to check project")
+		return
+	}
+	if !exists {
+		api.WriteNotFound(w, r, fmt.Sprintf("project not found: %s", projectID))
+		return
+	}
+
+	// Verify webhook belongs to project
+	webhook, err := h.webhooks.GetByID(r.Context(), domain.WebhookID(webhookID))
+	if err != nil {
+		if errors.Is(err, domain.ErrWebhookNotFound) {
+			api.WriteNotFound(w, r, fmt.Sprintf("webhook not found: %s", webhookID))
+			return
+		}
+		api.WriteInternalError(w, r, "failed to get webhook")
+		return
+	}
+
+	if webhook.ProjectID != projectID {
+		api.WriteNotFound(w, r, fmt.Sprintf("webhook not found: %s", webhookID))
+		return
+	}
+
+	// Parse query params
+	filters := domain.DefaultWebhookDeliveryFilters()
+
+	if eventType := r.URL.Query().Get("event_type"); eventType != "" {
+		et := domain.WebhookEventType(eventType)
+		filters.EventType = &et
+	}
+
+	if successStr := r.URL.Query().Get("success"); successStr != "" {
+		success := successStr == "true"
+		filters.Success = &success
+	}
+
+	if limit := r.URL.Query().Get("limit"); limit != "" {
+		if l, err := strconv.Atoi(limit); err == nil && l > 0 && l <= 1000 {
+			filters.Limit = l
+		}
+	}
+
+	if offset := r.URL.Query().Get("offset"); offset != "" {
+		if o, err := strconv.Atoi(offset); err == nil && o >= 0 {
+			filters.Offset = o
+		}
+	}
+
+	deliveries, err := h.webhooks.GetDeliveries(r.Context(), domain.WebhookID(webhookID), filters)
+	if err != nil {
+		api.WriteInternalError(w, r, "failed to get deliveries")
+		return
+	}
+
+	dtos := make([]*DeliveryDTO, len(deliveries))
+	for i, d := range deliveries {
+		dtos[i] = &DeliveryDTO{
+			ID:             string(d.ID),
+			WebhookID:      string(d.WebhookID),
+			EventType:      string(d.EventType),
+			Payload:        d.Payload,
+			ResponseStatus: d.ResponseStatus,
+			ResponseBody:   d.ResponseBody,
+			DeliveredAt:    d.DeliveredAt.Format("2006-01-02T15:04:05Z07:00"),
+			Success:        d.Success,
+			RetryCount:     d.RetryCount,
+			ErrorMessage:   d.ErrorMessage,
+		}
+	}
+
+	api.WriteSuccess(w, r, map[string]any{
+		"deliveries": dtos,
+		"total":      len(dtos),
+		"limit":      filters.Limit,
+		"offset":     filters.Offset,
+	})
+}
--- a/internal/handlers/webhooks_test.go
+++ b/internal/handlers/webhooks_test.go
@ -0,0 +1,609 @@
+package handlers
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+	"time"
+
+	"github.com/go-chi/chi/v5"
+	"github.com/orchard9/rdev/internal/domain"
+)
+
+// mockWebhookRepository implements port.WebhookRepository for testing.
+type mockWebhookRepository struct {
+	webhooks   []*domain.Webhook
+	deliveries []*domain.WebhookDelivery
+	err        error
+}
+
+func (m *mockWebhookRepository) Create(ctx context.Context, webhook *domain.Webhook) error {
+	if m.err != nil {
+		return m.err
+	}
+	webhook.CreatedAt = time.Now()
+	webhook.UpdatedAt = time.Now()
+	m.webhooks = append(m.webhooks, webhook)
+	return nil
+}
+
+func (m *mockWebhookRepository) GetByID(ctx context.Context, id domain.WebhookID) (*domain.Webhook, error) {
+	if m.err != nil {
+		return nil, m.err
+	}
+	for _, w := range m.webhooks {
+		if w.ID == id {
+			return w, nil
+		}
+	}
+	return nil, domain.ErrWebhookNotFound
+}
+
+func (m *mockWebhookRepository) ListByProject(ctx context.Context, projectID string) ([]*domain.Webhook, error) {
+	if m.err != nil {
+		return nil, m.err
+	}
+	var result []*domain.Webhook
+	for _, w := range m.webhooks {
+		if w.ProjectID == projectID {
+			result = append(result, w)
+		}
+	}
+	return result, nil
+}
+
+func (m *mockWebhookRepository) ListEnabledByProjectAndEvent(ctx context.Context, projectID string, eventType domain.WebhookEventType) ([]*domain.Webhook, error) {
+	if m.err != nil {
+		return nil, m.err
+	}
+	var result []*domain.Webhook
+	for _, w := range m.webhooks {
+		if w.ProjectID == projectID && w.Enabled {
+			for _, e := range w.Events {
+				if e == eventType {
+					result = append(result, w)
+					break
+				}
+			}
+		}
+	}
+	return result, nil
+}
+
+func (m *mockWebhookRepository) Update(ctx context.Context, webhook *domain.Webhook) error {
+	if m.err != nil {
+		return m.err
+	}
+	for i, w := range m.webhooks {
+		if w.ID == webhook.ID {
+			m.webhooks[i] = webhook
+			return nil
+		}
+	}
+	return domain.ErrWebhookNotFound
+}
+
+func (m *mockWebhookRepository) Delete(ctx context.Context, id domain.WebhookID) error {
+	if m.err != nil {
+		return m.err
+	}
+	for i, w := range m.webhooks {
+		if w.ID == id {
+			m.webhooks = append(m.webhooks[:i], m.webhooks[i+1:]...)
+			return nil
+		}
+	}
+	return domain.ErrWebhookNotFound
+}
+
+func (m *mockWebhookRepository) RecordDelivery(ctx context.Context, delivery *domain.WebhookDelivery) error {
+	if m.err != nil {
+		return m.err
+	}
+	m.deliveries = append(m.deliveries, delivery)
+	return nil
+}
+
+func (m *mockWebhookRepository) GetDeliveries(ctx context.Context, webhookID domain.WebhookID, filters *domain.WebhookDeliveryFilters) ([]*domain.WebhookDelivery, error) {
+	if m.err != nil {
+		return nil, m.err
+	}
+	var result []*domain.WebhookDelivery
+	for _, d := range m.deliveries {
+		if d.WebhookID == webhookID {
+			result = append(result, d)
+		}
+	}
+	return result, nil
+}
+
+func (m *mockWebhookRepository) CleanupOldDeliveries(ctx context.Context, olderThanDays int) (int64, error) {
+	return 0, m.err
+}
+
+func TestWebhookHandler_Create(t *testing.T) {
+	projectRepo := newMockProjectRepo()
+	projectRepo.Register(context.Background(), &domain.Project{ID: "proj-1", Name: "Test Project"})
+
+	tests := []struct {
+		name       string
+		projectID  string
+		body       CreateWebhookRequest
+		wantStatus int
+	}{
+		{
+			name:      "valid webhook",
+			projectID: "proj-1",
+			body: CreateWebhookRequest{
+				URL:    "https://example.com/webhook",
+				Events: []string{"command.started", "command.completed"},
+			},
+			wantStatus: http.StatusCreated,
+		},
+		{
+			name:      "with custom secret",
+			projectID: "proj-1",
+			body: CreateWebhookRequest{
+				URL:    "https://example.com/webhook",
+				Events: []string{"command.started"},
+				Secret: "my-secret-key",
+			},
+			wantStatus: http.StatusCreated,
+		},
+		{
+			name:      "missing url",
+			projectID: "proj-1",
+			body: CreateWebhookRequest{
+				Events: []string{"command.started"},
+			},
+			wantStatus: http.StatusBadRequest,
+		},
+		{
+			name:      "invalid url",
+			projectID: "proj-1",
+			body: CreateWebhookRequest{
+				URL:    "not-a-valid-url",
+				Events: []string{"command.started"},
+			},
+			wantStatus: http.StatusBadRequest,
+		},
+		{
+			name:      "missing events",
+			projectID: "proj-1",
+			body: CreateWebhookRequest{
+				URL:    "https://example.com/webhook",
+				Events: []string{},
+			},
+			wantStatus: http.StatusBadRequest,
+		},
+		{
+			name:      "invalid event type",
+			projectID: "proj-1",
+			body: CreateWebhookRequest{
+				URL:    "https://example.com/webhook",
+				Events: []string{"invalid.event"},
+			},
+			wantStatus: http.StatusBadRequest,
+		},
+		{
+			name:      "project not found",
+			projectID: "unknown",
+			body: CreateWebhookRequest{
+				URL:    "https://example.com/webhook",
+				Events: []string{"command.started"},
+			},
+			wantStatus: http.StatusNotFound,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			webhookRepo := &mockWebhookRepository{}
+			h := NewWebhookHandler(webhookRepo, projectRepo)
+
+			r := chi.NewRouter()
+			r.Post("/projects/{id}/webhooks/", h.Create)
+
+			body, _ := json.Marshal(tt.body)
+			req := httptest.NewRequest(http.MethodPost, "/projects/"+tt.projectID+"/webhooks/", bytes.NewReader(body))
+			req.Header.Set("Content-Type", "application/json")
+			w := httptest.NewRecorder()
+
+			r.ServeHTTP(w, req)
+
+			if w.Code != tt.wantStatus {
+				t.Errorf("Create() status = %d, want %d, body: %s", w.Code, tt.wantStatus, w.Body.String())
+			}
+
+			if tt.wantStatus == http.StatusCreated {
+				var resp struct {
+					Data CreateWebhookResponse `json:"data"`
+				}
+				if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
+					t.Fatalf("failed to unmarshal response: %v", err)
+				}
+				if resp.Data.Secret == "" {
+					t.Error("Secret should be returned on creation")
+				}
+			}
+		})
+	}
+}
+
+func TestWebhookHandler_List(t *testing.T) {
+	projectRepo := newMockProjectRepo()
+	projectRepo.Register(context.Background(), &domain.Project{ID: "proj-1", Name: "Test Project"})
+
+	webhookRepo := &mockWebhookRepository{
+		webhooks: []*domain.Webhook{
+			{
+				ID:        "wh-1",
+				ProjectID: "proj-1",
+				URL:       "https://example.com/webhook1",
+				Events:    []domain.WebhookEventType{domain.WebhookEventCommandStarted},
+				Enabled:   true,
+				CreatedAt: time.Now(),
+				UpdatedAt: time.Now(),
+			},
+			{
+				ID:        "wh-2",
+				ProjectID: "proj-1",
+				URL:       "https://example.com/webhook2",
+				Events:    []domain.WebhookEventType{domain.WebhookEventCommandCompleted},
+				Enabled:   false,
+				CreatedAt: time.Now(),
+				UpdatedAt: time.Now(),
+			},
+		},
+	}
+
+	tests := []struct {
+		name       string
+		projectID  string
+		wantStatus int
+		wantCount  int
+	}{
+		{
+			name:       "list webhooks",
+			projectID:  "proj-1",
+			wantStatus: http.StatusOK,
+			wantCount:  2,
+		},
+		{
+			name:       "project not found",
+			projectID:  "unknown",
+			wantStatus: http.StatusNotFound,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			h := NewWebhookHandler(webhookRepo, projectRepo)
+
+			r := chi.NewRouter()
+			r.Get("/projects/{id}/webhooks/", h.List)
+
+			req := httptest.NewRequest(http.MethodGet, "/projects/"+tt.projectID+"/webhooks/", nil)
+			w := httptest.NewRecorder()
+
+			r.ServeHTTP(w, req)
+
+			if w.Code != tt.wantStatus {
+				t.Errorf("List() status = %d, want %d", w.Code, tt.wantStatus)
+			}
+
+			if tt.wantStatus == http.StatusOK {
+				var resp struct {
+					Data struct {
+						Webhooks []*WebhookDTO `json:"webhooks"`
+						Total    int           `json:"total"`
+					} `json:"data"`
+				}
+				if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
+					t.Fatalf("failed to unmarshal response: %v", err)
+				}
+				if resp.Data.Total != tt.wantCount {
+					t.Errorf("List() count = %d, want %d", resp.Data.Total, tt.wantCount)
+				}
+			}
+		})
+	}
+}
+
+func TestWebhookHandler_Get(t *testing.T) {
+	projectRepo := newMockProjectRepo()
+	projectRepo.Register(context.Background(), &domain.Project{ID: "proj-1", Name: "Test Project"})
+
+	webhookRepo := &mockWebhookRepository{
+		webhooks: []*domain.Webhook{
+			{
+				ID:        "wh-123",
+				ProjectID: "proj-1",
+				URL:       "https://example.com/webhook",
+				Events:    []domain.WebhookEventType{domain.WebhookEventCommandStarted},
+				Enabled:   true,
+				CreatedAt: time.Now(),
+				UpdatedAt: time.Now(),
+			},
+		},
+	}
+
+	tests := []struct {
+		name       string
+		projectID  string
+		webhookID  string
+		wantStatus int
+	}{
+		{
+			name:       "existing webhook",
+			projectID:  "proj-1",
+			webhookID:  "wh-123",
+			wantStatus: http.StatusOK,
+		},
+		{
+			name:       "webhook not found",
+			projectID:  "proj-1",
+			webhookID:  "wh-unknown",
+			wantStatus: http.StatusNotFound,
+		},
+		{
+			name:       "project not found",
+			projectID:  "unknown",
+			webhookID:  "wh-123",
+			wantStatus: http.StatusNotFound,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			h := NewWebhookHandler(webhookRepo, projectRepo)
+
+			r := chi.NewRouter()
+			r.Get("/projects/{id}/webhooks/{webhookId}", h.Get)
+
+			req := httptest.NewRequest(http.MethodGet, "/projects/"+tt.projectID+"/webhooks/"+tt.webhookID, nil)
+			w := httptest.NewRecorder()
+
+			r.ServeHTTP(w, req)
+
+			if w.Code != tt.wantStatus {
+				t.Errorf("Get() status = %d, want %d", w.Code, tt.wantStatus)
+			}
+		})
+	}
+}
+
+func TestWebhookHandler_Update(t *testing.T) {
+	projectRepo := newMockProjectRepo()
+	projectRepo.Register(context.Background(), &domain.Project{ID: "proj-1", Name: "Test Project"})
+
+	tests := []struct {
+		name       string
+		projectID  string
+		webhookID  string
+		body       UpdateWebhookRequest
+		wantStatus int
+	}{
+		{
+			name:      "update url",
+			projectID: "proj-1",
+			webhookID: "wh-123",
+			body: UpdateWebhookRequest{
+				URL: "https://new-url.com/webhook",
+			},
+			wantStatus: http.StatusOK,
+		},
+		{
+			name:      "disable webhook",
+			projectID: "proj-1",
+			webhookID: "wh-123",
+			body: UpdateWebhookRequest{
+				Enabled: boolPtr(false),
+			},
+			wantStatus: http.StatusOK,
+		},
+		{
+			name:      "invalid url",
+			projectID: "proj-1",
+			webhookID: "wh-123",
+			body: UpdateWebhookRequest{
+				URL: "not-a-url",
+			},
+			wantStatus: http.StatusBadRequest,
+		},
+		{
+			name:      "webhook not found",
+			projectID: "proj-1",
+			webhookID: "wh-unknown",
+			body: UpdateWebhookRequest{
+				URL: "https://example.com",
+			},
+			wantStatus: http.StatusNotFound,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			webhookRepo := &mockWebhookRepository{
+				webhooks: []*domain.Webhook{
+					{
+						ID:        "wh-123",
+						ProjectID: "proj-1",
+						URL:       "https://example.com/webhook",
+						Events:    []domain.WebhookEventType{domain.WebhookEventCommandStarted},
+						Enabled:   true,
+						CreatedAt: time.Now(),
+						UpdatedAt: time.Now(),
+					},
+				},
+			}
+			h := NewWebhookHandler(webhookRepo, projectRepo)
+
+			r := chi.NewRouter()
+			r.Put("/projects/{id}/webhooks/{webhookId}", h.Update)
+
+			body, _ := json.Marshal(tt.body)
+			req := httptest.NewRequest(http.MethodPut, "/projects/"+tt.projectID+"/webhooks/"+tt.webhookID, bytes.NewReader(body))
+			req.Header.Set("Content-Type", "application/json")
+			w := httptest.NewRecorder()
+
+			r.ServeHTTP(w, req)
+
+			if w.Code != tt.wantStatus {
+				t.Errorf("Update() status = %d, want %d, body: %s", w.Code, tt.wantStatus, w.Body.String())
+			}
+		})
+	}
+}
+
+func TestWebhookHandler_Delete(t *testing.T) {
+	projectRepo := newMockProjectRepo()
+	projectRepo.Register(context.Background(), &domain.Project{ID: "proj-1", Name: "Test Project"})
+
+	tests := []struct {
+		name       string
+		projectID  string
+		webhookID  string
+		wantStatus int
+	}{
+		{
+			name:       "delete existing webhook",
+			projectID:  "proj-1",
+			webhookID:  "wh-123",
+			wantStatus: http.StatusOK,
+		},
+		{
+			name:       "webhook not found",
+			projectID:  "proj-1",
+			webhookID:  "wh-unknown",
+			wantStatus: http.StatusNotFound,
+		},
+		{
+			name:       "project not found",
+			projectID:  "unknown",
+			webhookID:  "wh-123",
+			wantStatus: http.StatusNotFound,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			webhookRepo := &mockWebhookRepository{
+				webhooks: []*domain.Webhook{
+					{
+						ID:        "wh-123",
+						ProjectID: "proj-1",
+						URL:       "https://example.com/webhook",
+						Events:    []domain.WebhookEventType{domain.WebhookEventCommandStarted},
+						Enabled:   true,
+						CreatedAt: time.Now(),
+						UpdatedAt: time.Now(),
+					},
+				},
+			}
+			h := NewWebhookHandler(webhookRepo, projectRepo)
+
+			r := chi.NewRouter()
+			r.Delete("/projects/{id}/webhooks/{webhookId}", h.Delete)
+
+			req := httptest.NewRequest(http.MethodDelete, "/projects/"+tt.projectID+"/webhooks/"+tt.webhookID, nil)
+			w := httptest.NewRecorder()
+
+			r.ServeHTTP(w, req)
+
+			if w.Code != tt.wantStatus {
+				t.Errorf("Delete() status = %d, want %d", w.Code, tt.wantStatus)
+			}
+		})
+	}
+}
+
+func TestWebhookHandler_GetDeliveries(t *testing.T) {
+	projectRepo := newMockProjectRepo()
+	projectRepo.Register(context.Background(), &domain.Project{ID: "proj-1", Name: "Test Project"})
+
+	webhookRepo := &mockWebhookRepository{
+		webhooks: []*domain.Webhook{
+			{
+				ID:        "wh-123",
+				ProjectID: "proj-1",
+				URL:       "https://example.com/webhook",
+				Events:    []domain.WebhookEventType{domain.WebhookEventCommandStarted},
+				Enabled:   true,
+				CreatedAt: time.Now(),
+				UpdatedAt: time.Now(),
+			},
+		},
+		deliveries: []*domain.WebhookDelivery{
+			{
+				ID:             "del-1",
+				WebhookID:      "wh-123",
+				EventType:      domain.WebhookEventCommandStarted,
+				Payload:        `{"test": true}`,
+				ResponseStatus: 200,
+				Success:        true,
+				DeliveredAt:    time.Now(),
+			},
+		},
+	}
+
+	tests := []struct {
+		name       string
+		projectID  string
+		webhookID  string
+		query      string
+		wantStatus int
+	}{
+		{
+			name:       "get deliveries",
+			projectID:  "proj-1",
+			webhookID:  "wh-123",
+			query:      "",
+			wantStatus: http.StatusOK,
+		},
+		{
+			name:       "with filters",
+			projectID:  "proj-1",
+			webhookID:  "wh-123",
+			query:      "?success=true&limit=10",
+			wantStatus: http.StatusOK,
+		},
+		{
+			name:       "webhook not found",
+			projectID:  "proj-1",
+			webhookID:  "wh-unknown",
+			wantStatus: http.StatusNotFound,
+		},
+		{
+			name:       "project not found",
+			projectID:  "unknown",
+			webhookID:  "wh-123",
+			wantStatus: http.StatusNotFound,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			h := NewWebhookHandler(webhookRepo, projectRepo)
+
+			r := chi.NewRouter()
+			r.Get("/projects/{id}/webhooks/{webhookId}/deliveries", h.GetDeliveries)
+
+			req := httptest.NewRequest(http.MethodGet, "/projects/"+tt.projectID+"/webhooks/"+tt.webhookID+"/deliveries"+tt.query, nil)
+			w := httptest.NewRecorder()
+
+			r.ServeHTTP(w, req)
+
+			if w.Code != tt.wantStatus {
+				t.Errorf("GetDeliveries() status = %d, want %d", w.Code, tt.wantStatus)
+			}
+		})
+	}
+}
+
+func boolPtr(b bool) *bool {
+	return &b
+}
--- a/internal/metrics/metrics.go
+++ b/internal/metrics/metrics.go
@ -0,0 +1,142 @@
+// Package metrics provides Prometheus metrics for the rdev API.
+package metrics
+
+import (
+	"net/http"
+	"regexp"
+	"strconv"
+	"time"
+
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/client_golang/prometheus/promauto"
+	"github.com/prometheus/client_golang/prometheus/promhttp"
+)
+
+var (
+	// Commands
+	commandsTotal = promauto.NewCounterVec(prometheus.CounterOpts{
+		Name: "rdev_commands_total",
+		Help: "Total number of commands executed",
+	}, []string{"project", "type", "status"})
+
+	commandDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
+		Name:    "rdev_command_duration_seconds",
+		Help:    "Duration of command execution in seconds",
+		Buckets: prometheus.ExponentialBuckets(0.1, 2, 15), // 0.1s to ~27min
+	}, []string{"project", "type"})
+
+	// Streams
+	activeStreams = promauto.NewGaugeVec(prometheus.GaugeOpts{
+		Name: "rdev_active_streams",
+		Help: "Number of active SSE streams",
+	}, []string{"project"})
+
+	streamReconnects = promauto.NewCounterVec(prometheus.CounterOpts{
+		Name: "rdev_stream_reconnects_total",
+		Help: "Total number of SSE stream reconnections",
+	}, []string{"project"})
+
+	// Authentication
+	authFailures = promauto.NewCounterVec(prometheus.CounterOpts{
+		Name: "rdev_auth_failures_total",
+		Help: "Total number of authentication failures",
+	}, []string{"reason"})
+
+	// API Requests
+	requestDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
+		Name:    "rdev_api_request_duration_seconds",
+		Help:    "Duration of API requests in seconds",
+		Buckets: prometheus.DefBuckets,
+	}, []string{"method", "path", "status"})
+
+	requestsTotal = promauto.NewCounterVec(prometheus.CounterOpts{
+		Name: "rdev_api_requests_total",
+		Help: "Total number of API requests",
+	}, []string{"method", "path", "status"})
+)
+
+// RecordCommand records a command execution.
+func RecordCommand(project, cmdType, status string, durationMs int64) {
+	commandsTotal.WithLabelValues(project, cmdType, status).Inc()
+	commandDuration.WithLabelValues(project, cmdType).Observe(float64(durationMs) / 1000.0)
+}
+
+// IncActiveStreams increments the active stream count for a project.
+func IncActiveStreams(project string) {
+	activeStreams.WithLabelValues(project).Inc()
+}
+
+// DecActiveStreams decrements the active stream count for a project.
+func DecActiveStreams(project string) {
+	activeStreams.WithLabelValues(project).Dec()
+}
+
+// RecordStreamReconnect records a stream reconnection.
+func RecordStreamReconnect(project string) {
+	streamReconnects.WithLabelValues(project).Inc()
+}
+
+// RecordAuthFailure records an authentication failure.
+func RecordAuthFailure(reason string) {
+	authFailures.WithLabelValues(reason).Inc()
+}
+
+// Handler returns the Prometheus HTTP handler.
+func Handler() http.Handler {
+	return promhttp.Handler()
+}
+
+// Middleware returns an HTTP middleware that records request metrics.
+func Middleware(next http.Handler) http.Handler {
+	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		start := time.Now()
+
+		// Wrap the response writer to capture status code
+		rw := &responseWriter{ResponseWriter: w, statusCode: http.StatusOK}
+
+		next.ServeHTTP(rw, r)
+
+		duration := time.Since(start).Seconds()
+		status := strconv.Itoa(rw.statusCode)
+		path := normalizePath(r.URL.Path)
+
+		requestDuration.WithLabelValues(r.Method, path, status).Observe(duration)
+		requestsTotal.WithLabelValues(r.Method, path, status).Inc()
+	})
+}
+
+// responseWriter wraps http.ResponseWriter to capture status code.
+type responseWriter struct {
+	http.ResponseWriter
+	statusCode int
+}
+
+func (rw *responseWriter) WriteHeader(code int) {
+	rw.statusCode = code
+	rw.ResponseWriter.WriteHeader(code)
+}
+
+// pathNormalizers contains patterns to normalize variable path segments.
+// Order matters - more specific patterns first.
+var pathNormalizers = []struct {
+	pattern *regexp.Regexp
+	replace string
+}{
+	// /keys/uuid -> /keys/{id}
+	{regexp.MustCompile(`^/keys/[^/]+$`), "/keys/{id}"},
+	// /projects/{id}/claude-config/{type}/{name} -> /projects/{id}/claude-config/{type}/{name}
+	{regexp.MustCompile(`^/projects/[^/]+/claude-config/(commands|skills|agents)/[^/]+$`), "/projects/{id}/claude-config/$1/{name}"},
+	// /projects/{id}/... (any sub-path) - must be last as it's most general
+	{regexp.MustCompile(`^/projects/[^/]+(/.*)?$`), "/projects/{id}$1"},
+}
+
+// normalizePath normalizes the URL path for consistent metric labels.
+// Replaces variable path segments with placeholders to prevent cardinality explosion.
+func normalizePath(path string) string {
+	for _, n := range pathNormalizers {
+		if n.pattern.MatchString(path) {
+			return n.pattern.ReplaceAllString(path, n.replace)
+		}
+	}
+	return path
+}
--- a/internal/metrics/metrics_test.go
+++ b/internal/metrics/metrics_test.go
@ -0,0 +1,42 @@
+package metrics
+
+import "testing"
+
+func TestNormalizePath(t *testing.T) {
+	tests := []struct {
+		input    string
+		expected string
+	}{
+		// Keys
+		{"/keys/550e8400-e29b-41d4-a716-446655440000", "/keys/{id}"},
+		{"/keys", "/keys"},
+
+		// Projects
+		{"/projects/pantheon", "/projects/{id}"},
+		{"/projects/pantheon/claude", "/projects/{id}/claude"},
+		{"/projects/aeries/shell", "/projects/{id}/shell"},
+		{"/projects/test-123/events", "/projects/{id}/events"},
+
+		// Claude config
+		{"/projects/pantheon/claude-config/commands/deploy", "/projects/{id}/claude-config/commands/{name}"},
+		{"/projects/pantheon/claude-config/skills/go-testing", "/projects/{id}/claude-config/skills/{name}"},
+		{"/projects/pantheon/claude-config/agents/reviewer", "/projects/{id}/claude-config/agents/{name}"},
+		{"/projects/pantheon/claude-config/commands", "/projects/{id}/claude-config/commands"},
+		{"/projects/pantheon/claude-config", "/projects/{id}/claude-config"},
+
+		// Unchanged
+		{"/health", "/health"},
+		{"/ready", "/ready"},
+		{"/metrics", "/metrics"},
+		{"/docs", "/docs"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.input, func(t *testing.T) {
+			result := normalizePath(tt.input)
+			if result != tt.expected {
+				t.Errorf("normalizePath(%q) = %q, want %q", tt.input, result, tt.expected)
+			}
+		})
+	}
+}
--- a/internal/middleware/rate_limit.go
+++ b/internal/middleware/rate_limit.go
@ -0,0 +1,121 @@
+// Package middleware provides HTTP middleware components for the rdev API.
+package middleware
+
+import (
+	"log/slog"
+	"net/http"
+	"strconv"
+
+	"github.com/orchard9/rdev/internal/auth"
+	"github.com/orchard9/rdev/internal/domain"
+	"github.com/orchard9/rdev/internal/port"
+	"github.com/orchard9/rdev/pkg/api"
+)
+
+// RateLimitConfig holds configuration for the rate limit middleware.
+type RateLimitConfig struct {
+	// SkipPaths are paths that should not be rate limited.
+	SkipPaths map[string]bool
+
+	// Limiter is the rate limiter implementation to use.
+	Limiter port.RateLimiter
+
+	// Logger for rate limit events (optional).
+	Logger *slog.Logger
+}
+
+// DefaultRateLimitConfig returns a sensible default configuration.
+func DefaultRateLimitConfig() RateLimitConfig {
+	return RateLimitConfig{
+		SkipPaths: map[string]bool{
+			"/health":       true,
+			"/ready":        true,
+			"/docs":         true,
+			"/openapi.json": true,
+			"/metrics":      true,
+		},
+	}
+}
+
+// RateLimitMiddleware returns an HTTP middleware that enforces rate limits.
+// It requires the auth middleware to run first to set the API key context.
+func RateLimitMiddleware(cfg RateLimitConfig) func(http.Handler) http.Handler {
+	logger := cfg.Logger
+	if logger == nil {
+		logger = slog.Default()
+	}
+
+	return func(next http.Handler) http.Handler {
+		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			// Skip rate limiting for configured paths
+			if cfg.SkipPaths[r.URL.Path] {
+				next.ServeHTTP(w, r)
+				return
+			}
+
+			// Get API key from context (set by auth middleware)
+			apiKey := auth.GetAPIKey(r.Context())
+			if apiKey == nil {
+				// No API key means auth middleware hasn't run or request is unauthenticated
+				// Let the auth middleware handle this
+				next.ServeHTTP(w, r)
+				return
+			}
+
+			// Skip rate limiting for admin keys
+			if apiKey.ID == "admin" {
+				next.ServeHTTP(w, r)
+				return
+			}
+
+			// Check rate limit and record atomically to prevent race conditions
+			// RecordRequest is called first to ensure the count is incremented before
+			// we check, preventing burst bypass under high concurrency
+			if err := cfg.Limiter.RecordRequest(r.Context(), apiKey.ID); err != nil {
+				logger.Error("failed to record rate limit request", "error", err, "key_id", apiKey.ID)
+				// On error, allow the request (fail open)
+				next.ServeHTTP(w, r)
+				return
+			}
+
+			// Now check the limit (which includes the just-recorded request)
+			result, err := cfg.Limiter.CheckLimit(r.Context(), apiKey.ID)
+			if err != nil {
+				logger.Error("failed to check rate limit", "error", err, "key_id", apiKey.ID)
+				// On error, allow the request (fail open)
+				next.ServeHTTP(w, r)
+				return
+			}
+
+			// Set rate limit headers on all responses
+			setRateLimitHeaders(w, result)
+
+			if !result.Allowed {
+				// Rate limit exceeded
+				retryAfterSeconds := int(result.RetryAfter.Seconds())
+				if retryAfterSeconds < 1 {
+					retryAfterSeconds = 1
+				}
+				w.Header().Set("Retry-After", strconv.Itoa(retryAfterSeconds))
+				api.WriteError(w, r, http.StatusTooManyRequests, "RATE_LIMITED",
+					"Rate limit exceeded. Please retry after "+strconv.Itoa(retryAfterSeconds)+" seconds.")
+				return
+			}
+
+			next.ServeHTTP(w, r)
+		})
+	}
+}
+
+// setRateLimitHeaders sets the standard rate limit headers on the response.
+func setRateLimitHeaders(w http.ResponseWriter, result *domain.RateLimitResult) {
+	// Use the minute limit as the primary limit in headers (more commonly hit)
+	w.Header().Set("X-RateLimit-Limit", strconv.Itoa(result.LimitMinute))
+	w.Header().Set("X-RateLimit-Remaining", strconv.Itoa(result.RemainingMinute))
+	w.Header().Set("X-RateLimit-Reset", strconv.FormatInt(result.ResetMinute.Unix(), 10))
+
+	// Also include hourly limits in extended headers
+	w.Header().Set("X-RateLimit-Limit-Hour", strconv.Itoa(result.LimitHour))
+	w.Header().Set("X-RateLimit-Remaining-Hour", strconv.Itoa(result.RemainingHour))
+	w.Header().Set("X-RateLimit-Reset-Hour", strconv.FormatInt(result.ResetHour.Unix(), 10))
+}
--- a/internal/middleware/rate_limit_test.go
+++ b/internal/middleware/rate_limit_test.go
@ -0,0 +1,319 @@
+package middleware
+
+import (
+	"context"
+	"errors"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+	"time"
+
+	"github.com/orchard9/rdev/internal/auth"
+	"github.com/orchard9/rdev/internal/domain"
+)
+
+// mockRateLimiter implements port.RateLimiter for testing.
+type mockRateLimiter struct {
+	result      *domain.RateLimitResult
+	checkErr    error
+	recordErr   error
+	recordCalls int
+	checkCalls  int
+}
+
+func (m *mockRateLimiter) CheckLimit(ctx context.Context, apiKeyID string) (*domain.RateLimitResult, error) {
+	m.checkCalls++
+	if m.checkErr != nil {
+		return nil, m.checkErr
+	}
+	if m.result != nil {
+		return m.result, nil
+	}
+	// Default: allowed
+	return &domain.RateLimitResult{
+		Allowed:         true,
+		RemainingMinute: 50,
+		RemainingHour:   900,
+		LimitMinute:     60,
+		LimitHour:       1000,
+		ResetMinute:     time.Now().Add(time.Minute),
+		ResetHour:       time.Now().Add(time.Hour),
+	}, nil
+}
+
+func (m *mockRateLimiter) RecordRequest(ctx context.Context, apiKeyID string) error {
+	m.recordCalls++
+	return m.recordErr
+}
+
+func (m *mockRateLimiter) GetLimits(ctx context.Context, apiKeyID string) (*domain.RateLimitConfig, error) {
+	return &domain.RateLimitConfig{
+		PerMinute: 60,
+		PerHour:   1000,
+	}, nil
+}
+
+func (m *mockRateLimiter) Cleanup(ctx context.Context) error {
+	return nil
+}
+
+func TestRateLimitMiddleware_AllowedRequest(t *testing.T) {
+	limiter := &mockRateLimiter{}
+	cfg := RateLimitConfig{
+		Limiter:   limiter,
+		SkipPaths: make(map[string]bool),
+	}
+
+	handler := RateLimitMiddleware(cfg)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+	}))
+
+	// Create request with API key context
+	req := httptest.NewRequest(http.MethodGet, "/api/test", nil)
+	ctx := auth.WithAPIKey(req.Context(), &auth.APIKey{ID: "test-key"})
+	req = req.WithContext(ctx)
+	w := httptest.NewRecorder()
+
+	handler.ServeHTTP(w, req)
+
+	if w.Code != http.StatusOK {
+		t.Errorf("expected status %d, got %d", http.StatusOK, w.Code)
+	}
+
+	// Verify rate limit headers are set
+	if w.Header().Get("X-RateLimit-Limit") == "" {
+		t.Error("expected X-RateLimit-Limit header to be set")
+	}
+	if w.Header().Get("X-RateLimit-Remaining") == "" {
+		t.Error("expected X-RateLimit-Remaining header to be set")
+	}
+	if w.Header().Get("X-RateLimit-Reset") == "" {
+		t.Error("expected X-RateLimit-Reset header to be set")
+	}
+
+	// Verify RecordRequest was called before CheckLimit
+	if limiter.recordCalls != 1 {
+		t.Errorf("expected RecordRequest to be called 1 time, got %d", limiter.recordCalls)
+	}
+	if limiter.checkCalls != 1 {
+		t.Errorf("expected CheckLimit to be called 1 time, got %d", limiter.checkCalls)
+	}
+}
+
+func TestRateLimitMiddleware_RateLimitExceeded(t *testing.T) {
+	limiter := &mockRateLimiter{
+		result: &domain.RateLimitResult{
+			Allowed:         false,
+			RetryAfter:      5 * time.Second,
+			RemainingMinute: 0,
+			RemainingHour:   0,
+			LimitMinute:     60,
+			LimitHour:       1000,
+			ResetMinute:     time.Now().Add(time.Minute),
+			ResetHour:       time.Now().Add(time.Hour),
+		},
+	}
+	cfg := RateLimitConfig{
+		Limiter:   limiter,
+		SkipPaths: make(map[string]bool),
+	}
+
+	handler := RateLimitMiddleware(cfg)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		t.Error("handler should not be called when rate limit exceeded")
+	}))
+
+	req := httptest.NewRequest(http.MethodGet, "/api/test", nil)
+	ctx := auth.WithAPIKey(req.Context(), &auth.APIKey{ID: "test-key"})
+	req = req.WithContext(ctx)
+	w := httptest.NewRecorder()
+
+	handler.ServeHTTP(w, req)
+
+	if w.Code != http.StatusTooManyRequests {
+		t.Errorf("expected status %d, got %d", http.StatusTooManyRequests, w.Code)
+	}
+
+	if w.Header().Get("Retry-After") == "" {
+		t.Error("expected Retry-After header to be set")
+	}
+}
+
+func TestRateLimitMiddleware_SkipPaths(t *testing.T) {
+	limiter := &mockRateLimiter{}
+	cfg := RateLimitConfig{
+		Limiter: limiter,
+		SkipPaths: map[string]bool{
+			"/health": true,
+		},
+	}
+
+	handler := RateLimitMiddleware(cfg)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+	}))
+
+	req := httptest.NewRequest(http.MethodGet, "/health", nil)
+	w := httptest.NewRecorder()
+
+	handler.ServeHTTP(w, req)
+
+	if w.Code != http.StatusOK {
+		t.Errorf("expected status %d, got %d", http.StatusOK, w.Code)
+	}
+
+	// Rate limiter should not be called for skipped paths
+	if limiter.recordCalls != 0 {
+		t.Errorf("expected RecordRequest to not be called for skipped path, got %d calls", limiter.recordCalls)
+	}
+}
+
+func TestRateLimitMiddleware_NoAPIKey(t *testing.T) {
+	limiter := &mockRateLimiter{}
+	cfg := RateLimitConfig{
+		Limiter:   limiter,
+		SkipPaths: make(map[string]bool),
+	}
+
+	handler := RateLimitMiddleware(cfg)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+	}))
+
+	// Request without API key context
+	req := httptest.NewRequest(http.MethodGet, "/api/test", nil)
+	w := httptest.NewRecorder()
+
+	handler.ServeHTTP(w, req)
+
+	// Should pass through without rate limiting
+	if w.Code != http.StatusOK {
+		t.Errorf("expected status %d, got %d", http.StatusOK, w.Code)
+	}
+
+	// Rate limiter should not be called
+	if limiter.recordCalls != 0 {
+		t.Errorf("expected RecordRequest to not be called without API key, got %d calls", limiter.recordCalls)
+	}
+}
+
+func TestRateLimitMiddleware_AdminKeyBypass(t *testing.T) {
+	limiter := &mockRateLimiter{}
+	cfg := RateLimitConfig{
+		Limiter:   limiter,
+		SkipPaths: make(map[string]bool),
+	}
+
+	handler := RateLimitMiddleware(cfg)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+	}))
+
+	req := httptest.NewRequest(http.MethodGet, "/api/test", nil)
+	ctx := auth.WithAPIKey(req.Context(), &auth.APIKey{ID: "admin"})
+	req = req.WithContext(ctx)
+	w := httptest.NewRecorder()
+
+	handler.ServeHTTP(w, req)
+
+	if w.Code != http.StatusOK {
+		t.Errorf("expected status %d, got %d", http.StatusOK, w.Code)
+	}
+
+	// Rate limiter should not be called for admin
+	if limiter.recordCalls != 0 {
+		t.Errorf("expected RecordRequest to not be called for admin, got %d calls", limiter.recordCalls)
+	}
+}
+
+func TestRateLimitMiddleware_RecordError(t *testing.T) {
+	limiter := &mockRateLimiter{
+		recordErr: errors.New("record error"),
+	}
+	cfg := RateLimitConfig{
+		Limiter:   limiter,
+		SkipPaths: make(map[string]bool),
+	}
+
+	handler := RateLimitMiddleware(cfg)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+	}))
+
+	req := httptest.NewRequest(http.MethodGet, "/api/test", nil)
+	ctx := auth.WithAPIKey(req.Context(), &auth.APIKey{ID: "test-key"})
+	req = req.WithContext(ctx)
+	w := httptest.NewRecorder()
+
+	handler.ServeHTTP(w, req)
+
+	// Should fail open on error
+	if w.Code != http.StatusOK {
+		t.Errorf("expected fail-open with status %d, got %d", http.StatusOK, w.Code)
+	}
+}
+
+func TestRateLimitMiddleware_CheckError(t *testing.T) {
+	limiter := &mockRateLimiter{
+		checkErr: errors.New("check error"),
+	}
+	cfg := RateLimitConfig{
+		Limiter:   limiter,
+		SkipPaths: make(map[string]bool),
+	}
+
+	handler := RateLimitMiddleware(cfg)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+	}))
+
+	req := httptest.NewRequest(http.MethodGet, "/api/test", nil)
+	ctx := auth.WithAPIKey(req.Context(), &auth.APIKey{ID: "test-key"})
+	req = req.WithContext(ctx)
+	w := httptest.NewRecorder()
+
+	handler.ServeHTTP(w, req)
+
+	// Should fail open on error
+	if w.Code != http.StatusOK {
+		t.Errorf("expected fail-open with status %d, got %d", http.StatusOK, w.Code)
+	}
+}
+
+func TestDefaultRateLimitConfig(t *testing.T) {
+	cfg := DefaultRateLimitConfig()
+
+	expectedPaths := []string{"/health", "/ready", "/docs", "/openapi.json", "/metrics"}
+	for _, path := range expectedPaths {
+		if !cfg.SkipPaths[path] {
+			t.Errorf("expected %s to be in SkipPaths", path)
+		}
+	}
+}
+
+func TestSetRateLimitHeaders(t *testing.T) {
+	w := httptest.NewRecorder()
+	result := &domain.RateLimitResult{
+		Allowed:         true,
+		RemainingMinute: 50,
+		RemainingHour:   900,
+		LimitMinute:     60,
+		LimitHour:       1000,
+		ResetMinute:     time.Now().Add(time.Minute),
+		ResetHour:       time.Now().Add(time.Hour),
+	}
+
+	setRateLimitHeaders(w, result)
+
+	tests := []struct {
+		header string
+		want   bool
+	}{
+		{"X-RateLimit-Limit", true},
+		{"X-RateLimit-Remaining", true},
+		{"X-RateLimit-Reset", true},
+		{"X-RateLimit-Limit-Hour", true},
+		{"X-RateLimit-Remaining-Hour", true},
+		{"X-RateLimit-Reset-Hour", true},
+	}
+
+	for _, tt := range tests {
+		if (w.Header().Get(tt.header) != "") != tt.want {
+			t.Errorf("header %s: got %q, want present=%v", tt.header, w.Header().Get(tt.header), tt.want)
+		}
+	}
+}
--- a/internal/port/audit_logger.go
+++ b/internal/port/audit_logger.go
@ -0,0 +1,22 @@
+package port
+
+import (
+	"context"
+
+	"github.com/orchard9/rdev/internal/domain"
+)
+
+// AuditLogger defines operations for audit logging.
+type AuditLogger interface {
+	// LogCommandStart records the start of a command execution.
+	LogCommandStart(ctx context.Context, entry *domain.AuditLogEntry) error
+
+	// LogCommandEnd records the completion of a command execution.
+	LogCommandEnd(ctx context.Context, commandID string, result *domain.AuditResult) error
+
+	// List returns audit log entries matching the given filters.
+	List(ctx context.Context, filters domain.AuditFilters) ([]domain.AuditLogEntry, error)
+
+	// Get returns a single audit log entry by command ID.
+	Get(ctx context.Context, commandID string) (*domain.AuditLogEntry, error)
+}
--- a/internal/port/command_queue.go
+++ b/internal/port/command_queue.go
@ -0,0 +1,38 @@
+package port
+
+import (
+	"context"
+
+	"github.com/orchard9/rdev/internal/domain"
+)
+
+// CommandQueue defines operations for the command queue repository.
+type CommandQueue interface {
+	// Enqueue adds a command to the queue.
+	Enqueue(ctx context.Context, cmd *domain.QueuedCommand) error
+
+	// Dequeue retrieves and locks the next pending command for a project.
+	// Returns nil if no commands are pending.
+	// The command status is atomically updated to 'running'.
+	Dequeue(ctx context.Context, projectID string) (*domain.QueuedCommand, error)
+
+	// UpdateStatus updates the status of a queued command.
+	// If result is provided, it also updates the result fields.
+	UpdateStatus(ctx context.Context, cmdID domain.QueuedCommandID, status domain.QueueStatus, result *domain.QueuedCommandResult) error
+
+	// GetByID retrieves a specific queued command by ID.
+	GetByID(ctx context.Context, cmdID domain.QueuedCommandID) (*domain.QueuedCommand, error)
+
+	// List returns queued commands for a project with optional filters.
+	List(ctx context.Context, projectID string, filters *domain.QueueFilters) ([]*domain.QueuedCommand, error)
+
+	// Cancel marks a pending command as cancelled.
+	// Returns an error if the command is not in pending status.
+	Cancel(ctx context.Context, cmdID domain.QueuedCommandID) error
+
+	// GetStats returns queue statistics for a project (or all projects if empty).
+	GetStats(ctx context.Context, projectID string) (*domain.QueueStats, error)
+
+	// CleanupOld removes completed/failed/cancelled commands older than the specified duration.
+	CleanupOld(ctx context.Context, olderThanDays int) (int64, error)
+}
--- a/internal/port/port_test.go
+++ b/internal/port/port_test.go
@ -0,0 +1,380 @@
+package port_test
+
+import (
+	"context"
+	"testing"
+
+	"github.com/orchard9/rdev/internal/domain"
+	"github.com/orchard9/rdev/internal/port"
+)
+
+// =============================================================================
+// Interface Compliance Tests
+//
+// These tests verify that mock implementations can satisfy the port interfaces.
+// They serve as compile-time verification that interfaces are correctly defined
+// and provide example implementations for testing.
+// =============================================================================
+
+// Compile-time interface compliance checks
+var (
+	_ port.ProjectRepository = (*mockProjectRepository)(nil)
+	_ port.CommandExecutor   = (*mockCommandExecutor)(nil)
+	_ port.APIKeyRepository  = (*mockAPIKeyRepository)(nil)
+	_ port.StreamPublisher   = (*mockStreamPublisher)(nil)
+)
+
+// =============================================================================
+// Mock Implementations
+// =============================================================================
+
+type mockProjectRepository struct {
+	projects map[domain.ProjectID]*domain.Project
+}
+
+func newMockProjectRepository() *mockProjectRepository {
+	return &mockProjectRepository{
+		projects: make(map[domain.ProjectID]*domain.Project),
+	}
+}
+
+func (m *mockProjectRepository) List(ctx context.Context) ([]domain.Project, error) {
+	result := make([]domain.Project, 0, len(m.projects))
+	for _, p := range m.projects {
+		result = append(result, *p)
+	}
+	return result, nil
+}
+
+func (m *mockProjectRepository) Get(ctx context.Context, id domain.ProjectID) (*domain.Project, error) {
+	if p, ok := m.projects[id]; ok {
+		return p, nil
+	}
+	return nil, domain.ErrProjectNotFound
+}
+
+func (m *mockProjectRepository) Exists(ctx context.Context, id domain.ProjectID) (bool, error) {
+	_, ok := m.projects[id]
+	return ok, nil
+}
+
+func (m *mockProjectRepository) Register(ctx context.Context, project *domain.Project) error {
+	m.projects[project.ID] = project
+	return nil
+}
+
+func (m *mockProjectRepository) Unregister(ctx context.Context, id domain.ProjectID) error {
+	delete(m.projects, id)
+	return nil
+}
+
+func (m *mockProjectRepository) RefreshStatus(ctx context.Context) error {
+	return nil
+}
+
+type mockCommandExecutor struct {
+	executeFunc func(ctx context.Context, cmd *domain.Command, podName string, handler domain.OutputHandler) (*domain.CommandResult, error)
+}
+
+func (m *mockCommandExecutor) Execute(ctx context.Context, cmd *domain.Command, podName string, handler domain.OutputHandler) (*domain.CommandResult, error) {
+	if m.executeFunc != nil {
+		return m.executeFunc(ctx, cmd, podName, handler)
+	}
+	return &domain.CommandResult{
+		CommandID:  cmd.ID,
+		ExitCode:   0,
+		DurationMs: 100,
+	}, nil
+}
+
+func (m *mockCommandExecutor) Cancel(ctx context.Context, cmdID domain.CommandID) error {
+	return nil
+}
+
+func (m *mockCommandExecutor) PodExists(ctx context.Context, podName string) (bool, error) {
+	return true, nil
+}
+
+func (m *mockCommandExecutor) CheckConnection(ctx context.Context) error {
+	return nil
+}
+
+type mockAPIKeyRepository struct {
+	keys map[domain.APIKeyID]*domain.APIKey
+}
+
+func newMockAPIKeyRepository() *mockAPIKeyRepository {
+	return &mockAPIKeyRepository{
+		keys: make(map[domain.APIKeyID]*domain.APIKey),
+	}
+}
+
+func (m *mockAPIKeyRepository) Create(ctx context.Context, key *domain.APIKey, keyHash string) error {
+	m.keys[key.ID] = key
+	return nil
+}
+
+func (m *mockAPIKeyRepository) GetByHash(ctx context.Context, keyHash string) (*domain.APIKey, error) {
+	// In a real implementation, this would look up by hash
+	return nil, domain.ErrKeyNotFound
+}
+
+func (m *mockAPIKeyRepository) Get(ctx context.Context, id domain.APIKeyID) (*domain.APIKey, error) {
+	if k, ok := m.keys[id]; ok {
+		return k, nil
+	}
+	return nil, domain.ErrKeyNotFound
+}
+
+func (m *mockAPIKeyRepository) List(ctx context.Context) ([]*domain.APIKey, error) {
+	result := make([]*domain.APIKey, 0, len(m.keys))
+	for _, k := range m.keys {
+		result = append(result, k)
+	}
+	return result, nil
+}
+
+func (m *mockAPIKeyRepository) Revoke(ctx context.Context, id domain.APIKeyID) error {
+	if _, ok := m.keys[id]; !ok {
+		return domain.ErrKeyNotFound
+	}
+	return nil
+}
+
+func (m *mockAPIKeyRepository) UpdateLastUsed(ctx context.Context, id domain.APIKeyID) error {
+	if _, ok := m.keys[id]; !ok {
+		return domain.ErrKeyNotFound
+	}
+	return nil
+}
+
+type mockStreamPublisher struct {
+	subscribers map[string][]chan port.StreamEvent
+}
+
+func newMockStreamPublisher() *mockStreamPublisher {
+	return &mockStreamPublisher{
+		subscribers: make(map[string][]chan port.StreamEvent),
+	}
+}
+
+func (m *mockStreamPublisher) Subscribe(streamID string) (<-chan port.StreamEvent, func()) {
+	ch := make(chan port.StreamEvent, 10)
+	m.subscribers[streamID] = append(m.subscribers[streamID], ch)
+	cleanup := func() {
+		close(ch)
+	}
+	return ch, cleanup
+}
+
+func (m *mockStreamPublisher) SubscribeFromID(streamID string, lastEventID string) (<-chan port.StreamEvent, func()) {
+	// Simplified: just subscribe without replay
+	return m.Subscribe(streamID)
+}
+
+func (m *mockStreamPublisher) Publish(streamID string, event port.StreamEvent) string {
+	for _, ch := range m.subscribers[streamID] {
+		select {
+		case ch <- event:
+		default:
+			// Channel full, skip
+		}
+	}
+	return event.ID
+}
+
+func (m *mockStreamPublisher) Close(streamID string) {
+	for _, ch := range m.subscribers[streamID] {
+		close(ch)
+	}
+	delete(m.subscribers, streamID)
+}
+
+// =============================================================================
+// Mock Usage Tests
+// =============================================================================
+
+func TestMockProjectRepository_BasicOperations(t *testing.T) {
+	repo := newMockProjectRepository()
+	ctx := context.Background()
+
+	// Register a project
+	project := &domain.Project{
+		ID:     "test-proj",
+		Name:   "Test Project",
+		Status: domain.ProjectStatusRunning,
+	}
+
+	if err := repo.Register(ctx, project); err != nil {
+		t.Fatalf("Register failed: %v", err)
+	}
+
+	// Verify it exists
+	exists, err := repo.Exists(ctx, "test-proj")
+	if err != nil {
+		t.Fatalf("Exists failed: %v", err)
+	}
+	if !exists {
+		t.Error("project should exist after registration")
+	}
+
+	// Get the project
+	got, err := repo.Get(ctx, "test-proj")
+	if err != nil {
+		t.Fatalf("Get failed: %v", err)
+	}
+	if got.Name != "Test Project" {
+		t.Errorf("Got name %q, want %q", got.Name, "Test Project")
+	}
+
+	// List projects
+	list, err := repo.List(ctx)
+	if err != nil {
+		t.Fatalf("List failed: %v", err)
+	}
+	if len(list) != 1 {
+		t.Errorf("List returned %d projects, want 1", len(list))
+	}
+
+	// Unregister
+	if err := repo.Unregister(ctx, "test-proj"); err != nil {
+		t.Fatalf("Unregister failed: %v", err)
+	}
+
+	// Verify not found
+	_, err = repo.Get(ctx, "test-proj")
+	if err != domain.ErrProjectNotFound {
+		t.Errorf("Get after unregister: got error %v, want %v", err, domain.ErrProjectNotFound)
+	}
+}
+
+func TestMockCommandExecutor_Execute(t *testing.T) {
+	executor := &mockCommandExecutor{}
+	ctx := context.Background()
+
+	cmd := &domain.Command{
+		ID:        "cmd-1",
+		ProjectID: "proj-1",
+		Type:      domain.CommandTypeShell,
+		Args:      []string{"echo", "hello"},
+	}
+
+	var outputLines []domain.OutputLine
+	handler := func(line domain.OutputLine) {
+		outputLines = append(outputLines, line)
+	}
+
+	result, err := executor.Execute(ctx, cmd, "test-pod", handler)
+	if err != nil {
+		t.Fatalf("Execute failed: %v", err)
+	}
+
+	if result.CommandID != "cmd-1" {
+		t.Errorf("CommandID = %q, want %q", result.CommandID, "cmd-1")
+	}
+	if result.ExitCode != 0 {
+		t.Errorf("ExitCode = %d, want 0", result.ExitCode)
+	}
+}
+
+func TestMockAPIKeyRepository_CRUD(t *testing.T) {
+	repo := newMockAPIKeyRepository()
+	ctx := context.Background()
+
+	key := &domain.APIKey{
+		ID:     "key-1",
+		Name:   "Test Key",
+		Scopes: []domain.Scope{domain.ScopeProjectsRead},
+	}
+
+	// Create
+	if err := repo.Create(ctx, key, "hash123"); err != nil {
+		t.Fatalf("Create failed: %v", err)
+	}
+
+	// Get
+	got, err := repo.Get(ctx, "key-1")
+	if err != nil {
+		t.Fatalf("Get failed: %v", err)
+	}
+	if got.Name != "Test Key" {
+		t.Errorf("Name = %q, want %q", got.Name, "Test Key")
+	}
+
+	// List
+	list, err := repo.List(ctx)
+	if err != nil {
+		t.Fatalf("List failed: %v", err)
+	}
+	if len(list) != 1 {
+		t.Errorf("List returned %d keys, want 1", len(list))
+	}
+
+	// Revoke
+	if err := repo.Revoke(ctx, "key-1"); err != nil {
+		t.Fatalf("Revoke failed: %v", err)
+	}
+
+	// UpdateLastUsed
+	if err := repo.UpdateLastUsed(ctx, "key-1"); err != nil {
+		t.Fatalf("UpdateLastUsed failed: %v", err)
+	}
+}
+
+func TestMockStreamPublisher_PubSub(t *testing.T) {
+	pub := newMockStreamPublisher()
+
+	// Subscribe
+	ch, cleanup := pub.Subscribe("stream-1")
+	defer cleanup()
+
+	// Publish
+	event := port.StreamEvent{
+		ID:   "evt-1",
+		Type: "output",
+		Data: map[string]any{"line": "hello"},
+	}
+
+	eventID := pub.Publish("stream-1", event)
+	if eventID != "evt-1" {
+		t.Errorf("Publish returned ID %q, want %q", eventID, "evt-1")
+	}
+
+	// Receive
+	select {
+	case received := <-ch:
+		if received.ID != "evt-1" {
+			t.Errorf("Received event ID %q, want %q", received.ID, "evt-1")
+		}
+		if received.Data["line"] != "hello" {
+			t.Errorf("Received data = %v, want line=hello", received.Data)
+		}
+	default:
+		t.Error("expected to receive event")
+	}
+}
+
+// =============================================================================
+// StreamEvent Tests
+// =============================================================================
+
+func TestStreamEvent_CanBeInstantiated(t *testing.T) {
+	event := port.StreamEvent{
+		ID:   "event-123",
+		Type: "command_output",
+		Data: map[string]any{
+			"stream": "stdout",
+			"line":   "test output",
+		},
+	}
+
+	if event.ID != "event-123" {
+		t.Errorf("StreamEvent.ID = %q, want %q", event.ID, "event-123")
+	}
+	if event.Type != "command_output" {
+		t.Errorf("StreamEvent.Type = %q, want %q", event.Type, "command_output")
+	}
+	if event.Data["stream"] != "stdout" {
+		t.Errorf("StreamEvent.Data[stream] = %v, want stdout", event.Data["stream"])
+	}
+}
--- a/internal/port/rate_limiter.go
+++ b/internal/port/rate_limiter.go
@ -0,0 +1,26 @@
+package port
+
+import (
+	"context"
+
+	"github.com/orchard9/rdev/internal/domain"
+)
+
+// RateLimiter defines operations for rate limiting API requests.
+type RateLimiter interface {
+	// CheckLimit checks if a request is allowed under the rate limit.
+	// Returns the result including whether allowed, remaining counts, and retry timing.
+	CheckLimit(ctx context.Context, keyID string) (*domain.RateLimitResult, error)
+
+	// RecordRequest records that a request was made for the given API key.
+	// This should be called after CheckLimit returns Allowed=true.
+	RecordRequest(ctx context.Context, keyID string) error
+
+	// GetLimits retrieves the rate limit configuration for an API key.
+	// Returns default limits if the key doesn't have custom limits set.
+	GetLimits(ctx context.Context, keyID string) (*domain.RateLimitConfig, error)
+
+	// Cleanup removes expired rate limit state entries.
+	// This should be called periodically to prevent table bloat.
+	Cleanup(ctx context.Context) error
+}
--- a/internal/port/stream_publisher.go
+++ b/internal/port/stream_publisher.go
@ -2,6 +2,7 @@ package port

 // StreamEvent represents an event to be published on a stream.
 type StreamEvent struct {
+	ID   string // Event ID for Last-Event-ID support
 	Type string
 	Data map[string]any
 }
@ -12,8 +13,14 @@ type StreamPublisher interface {
 	// Returns a channel that will receive events and a cleanup function.
 	Subscribe(streamID string) (<-chan StreamEvent, func())

+	// SubscribeFromID creates a subscription starting from a specific event ID.
+	// This is used for reconnection with Last-Event-ID support.
+	// Events since lastEventID will be replayed before new events are delivered.
+	SubscribeFromID(streamID string, lastEventID string) (<-chan StreamEvent, func())
+
 	// Publish sends an event to all subscribers of a stream.
-	Publish(streamID string, event StreamEvent)
+	// Returns the generated event ID.
+	Publish(streamID string, event StreamEvent) string

 	// Close closes a stream and all its subscriptions.
 	Close(streamID string)
--- a/internal/port/webhook.go
+++ b/internal/port/webhook.go
@ -0,0 +1,50 @@
+package port
+
+import (
+	"context"
+
+	"github.com/orchard9/rdev/internal/domain"
+)
+
+// WebhookRepository defines operations for webhook storage.
+type WebhookRepository interface {
+	// Create creates a new webhook subscription.
+	Create(ctx context.Context, webhook *domain.Webhook) error
+
+	// Update updates an existing webhook.
+	Update(ctx context.Context, webhook *domain.Webhook) error
+
+	// Delete deletes a webhook by ID.
+	Delete(ctx context.Context, id domain.WebhookID) error
+
+	// GetByID retrieves a webhook by ID.
+	GetByID(ctx context.Context, id domain.WebhookID) (*domain.Webhook, error)
+
+	// ListByProject returns all webhooks for a project.
+	ListByProject(ctx context.Context, projectID string) ([]*domain.Webhook, error)
+
+	// ListEnabledByProjectAndEvent returns enabled webhooks that subscribe to a specific event type.
+	ListEnabledByProjectAndEvent(ctx context.Context, projectID string, eventType domain.WebhookEventType) ([]*domain.Webhook, error)
+
+	// RecordDelivery records a webhook delivery attempt.
+	RecordDelivery(ctx context.Context, delivery *domain.WebhookDelivery) error
+
+	// GetDeliveries returns delivery history for a webhook.
+	GetDeliveries(ctx context.Context, webhookID domain.WebhookID, filters *domain.WebhookDeliveryFilters) ([]*domain.WebhookDelivery, error)
+
+	// CleanupOldDeliveries removes delivery records older than the specified number of days.
+	CleanupOldDeliveries(ctx context.Context, olderThanDays int) (int64, error)
+}
+
+// WebhookDispatcher defines operations for dispatching webhook events.
+type WebhookDispatcher interface {
+	// Dispatch sends an event to all subscribed webhooks for a project.
+	// This is a non-blocking operation - deliveries happen in the background.
+	Dispatch(ctx context.Context, projectID string, event *domain.WebhookEvent) error
+
+	// Start starts the background dispatcher workers.
+	Start() error
+
+	// Stop gracefully shuts down the dispatcher.
+	Stop()
+}
--- a/internal/projects/registry.go
+++ b/internal/projects/registry.go
@ -1,148 +0,0 @@
-// Package projects provides a registry of claudebox projects.
-package projects
-
-import (
-	"context"
-	"fmt"
-	"os/exec"
-	"strings"
-	"sync"
-)
-
-// Project represents a claudebox project.
-type Project struct {
-	ID          string `json:"id"`
-	Name        string `json:"name"`
-	Description string `json:"description"`
-	PodName     string `json:"pod"`
-	Status      string `json:"status"`
-	Workspace   string `json:"workspace,omitempty"`
-}
-
-// Registry manages the list of available projects.
-type Registry struct {
-	namespace string
-	projects  map[string]*Project
-	mu        sync.RWMutex
-}
-
-// NewRegistry creates a new project registry.
-func NewRegistry(namespace string) *Registry {
-	r := &Registry{
-		namespace: namespace,
-		projects:  make(map[string]*Project),
-	}
-
-	// Initialize with known projects
-	// In the future, this could discover projects from K8s labels
-	r.projects["pantheon"] = &Project{
-		ID:          "pantheon",
-		Name:        "Pantheon",
-		Description: "Go API backend",
-		PodName:     "claudebox-pantheon-0",
-		Status:      "unknown",
-		Workspace:   "/workspace",
-	}
-	r.projects["aeries"] = &Project{
-		ID:          "aeries",
-		Name:        "Aeries",
-		Description: "Note community platform",
-		PodName:     "claudebox-aeries-0",
-		Status:      "unknown",
-		Workspace:   "/workspace",
-	}
-
-	return r
-}
-
-// List returns all projects.
-func (r *Registry) List() []*Project {
-	r.mu.RLock()
-	defer r.mu.RUnlock()
-
-	projects := make([]*Project, 0, len(r.projects))
-	for _, p := range r.projects {
-		projects = append(projects, p)
-	}
-	return projects
-}
-
-// Get returns a project by ID.
-func (r *Registry) Get(id string) (*Project, bool) {
-	r.mu.RLock()
-	defer r.mu.RUnlock()
-
-	p, ok := r.projects[id]
-	return p, ok
-}
-
-// Exists checks if a project exists.
-func (r *Registry) Exists(id string) bool {
-	r.mu.RLock()
-	defer r.mu.RUnlock()
-
-	_, ok := r.projects[id]
-	return ok
-}
-
-// RefreshStatus updates the status of all projects from K8s.
-func (r *Registry) RefreshStatus(ctx context.Context) error {
-	r.mu.Lock()
-	defer r.mu.Unlock()
-
-	for _, p := range r.projects {
-		status, err := getPodStatus(ctx, r.namespace, p.PodName)
-		if err != nil {
-			p.Status = "error"
-			continue
-		}
-		p.Status = status
-	}
-	return nil
-}
-
-// getPodStatus queries the status of a pod.
-func getPodStatus(ctx context.Context, namespace, podName string) (string, error) {
-	cmd := exec.CommandContext(ctx, "kubectl",
-		"get", "pod", podName,
-		"-n", namespace,
-		"-o", "jsonpath={.status.phase}",
-	)
-
-	output, err := cmd.Output()
-	if err != nil {
-		// Check if pod doesn't exist
-		if strings.Contains(err.Error(), "not found") {
-			return "not_found", nil
-		}
-		return "unknown", fmt.Errorf("get pod status: %w", err)
-	}
-
-	phase := strings.ToLower(strings.TrimSpace(string(output)))
-	switch phase {
-	case "running":
-		return "running", nil
-	case "pending":
-		return "pending", nil
-	case "succeeded":
-		return "completed", nil
-	case "failed":
-		return "failed", nil
-	default:
-		return phase, nil
-	}
-}
-
-// Register adds a new project to the registry.
-func (r *Registry) Register(p *Project) {
-	r.mu.Lock()
-	defer r.mu.Unlock()
-	r.projects[p.ID] = p
-}
-
-// Unregister removes a project from the registry.
-func (r *Registry) Unregister(id string) {
-	r.mu.Lock()
-	defer r.mu.Unlock()
-	delete(r.projects, id)
-}
--- a/Show More
+++ b/Show More