rdev/cookbooks/trees/slackpath-4-microservice-constellation.yaml
jordan fa0d030def
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
feat: improve notify domain verification reliability and add status endpoints
- Add verifyWithRetry to provisioner: 60s initial DNS propagation delay,
  5 retries with 30s backoff before marking verification as failed
- Add GetNotifyDomainStatus: polls Resend API for domain verification status,
  returns "not_configured" when Resend not set up
- Add VerifyProjectNotify: synchronous re-verification for handler use
- Add getDomainStatus to resendAPI interface + resendClient implementation
- Add NotifyDomainStatus domain struct (host, resend_domain_id, status)
- Guard NOTIFY_RESEND_DOMAIN_ID storage against empty string writes
- New handler: GET /projects/{id}/notify/status (returns verification state)
- New handler: POST /projects/{id}/notify/verify (triggers re-verification)
- Add verify-notify-domain cookbook step to persona-community,
  slackpath-1, and slackpath-4 trees (polls status for up to 6 min)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-23 16:25:55 -07:00

154 lines
5.0 KiB
YAML

# Cookbook tree metadata and the variables interpolated into the steps.
name: microservice-constellation
description: 'Slack Path 4: Distributed System. Orchestrates communication between Auth, Chat, and Worker services.'
version: 1
vars:
  # Sent as the project name in the create-project request body.
  # Empty by default — presumably supplied by the caller at run time; confirm.
  project_name: ""
  # Interpolated into the /implement-feature prompt of the implement-mesh step.
  feature_slug: mesh-interop
steps:
# --- Infrastructure ---
# Creates the project via POST /project. Every later step addresses the
# project through the two outputs captured from the response here.
create-project:
  action: api
  method: POST
  endpoint: "/project"
  body:
    name: "{{ .vars.project_name }}"
    description: 'Slack Path 4: Microservices'
  outputs:
    # project_id is read from the response's .data.name field.
    - project_id: .data.name
    # domain is the host used by the verification steps' HTTPS calls.
    - domain: .data.domain
# Adds the database component to the project. Errors are tolerated
# (on_error: continue) because, per the description, the component may
# already exist from the skeleton.
# NOTE(review): the description says CockroachDB while the component type is
# "postgres" — presumably relying on wire compatibility; confirm.
add-db:
  description: 'Add CockroachDB for user/auth storage (may already exist from skeleton)'
  depends_on:
    - create-project
  on_error: continue
  action: api
  method: POST
  endpoint: "/projects/{{ .outputs.create-project.project_id }}/components"
  body:
    type: postgres
    name: main-db
# Adds the Redis component to the project. Errors are tolerated
# (on_error: continue) because, per the description, the component may
# already exist from the skeleton.
add-redis:
  description: 'Add Redis for job queue and pub/sub (may already exist from skeleton)'
  depends_on:
    - create-project
  on_error: continue
  action: api
  method: POST
  endpoint: "/projects/{{ .outputs.create-project.project_id }}/components"
  body:
    type: redis
    name: job-queue
# Registers the three application components in one batch request, after
# both backing-store steps (add-db, add-redis) have run.
add-components:
  description: 'Add auth, chat, and worker atomically (single git commit)'
  depends_on:
    - add-db
    - add-redis
  action: api
  method: POST
  endpoint: "/projects/{{ .outputs.create-project.project_id }}/components/batch"
  body:
    components:
      # Two services and one worker; the verification steps later call the
      # auth and chat services over HTTP.
      - type: service
        name: auth-svc
      - type: service
        name: chat-svc
      - type: worker
        name: worker-svc
# Gate between component creation and the steps that need the infrastructure.
# Presumably blocks until the project's pipeline completes — semantics are
# defined by the wait_pipeline action; confirm.
wait-infra:
  depends_on:
    - add-components
  action: wait_pipeline
  project_id: "{{ .outputs.create-project.project_id }}"
# Polls GET /projects/{id}/notify/status until the project's email domain is
# reported as "verified" (or "not_configured"). Best-effort by design: every
# path exits 0 and on_error is "continue", so a slow or failed verification
# never blocks the rest of the tree.
# Requires curl and jq on the runner; reads RDEV_API_KEY and (optionally)
# RDEV_API_URL from the environment.
verify-notify-domain:
  description: Wait for the project email domain to be verified by Resend
  depends_on: [wait-infra]
  on_error: continue
  action: shell
  command: |
    PROJECT_ID="{{ .outputs.create-project.project_id }}"
    API_URL="${RDEV_API_URL:-https://rdev.masq-ops.orchard9.ai}"
    # 12 polls with a 30 s sleep after each = 6 minutes of waiting.
    for i in $(seq 1 12); do
      # The timeouts bound each request so a hung connection cannot stall the
      # step indefinitely. Any curl/jq failure leaves STATUS empty, which is
      # treated as "not yet verified" and simply polled again.
      STATUS=$(curl -sf --connect-timeout 5 --max-time 10 \
        "$API_URL/projects/$PROJECT_ID/notify/status" \
        -H "X-API-Key: $RDEV_API_KEY" | jq -r '.data.status // empty' 2>/dev/null)
      echo "notify domain status (attempt $i/12): $STATUS"
      if [ "$STATUS" = "verified" ]; then
        echo "Email domain verified — OTP and auth emails will work"
        exit 0
      fi
      if [ "$STATUS" = "not_configured" ]; then
        echo "Notify not configured — skipping"
        exit 0
      fi
      sleep 30
    done
    echo "Email domain not verified after 6 minutes — continuing, but OTP emails may fail"
    exit 0
# --- Implementation ---
# Starts an agent build (POST .../builds) with the feature prompt and
# captures the returned task id so wait-build can poll it.
implement-mesh:
  description: 'Agent implements Service-to-Service calls (Chat calls Auth, Chat queues to Worker)'
  depends_on:
    - verify-notify-domain
  action: api
  method: POST
  endpoint: "/projects/{{ .outputs.create-project.project_id }}/builds"
  body:
    # Folded scalar (>-): the line breaks below become single spaces and the
    # trailing newline is stripped, so the value is one single-line prompt.
    prompt: >-
      /implement-feature {{ .vars.feature_slug }} --requirements
      'Chat Service must call http://auth-svc/validate to check tokens.
      Chat Service must push to Redis queue for Worker.
      Worker must process tasks.'
    auto_commit: true
    auto_push: true
    # NOTE(review): the repository owner ("jordan") is hard-coded in this URL.
    git_clone_url: "https://git.threesix.ai/jordan/{{ .outputs.create-project.project_id }}.git"
  outputs:
    # build_id is read from the response's .data.task_id field.
    - build_id: .data.task_id
# Polls the build started by implement-mesh, identified by its build_id output.
wait-build:
  description: 'Wait for agent code generation'
  depends_on:
    - implement-mesh
  action: wait_build
  build_id: "{{ .outputs.implement-mesh.build_id }}"
  # Presumably 120 polls at 5-second intervals (about 10 minutes) — units are
  # defined by the wait_build action; confirm.
  max_attempts: 120
  poll_interval: 5
# Second pipeline gate, after the agent build; the verification steps depend
# on it. Presumably waits for the pipeline triggered by the build's
# auto-push — confirm against the wait_pipeline action.
wait-deploy:
  depends_on:
    - wait-build
  action: wait_pipeline
  project_id: "{{ .outputs.create-project.project_id }}"
# --- Verification ---
# Hard gate: fails the run (exit 1) unless both the auth and chat health
# endpoints report .data.status == "healthy". The worker component is not
# checked here. Requires curl and jq on the runner.
verify-services-running:
  description: "Verify auth and chat services are healthy"
  depends_on: [wait-deploy]
  action: shell
  command: |
    DOMAIN="{{ .outputs.create-project.domain }}"
    # Bounded requests: a hung endpoint yields an empty status (reported as
    # unhealthy below) instead of blocking the step indefinitely.
    AUTH_HEALTH=$(curl -s --connect-timeout 5 --max-time 15 "https://$DOMAIN/api/auth-svc/health" | jq -r '.data.status // empty')
    CHAT_HEALTH=$(curl -s --connect-timeout 5 --max-time 15 "https://$DOMAIN/api/chat-svc/health" | jq -r '.data.status // empty')
    # POSIX "=" rather than "==": "==" inside [ ] is a bash extension and is
    # rejected by dash and other strict /bin/sh implementations, which would
    # make this check fail even when both services are healthy.
    if [ "$AUTH_HEALTH" = "healthy" ] && [ "$CHAT_HEALTH" = "healthy" ]; then
      echo "Both services healthy"
      exit 0
    fi
    echo "Auth: $AUTH_HEALTH, Chat: $CHAT_HEALTH"
    exit 1
# Optional end-to-end smoke test (on_error: continue): succeeds when the chat
# status response contains the text "Services Connected".
# NOTE(review): this calls /api/chat/status while the health checks in
# verify-services-running use /api/chat-svc/...; confirm the intended route.
verify-e2e:
  description: "Call Chat Service (which calls Auth internally) - optional"
  depends_on: [verify-services-running]
  on_error: continue
  action: shell
  command: |
    DOMAIN="{{ .outputs.create-project.domain }}"
    # We mock a token (assuming auth service has a backdoor or we register first)
    # This test verifies that the Chat service didn't crash trying to reach Auth
    # and that it successfully handed off work.
    # The timeouts keep a hung endpoint from blocking the step indefinitely.
    RESP=$(curl -s --connect-timeout 5 --max-time 30 "https://$DOMAIN/api/chat/status")
    # printf instead of echo: some shells' echo interprets backslash escapes,
    # which could alter the response text before it reaches grep.
    if printf '%s\n' "$RESP" | grep "Services Connected"; then
      exit 0
    fi
    exit 1
# Cleanup: deletes the project created by create-project.
# NOTE(review): uses the singular /project/ prefix (like create-project),
# whereas the component and build endpoints use /projects/ — confirm.
teardown:
  - action: api
    method: DELETE
    endpoint: "/project/{{ .outputs.create-project.project_id }}"