rdev/internal/adapter/notify/provisioner.go
jordan ddcfe52b5c
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
feat: implement shared notify host model for platform email delivery
Replace per-project notify host provisioning (7-9 API calls + DNS + async
Resend verification) with a shared platform host for all *.threesix.ai projects.

Under the new model:
- CreateProjectNotify: 3 calls only (account + send key + host grant)
- No per-project Resend domain, DNS records, or async verification
- All *.threesix.ai projects share `threesix.ai` as the platform host
- Custom domains still get a dedicated host via ReprovisionNotifyHost

Changes:
- domain/notify.go: slim NotifyCredentials (no Host/From/ResendDomainID);
  add NotifyHostCredentials for reprovision return path
- port/notify_provisioner.go: update interface signatures and docs
- adapter/notify/provisioner.go: rewrite CreateProjectNotify (3 steps);
  rewrite DeleteProjectNotify (account-only vs full cleanup)
- adapter/notify/provisioner_reprovision.go: return *NotifyHostCredentials
- adapter/notify/provisioner_test.go: update tests for new model
- service/project_infra_crud.go: store only NOTIFY_API_KEY on provision
- domain/credential.go: add CredKeyNotifySharedHost/CredKeyNotifySharedFrom
- cmd/rdev-api/config.go: add NotifySharedHost/NotifySharedFrom to InfraConfig
- service/component.go: add notifySharedHost/notifySharedFrom + WithNotifyDefaults
- service/component_deploy.go: inject shared host defaults when no custom host stored
- handlers/notify.go: handle shared-host projects in Reprovision guard;
  add WithSharedNotifyHost builder
- cmd/rdev-api/main.go: wire SharedHost to provisioner, component service,
  and notify handler

Bootstrap: NOTIFY_SHARED_HOST=threesix.ai and NOTIFY_SHARED_FROM=noreply@threesix.ai
stored in credential store (host id=1 already provisioned with Resend provider).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-25 17:04:11 -07:00

385 lines
12 KiB
Go

package notify
import (
"context"
"fmt"
"log/slog"
"time"
"github.com/orchard9/rdev/internal/domain"
"github.com/orchard9/rdev/internal/port"
)
// Provisioner is the notify-admin-API-backed implementation of
// port.NotifyProvisioner.
//
// In the shared-host model, default projects all send through one
// pre-provisioned platform host, so per-project provisioning only creates an
// account, a send key, and a host grant. Projects with a custom domain still
// get a dedicated host through ReprovisionNotifyHost.
type Provisioner struct {
	// client is the notify admin API client.
	client notifyAdminAPI

	// resend is the Resend API client; nil when ResendAPIKey not configured.
	resend resendAPI

	// resendAPIKey is passed to createProvider; kept separate from resend for
	// interface compatibility.
	resendAPIKey string

	// dns is the DNS provider; nil when Cloudflare not configured.
	dns port.DNSProvider

	// baseDomain is the base domain, e.g., "threesix.ai".
	baseDomain string

	// sharedHost is the pre-provisioned platform sending host
	// (e.g., "mail.threesix.ai").
	sharedHost string

	// logger receives all provisioning and cleanup log output.
	logger *slog.Logger
}
// Config carries the settings NewProvisioner needs to build a Provisioner.
type Config struct {
	// BaseURL is the notify service URL (e.g., "https://notify.orchard9.ai").
	// Required.
	BaseURL string

	// AdminKey is the admin API key (notify_admin_...). Required.
	AdminKey string

	// ResendAPIKey is the Resend API key used for per-project domain
	// provisioning. Optional.
	ResendAPIKey string

	// BaseDomain is the base domain for per-project hosts
	// (default: "threesix.ai").
	BaseDomain string

	// SharedHost is the pre-provisioned platform sending host
	// (e.g., "mail.threesix.ai"). Required.
	SharedHost string
}
// NewProvisioner creates a new notify provisioner.
//
// cfg.BaseDomain falls back to "threesix.ai" when empty. The Resend client is
// only constructed when cfg.ResendAPIKey is set; otherwise the resend field
// stays nil and Resend-dependent operations report "not configured". dns may
// be nil, in which case DNS record management is skipped.
//
// A nil logger is replaced with slog.Default(): every method logs through
// p.logger unconditionally, so storing nil would panic on the first log call.
func NewProvisioner(cfg Config, dns port.DNSProvider, logger *slog.Logger) *Provisioner {
	if logger == nil {
		logger = slog.Default()
	}

	baseDomain := cfg.BaseDomain
	if baseDomain == "" {
		baseDomain = "threesix.ai"
	}

	p := &Provisioner{
		client:     newAdminClient(cfg.BaseURL, cfg.AdminKey),
		dns:        dns,
		baseDomain: baseDomain,
		sharedHost: cfg.SharedHost,
		logger:     logger,
	}
	if cfg.ResendAPIKey != "" {
		p.resend = newResendClient(cfg.ResendAPIKey)
		p.resendAPIKey = cfg.ResendAPIKey
	}
	return p
}
// CreateProjectNotify sets up notify for a project on the shared platform host.
//
// It performs three admin API calls, in order:
//  1. create the account "project-{projectID}";
//  2. create a send key named "project-{projectID}-send" for that account;
//  3. grant the account access to p.sharedHost.
//
// A failure in step 1 or 2 is returned to the caller; when step 2 fails the
// account from step 1 is removed on a best-effort basis. A failure in step 3
// is only logged as a warning and provisioning still succeeds. An error is
// also returned up front when no shared host is configured.
//
// slug is accepted for interface compatibility and is not used here.
func (p *Provisioner) CreateProjectNotify(ctx context.Context, projectID, slug string) (*domain.NotifyCredentials, error) {
	host := p.sharedHost
	if host == "" {
		return nil, fmt.Errorf("notify: shared host not configured")
	}

	name := "project-" + projectID

	// Step 1: account.
	account, err := p.client.createAccount(ctx, name)
	if err != nil {
		return nil, fmt.Errorf("notify: create account for project %s: %w", projectID, err)
	}

	// Step 2: send key. Roll the account back if this fails so a retry does
	// not find a half-provisioned account.
	sendKey, err := p.client.createSendKey(ctx, account.ID, name+"-send")
	if err != nil {
		p.bestEffortDeleteAccount(ctx, account.ID, projectID)
		return nil, fmt.Errorf("notify: create send key for project %s: %w", projectID, err)
	}

	// Step 3: host grant. Deliberately non-fatal.
	if grantErr := p.client.grantHostAccess(ctx, host, account.ID); grantErr != nil {
		p.logger.Warn("failed to grant notify host access",
			"host", host,
			"account_id", account.ID,
			"project_id", projectID,
			"error", grantErr,
		)
	}

	p.logger.Info("notify provisioned",
		"project_id", projectID,
		"shared_host", host,
	)

	creds := &domain.NotifyCredentials{
		ProjectID: projectID,
		AccountID: account.ID,
		APIKey:    sendKey.Key,
		CreatedAt: time.Now(),
	}
	return creds, nil
}
// verifyWithRetry asks Resend to verify a domain, retrying on failure.
//
// It first waits initialDelay to give DNS time to propagate, then makes up to
// maxAttempts verification calls spaced retryInterval apart. It is meant to be
// run in its own goroutine: nothing is returned, every outcome is logged, and
// the function exits silently as soon as ctx is done during a wait.
func (p *Provisioner) verifyWithRetry(ctx context.Context, resendDomainID, host, projectID string) {
	const (
		initialDelay  = 60 * time.Second
		retryInterval = 30 * time.Second
		maxAttempts   = 5
	)

	// pause blocks for d and reports whether the full wait elapsed; false
	// means ctx finished first and the caller should give up.
	pause := func(d time.Duration) bool {
		select {
		case <-ctx.Done():
			return false
		case <-time.After(d):
			return true
		}
	}

	// Give DNS a head start before the first attempt.
	if !pause(initialDelay) {
		return
	}

	for attempt := 1; ; attempt++ {
		verifyErr := p.resend.verifyDomain(ctx, resendDomainID)
		if verifyErr == nil {
			p.logger.Info("resend domain verified",
				"domain_id", resendDomainID,
				"host", host,
				"project_id", projectID,
				"attempt", attempt,
			)
			return
		}

		p.logger.Warn("resend domain verification attempt failed",
			"attempt", attempt,
			"max_attempts", maxAttempts,
			"domain_id", resendDomainID,
			"host", host,
			"project_id", projectID,
			"error", verifyErr,
		)

		// No wait after the final attempt; fall through to the summary log.
		if attempt == maxAttempts {
			break
		}
		if !pause(retryInterval) {
			return
		}
	}

	p.logger.Warn("resend domain verification exhausted all attempts — re-verify manually via API",
		"domain_id", resendDomainID,
		"host", host,
		"project_id", projectID,
	)
}
// DeleteProjectNotify removes notify resources for a project.
//
// The notify account "project-{projectID}" is always looked up and, if found,
// deleted; the notify service is expected to cascade that to the account's
// keys and host grants.
//
// When perProjectHost is non-empty the project had a dedicated sending host,
// so the host itself, its Resend domain (when a Resend client and
// resendDomainID are available), and its DKIM/SPF DNS records (when a DNS
// provider is configured) are removed as well.
//
// Safety: if perProjectHost equals the configured shared platform host, the
// host/Resend/DNS cleanup is skipped and a warning is logged. Deleting the
// shared host or its DNS records would break delivery for every project that
// relies on it.
//
// Every failure is logged as a warning and cleanup continues; the returned
// error is always nil.
func (p *Provisioner) DeleteProjectNotify(ctx context.Context, projectID, perProjectHost, resendDomainID string) error {
	// 1. Delete notify account (cascades keys + host grants).
	acct, err := p.findAccountByProject(ctx, projectID)
	switch {
	case err != nil:
		p.logger.Warn("failed to find notify account during deletion",
			"project_id", projectID,
			"error", err,
		)
	case acct != nil:
		if err := p.client.deleteAccount(ctx, acct.ID); err != nil {
			p.logger.Warn("failed to delete notify account",
				"account_id", acct.ID,
				"project_id", projectID,
				"error", err,
			)
		}
	}

	// 2. If a per-project custom host was provisioned, clean it up.
	switch {
	case perProjectHost == "":
		// Shared-host project: nothing beyond the account to remove.

	case perProjectHost == p.sharedHost:
		// Never tear down the platform host on behalf of a single project.
		p.logger.Warn("refusing to delete shared notify host during project deletion",
			"host", perProjectHost,
			"project_id", projectID,
		)

	default:
		if err := p.client.deleteHost(ctx, perProjectHost); err != nil {
			p.logger.Warn("failed to delete notify host",
				"host", perProjectHost,
				"project_id", projectID,
				"error", err,
			)
		}

		if p.resend != nil && resendDomainID != "" {
			if err := p.resend.deleteDomain(ctx, resendDomainID); err != nil {
				p.logger.Warn("failed to delete resend domain",
					"domain_id", resendDomainID,
					"project_id", projectID,
					"error", err,
				)
			}
		}

		// Delete Cloudflare DNS records for DKIM/SPF.
		// Names follow Resend's standard format:
		//   DKIM:    resend._domainkey.{host}
		//   SPF MX:  send.{host}
		//   SPF TXT: send.{host}
		// If Resend changes their record naming, manual cleanup may be needed.
		if p.dns != nil {
			records := []struct {
				recType string
				name    string
				warnMsg string
			}{
				{"TXT", "resend._domainkey." + perProjectHost, "failed to delete DKIM DNS record"},
				{"MX", "send." + perProjectHost, "failed to delete SPF MX DNS record"},
				{"TXT", "send." + perProjectHost, "failed to delete SPF TXT DNS record"},
			}
			for _, rec := range records {
				if err := p.dns.DeleteRecordByName(ctx, rec.recType, rec.name); err != nil {
					p.logger.Warn(rec.warnMsg,
						"name", rec.name,
						"project_id", projectID,
						"error", err,
					)
				}
			}
		}
	}

	p.logger.Info("notify resources deleted", "project_id", projectID)
	return nil
}
// VerifyProjectNotify asks Resend to verify the domain identified by
// resendDomainID. Callers should allow DNS records time to propagate first
// (~60 seconds minimum).
//
// It returns an error when no Resend client is configured, when
// resendDomainID is empty, or when the verification call itself fails (the
// underlying error is wrapped).
func (p *Provisioner) VerifyProjectNotify(ctx context.Context, projectID, resendDomainID string) error {
	switch {
	case p.resend == nil:
		return fmt.Errorf("notify: resend not configured")
	case resendDomainID == "":
		return fmt.Errorf("notify: resend domain ID not available for project %s", projectID)
	}

	verifyErr := p.resend.verifyDomain(ctx, resendDomainID)
	if verifyErr == nil {
		return nil
	}
	return fmt.Errorf("notify: verify domain for project %s: %w", projectID, verifyErr)
}
// ProvisionNotifyDomain creates the Resend domain for an existing notify host,
// adds the DKIM/SPF DNS records Resend returns (when a DNS provider is
// configured), and kicks off asynchronous verification.
//
// Use it to attach or repair the Resend domain of a dedicated (custom) notify
// host. It returns the Resend domain ID, which the caller must store as
// NOTIFY_RESEND_DOMAIN_ID.
//
// Errors are returned when Resend is not configured, when host is empty, or
// when domain creation fails. DNS upsert failures are logged and do not fail
// the call. Verification runs in a background goroutine that is detached from
// the caller's cancellation but bounded by a fixed timeout, so it cannot
// outlive a hung Resend call indefinitely.
func (p *Provisioner) ProvisionNotifyDomain(ctx context.Context, projectID, host string) (string, error) {
	const (
		// resendRegion is the Resend region new domains are created in.
		resendRegion = "us-east-1"
		// verifyBudget caps the background verification. verifyWithRetry
		// needs about three minutes of waiting in the worst case; the rest
		// is headroom for the API calls themselves.
		verifyBudget = 10 * time.Minute
	)

	if p.resend == nil {
		return "", fmt.Errorf("notify: resend not configured")
	}
	if host == "" {
		return "", fmt.Errorf("notify: host is required")
	}

	// 1. Create the Resend domain for the existing notify host.
	resendDomainID, dnsRecords, err := p.resend.createDomain(ctx, host, resendRegion)
	if err != nil {
		return "", fmt.Errorf("notify: create resend domain for %s: %w", host, err)
	}
	p.logger.Info("resend domain created", "host", host, "domain_id", resendDomainID, "project_id", projectID)

	// 2. Add DKIM/SPF DNS records (non-fatal).
	// rec.Name is relative to the zone apex (e.g., "resend._domainkey.mail.slug").
	// Cloudflare's normalizeName appends ".baseDomain" to build the FQDN.
	if p.dns != nil {
		for _, rec := range dnsRecords {
			if _, upsertErr := p.dns.UpsertRecord(ctx, domain.DNSRecord{
				Type:     rec.DNSType,
				Name:     rec.Name,
				Content:  rec.Value,
				TTL:      1,
				Priority: rec.Priority,
			}); upsertErr != nil {
				p.logger.Warn("failed to upsert notify DNS record",
					"name", rec.Name,
					"type", rec.DNSType,
					"project_id", projectID,
					"error", upsertErr,
				)
			}
		}
	}

	// 3. Fire-and-forget verification. WithoutCancel keeps it running after
	// the request that triggered it returns; WithTimeout bounds its lifetime.
	go func() {
		verifyCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), verifyBudget)
		defer cancel()
		p.verifyWithRetry(verifyCtx, resendDomainID, host, projectID)
	}()

	return resendDomainID, nil
}
// GetNotifyDomainStatus reports the Resend verification status of a project's
// email domain.
//
// When no Resend client is configured or resendDomainID is empty, no API call
// is made and the status is "not_configured". Otherwise the status comes from
// Resend; a failed lookup is returned as a wrapped error with a nil result.
func (p *Provisioner) GetNotifyDomainStatus(ctx context.Context, host, resendDomainID string) (*domain.NotifyDomainStatus, error) {
	result := &domain.NotifyDomainStatus{
		Host:           host,
		ResendDomainID: resendDomainID,
		Status:         "not_configured",
	}
	if p.resend == nil || resendDomainID == "" {
		return result, nil
	}

	current, err := p.resend.getDomainStatus(ctx, resendDomainID)
	if err != nil {
		return nil, fmt.Errorf("notify: get domain status for %s: %w", host, err)
	}

	result.Status = current
	return result, nil
}
// GetProjectNotify looks up the notify account for a project.
//
// It returns (nil, nil) when the project has no account. When one exists, the
// result carries only ProjectID, AccountID, and CreatedAt; the API key is not
// populated. Treat a non-nil result purely as "already provisioned".
func (p *Provisioner) GetProjectNotify(ctx context.Context, projectID string) (*domain.NotifyCredentials, error) {
	account, lookupErr := p.findAccountByProject(ctx, projectID)
	switch {
	case lookupErr != nil:
		return nil, fmt.Errorf("notify: find account for project %s: %w", projectID, lookupErr)
	case account == nil:
		return nil, nil
	}

	creds := &domain.NotifyCredentials{
		ProjectID: projectID,
		AccountID: account.ID,
		CreatedAt: account.CreatedAt,
	}
	return creds, nil
}
// TestConnection checks that the notify admin API can be reached by listing
// accounts and discarding the result. Any failure is returned wrapped.
func (p *Provisioner) TestConnection(ctx context.Context) error {
	if _, listErr := p.client.listAccounts(ctx); listErr != nil {
		return fmt.Errorf("notify admin API unreachable: %w", listErr)
	}
	return nil
}
// findAccountByProject lists all notify accounts and returns the first one
// named "project-{projectID}". It returns (nil, nil) when there is no match
// and passes a listing error through unchanged.
func (p *Provisioner) findAccountByProject(ctx context.Context, projectID string) (*accountResponse, error) {
	all, err := p.client.listAccounts(ctx)
	if err != nil {
		return nil, err
	}

	want := "project-" + projectID
	for idx := 0; idx < len(all); idx++ {
		// Take the address of the slice element itself rather than a copy.
		if candidate := &all[idx]; candidate.Name == want {
			return candidate, nil
		}
	}
	return nil, nil
}
// bestEffortDeleteHost deletes the notify host as rollback after a failed
// provisioning step, logging a warning if the delete fails.
//
// The delete runs on a context detached from ctx's cancellation and bounded by
// its own short timeout. Rollback is often triggered because ctx was cancelled
// or hit its deadline; reusing ctx directly would make the cleanup call fail
// immediately and leave the host behind.
func (p *Provisioner) bestEffortDeleteHost(ctx context.Context, host, projectID string) {
	const cleanupTimeout = 30 * time.Second

	cleanupCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), cleanupTimeout)
	defer cancel()

	if err := p.client.deleteHost(cleanupCtx, host); err != nil {
		p.logger.Warn("failed to clean up notify host after provisioning failure",
			"host", host,
			"project_id", projectID,
			"error", err,
		)
	}
}
// bestEffortDeleteAccount deletes the notify account as rollback after a
// failed provisioning step, logging a warning if the delete fails.
//
// The delete runs on a context detached from ctx's cancellation and bounded by
// its own short timeout. Rollback is often triggered because ctx was cancelled
// or hit its deadline; reusing ctx directly would make the cleanup call fail
// immediately and leave an orphaned account behind.
func (p *Provisioner) bestEffortDeleteAccount(ctx context.Context, accountID, projectID string) {
	const cleanupTimeout = 30 * time.Second

	cleanupCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), cleanupTimeout)
	defer cancel()

	if err := p.client.deleteAccount(cleanupCtx, accountID); err != nil {
		p.logger.Warn("failed to clean up notify account after provisioning failure",
			"account_id", accountID,
			"project_id", projectID,
			"error", err,
		)
	}
}