From f6ced22e0607e470e58a51ad5f42df4560ca91c8 Mon Sep 17 00:00:00 2001 From: jordan Date: Sat, 31 Jan 2026 20:46:04 -0700 Subject: [PATCH] fix: Use FQDN for k8s service hostnames and remove broken commonLabels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Short-form DNS names (e.g. postgres.databases.svc) fail to resolve in new pods due to k8s DNS search domain limitations. Switch all service hostnames to FQDNs (*.svc.cluster.local). Remove commonLabels from kustomization.yaml — it injected labels into all selectors including NetworkPolicy egress rules (blocking DNS to CoreDNS) and Deployment selectors (causing immutability errors). Add OTEL_EXPORTER_OTLP_ENDPOINT env var to deployment YAML so the telemetry collector endpoint uses the FQDN without requiring a binary rebuild. Co-Authored-By: Claude Opus 4.5 --- cmd/rdev-api/config.go | 10 +++++----- deployments/k8s/base/kustomization.yaml | 3 --- deployments/k8s/base/rdev-api.yaml | 9 ++++++--- internal/adapter/cockroach/provisioner.go | 2 +- internal/db/postgres.go | 2 +- internal/telemetry/telemetry.go | 6 +++--- internal/telemetry/telemetry_test.go | 2 +- 7 files changed, 17 insertions(+), 17 deletions(-) diff --git a/cmd/rdev-api/config.go b/cmd/rdev-api/config.go index e964c95..aab47a4 100644 --- a/cmd/rdev-api/config.go +++ b/cmd/rdev-api/config.go @@ -63,13 +63,13 @@ type InfraConfig struct { WoodpeckerWebhookSecret string // CockroachDB provisioner (for project databases) - CRDBHost string // e.g., "cockroachdb-public.databases.svc" + CRDBHost string // e.g., "cockroachdb-public.databases.svc.cluster.local" CRDBPort int // e.g., 26257 CRDBUser string // e.g., "root" (insecure mode) CRDBSSLMode string // e.g., "disable" (insecure) or "verify-full" (production) // Redis provisioner (for project cache) - RedisHost string // e.g., "redis.threesix.svc" + RedisHost string // e.g., "redis.databases.svc.cluster.local" RedisPort int // e.g., 6379 RedisPassword string // admin password for ACL management } @@ -77,7 +77,7 @@ type InfraConfig struct { func loadConfig() Config { return Config{ Port: envutil.GetEnvInt("PORT", 8080), - DBHost: envutil.GetEnv("DB_HOST", "postgres.databases.svc"), + DBHost: envutil.GetEnv("DB_HOST", "postgres.databases.svc.cluster.local"), DBPort: envutil.GetEnvInt("DB_PORT", 5432), DBUser: envutil.GetEnv("DB_USER", "appuser"), DBPassword: os.Getenv("DB_PASSWORD"), @@ -154,11 +154,11 @@ func loadInfraConfig(ctx context.Context, store port.CredentialStore, cfg Config WoodpeckerWebhookSecret: getOrFallback(domain.CredKeyWoodpeckerWebhookSecret, cfg.WoodpeckerWebhookSecret), // CockroachDB and Redis provisioners (env-only for now) - CRDBHost: os.Getenv("CRDB_HOST"), // e.g., "cockroachdb-public.databases.svc" + CRDBHost: os.Getenv("CRDB_HOST"), // e.g., "cockroachdb-public.databases.svc.cluster.local" CRDBPort: envutil.GetEnvInt("CRDB_PORT", 26257), CRDBUser: envutil.GetEnv("CRDB_USER", "root"), CRDBSSLMode: envutil.GetEnv("CRDB_SSL_MODE", "disable"), - RedisHost: os.Getenv("REDIS_HOST"), // e.g., "redis.threesix.svc" + RedisHost: os.Getenv("REDIS_HOST"), // e.g., "redis.databases.svc.cluster.local" RedisPort: envutil.GetEnvInt("REDIS_PORT", 6379), RedisPassword: os.Getenv("REDIS_PASSWORD"), } diff --git a/deployments/k8s/base/kustomization.yaml b/deployments/k8s/base/kustomization.yaml index c8c4471..709fa82 100644 --- a/deployments/k8s/base/kustomization.yaml +++ b/deployments/k8s/base/kustomization.yaml @@ -27,6 +27,3 @@ resources: - pdb.yaml - network-policy.yaml -commonLabels: - app.kubernetes.io/managed-by: kustomize - app.kubernetes.io/part-of: rdev diff --git a/deployments/k8s/base/rdev-api.yaml b/deployments/k8s/base/rdev-api.yaml index 2232388..73abaa7 100644 --- a/deployments/k8s/base/rdev-api.yaml +++ b/deployments/k8s/base/rdev-api.yaml @@ -69,7 +69,7 @@ spec: - name: PORT value: "8080" - name: DB_HOST - value: "postgres.databases.svc" + value: "postgres.databases.svc.cluster.local" - name: DB_PORT value: "5432" - name: DB_USER @@ -123,7 +123,7 @@ spec: key: WOODPECKER_API_TOKEN # CockroachDB for project database provisioning - name: CRDB_HOST - value: "cockroachdb-public.databases.svc" + value: "cockroachdb-public.databases.svc.cluster.local" - name: CRDB_PORT value: "26257" - name: CRDB_USER @@ -132,7 +132,7 @@ spec: value: "disable" # Redis for project cache provisioning - name: REDIS_HOST - value: "redis.databases.svc" + value: "redis.databases.svc.cluster.local" - name: REDIS_PORT value: "6379" - name: REDIS_PASSWORD @@ -140,6 +140,9 @@ spec: secretKeyRef: name: redis-credentials key: REDIS_PASSWORD + # OpenTelemetry + - name: OTEL_EXPORTER_OTLP_ENDPOINT + value: "otel-collector.observability.svc.cluster.local:4317" imagePullSecrets: - name: ghcr-secret diff --git a/internal/adapter/cockroach/provisioner.go b/internal/adapter/cockroach/provisioner.go index 3145be7..12b3869 100644 --- a/internal/adapter/cockroach/provisioner.go +++ b/internal/adapter/cockroach/provisioner.go @@ -27,7 +27,7 @@ type Provisioner struct { // Config holds CockroachDB provisioner configuration. type Config struct { - Host string // e.g., "cockroachdb-public.databases.svc" + Host string // e.g., "cockroachdb-public.databases.svc.cluster.local" Port int // e.g., 26257 User string // e.g., "root" (for insecure mode) SSLMode string // e.g., "disable" (for insecure mode) diff --git a/internal/db/postgres.go b/internal/db/postgres.go index 3c2f038..f6151ab 100644 --- a/internal/db/postgres.go +++ b/internal/db/postgres.go @@ -30,7 +30,7 @@ type Config struct { // DefaultConfig returns config from environment defaults. func DefaultConfig() Config { return Config{ - Host: "postgres.databases.svc", + Host: "postgres.databases.svc.cluster.local", Port: 5432, User: "appuser", Password: "", diff --git a/internal/telemetry/telemetry.go b/internal/telemetry/telemetry.go index 68508cb..d2d4d6a 100644 --- a/internal/telemetry/telemetry.go +++ b/internal/telemetry/telemetry.go @@ -4,7 +4,7 @@ // Traces are exported to an OpenTelemetry collector (e.g., otel-collector in k8s). // // Configuration via environment variables: -// - OTEL_EXPORTER_OTLP_ENDPOINT: Collector endpoint (default: otel-collector.observability.svc:4317) +// - OTEL_EXPORTER_OTLP_ENDPOINT: Collector endpoint (default: otel-collector.observability.svc.cluster.local:4317) // - OTEL_SERVICE_NAME: Service name for traces (default: rdev-api) // - OTEL_SERVICE_VERSION: Service version (default: unknown) // - OTEL_SERVICE_NAMESPACE: Namespace (default: rdev) @@ -33,7 +33,7 @@ import ( // Config holds telemetry configuration. type Config struct { // Endpoint is the OTLP collector endpoint (gRPC). - // Default: otel-collector.observability.svc:4317 + // Default: otel-collector.observability.svc.cluster.local:4317 Endpoint string // ServiceName identifies this service in traces. @@ -63,7 +63,7 @@ type Config struct { // DefaultConfig returns configuration with defaults applied. func DefaultConfig() Config { return Config{ - Endpoint: envutil.GetEnv("OTEL_EXPORTER_OTLP_ENDPOINT", "otel-collector.observability.svc:4317"), + Endpoint: envutil.GetEnv("OTEL_EXPORTER_OTLP_ENDPOINT", "otel-collector.observability.svc.cluster.local:4317"), ServiceName: envutil.GetEnv("OTEL_SERVICE_NAME", "rdev-api"), ServiceVersion: envutil.GetEnv("OTEL_SERVICE_VERSION", "unknown"), ServiceNamespace: envutil.GetEnv("OTEL_SERVICE_NAMESPACE", "rdev"), diff --git a/internal/telemetry/telemetry_test.go b/internal/telemetry/telemetry_test.go index 054f9db..2d408af 100644 --- a/internal/telemetry/telemetry_test.go +++ b/internal/telemetry/telemetry_test.go @@ -21,7 +21,7 @@ func TestDefaultConfig(t *testing.T) { cfg := DefaultConfig() - if cfg.Endpoint != "otel-collector.observability.svc:4317" { + if cfg.Endpoint != "otel-collector.observability.svc.cluster.local:4317" { t.Errorf("expected default endpoint, got %s", cfg.Endpoint) } if cfg.ServiceName != "rdev-api" {