diff --git a/deployments/k8s/base/woodpecker-pvc-cleanup.yaml b/deployments/k8s/base/woodpecker-pvc-cleanup.yaml
new file mode 100644
index 0000000..481bd61
--- /dev/null
+++ b/deployments/k8s/base/woodpecker-pvc-cleanup.yaml
@@ -0,0 +1,123 @@
+# CronJob to garbage-collect stale Woodpecker pipeline PVCs.
+#
+# Woodpecker's Kubernetes backend creates a PVC per pipeline for workspace
+# sharing between step pods. If the agent crashes or restarts, PVCs can leak.
+# A subsequent pipeline with a colliding name gets "already exists" and is
+# marked as failed even though all steps succeed.
+#
+# This CronJob runs every 5 minutes and deletes wp-* PVCs older than 30 minutes.
+# Normal pipelines finish in ~12 minutes, so 30 minutes is a safe threshold.
+#
+# See: https://github.com/woodpecker-ci/woodpecker/issues/1594
+---
+# Identity the cleanup Job's pod runs as.
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: woodpecker-pvc-cleanup
+  namespace: threesix
+  labels:
+    app.kubernetes.io/name: woodpecker-pvc-cleanup
+    app.kubernetes.io/part-of: woodpecker
+---
+# Namespace-scoped permissions: list, read and delete PVCs in threesix.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: woodpecker-pvc-cleanup
+  namespace: threesix
+  labels:
+    app.kubernetes.io/name: woodpecker-pvc-cleanup
+    app.kubernetes.io/part-of: woodpecker
+rules:
+  - apiGroups: [""]
+    resources: ["persistentvolumeclaims"]
+    verbs: ["get", "list", "delete"]
+---
+# Grants the Role to the cleanup ServiceAccount.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: woodpecker-pvc-cleanup
+  namespace: threesix
+  labels:
+    app.kubernetes.io/name: woodpecker-pvc-cleanup
+    app.kubernetes.io/part-of: woodpecker
+subjects:
+  - kind: ServiceAccount
+    name: woodpecker-pvc-cleanup
+    namespace: threesix
+roleRef:
+  kind: Role
+  name: woodpecker-pvc-cleanup
+  apiGroup: rbac.authorization.k8s.io
+---
+apiVersion: batch/v1
+kind: CronJob
+metadata:
+  name: woodpecker-pvc-cleanup
+  namespace: threesix
+  labels:
+    app.kubernetes.io/name: woodpecker-pvc-cleanup
+    app.kubernetes.io/part-of: woodpecker
+spec:
+  schedule: "*/5 * * * *"
+  # Skip a run if the previous one is still active.
+  concurrencyPolicy: Forbid
+  successfulJobsHistoryLimit: 1
+  failedJobsHistoryLimit: 3
+  jobTemplate:
+    spec:
+      activeDeadlineSeconds: 60
+      template:
+        spec:
+          serviceAccountName: woodpecker-pvc-cleanup
+          restartPolicy: Never
+          containers:
+            - name: cleanup
+              # NOTE(review): `latest` is a mutable tag; consider pinning a
+              # kubectl version compatible with the cluster.
+              image: bitnami/kubectl:latest
+              command:
+                - /bin/sh
+                - -c
+                - |
+                  set -e
+                  echo "Checking for stale Woodpecker pipeline PVCs..."
+                  NOW=$(date +%s)
+                  THRESHOLD=1800  # 30 minutes in seconds
+
+                  # List PVCs as "name creationTimestamp" pairs via jsonpath.
+                  # Capture the output first so a kubectl failure aborts the
+                  # job (set -e) instead of being masked by the pipeline.
+                  PVCS=$(kubectl get pvc -n threesix \
+                    -o jsonpath='{range .items[*]}{.metadata.name} {.metadata.creationTimestamp}{"\n"}{end}')
+
+                  # Only wp-* PVCs are candidates for deletion.
+                  printf '%s\n' "$PVCS" | grep '^wp-' | while read -r NAME TS; do
+                    # Parse the creationTimestamp into epoch seconds. When it
+                    # cannot be parsed the age is unknown, so skip the PVC
+                    # rather than treating it as stale.
+                    # NOTE(review): relies on the image's `date -d` accepting
+                    # the Kubernetes timestamp format - confirm for this image.
+                    if ! CREATED=$(date -d "$TS" +%s 2>/dev/null); then
+                      echo "Skipping PVC $NAME: cannot parse creationTimestamp '$TS'" >&2
+                      continue
+                    fi
+                    AGE=$((NOW - CREATED))
+                    if [ "$AGE" -gt "$THRESHOLD" ]; then
+                      echo "Deleting stale PVC: $NAME (age: ${AGE}s)"
+                      # --ignore-not-found: the PVC may have been removed
+                      # since it was listed.
+                      kubectl delete pvc -n threesix "$NAME" --wait=false --ignore-not-found
+                    fi
+                  done
+
+                  echo "Cleanup complete."
+              resources:
+                requests:
+                  cpu: 10m
+                  memory: 32Mi
+                limits:
+                  cpu: 100m
+                  memory: 64Mi