#!/usr/bin/env bash
#
# StemeDB Backup Script
#
# Creates a timestamped backup of WAL and database files.
#
# Usage:
# ./scripts/backup-stemedb.sh # Default backup to backups/
# ./scripts/backup-stemedb.sh --output /mnt/nfs # Custom output directory
# ./scripts/backup-stemedb.sh --wal-only # Backup WAL only (faster)
#
# Exit codes:
# 0 - Backup completed successfully
# 1 - Backup failed
#
set -euo pipefail

# Configuration
#
# Every value is assigned first and frozen afterwards. Writing
# `readonly VAR="$(cmd)"` would report the exit status of `readonly` itself,
# so a failing `cd` or `date` would slip past `set -e` and leave an empty
# constant behind.
#
#   SCRIPT_DIR   directory containing this script
#   PROJECT_DIR  parent directory of SCRIPT_DIR
#   WAL_DIR      WAL source (STEMEDB_WAL_DIR, default PROJECT_DIR/data/wal)
#   DB_DIR       DB source (STEMEDB_DB_DIR, default PROJECT_DIR/data/db)
#   TIMESTAMP    start time of this run, formatted YYYYmmdd-HHMMSS
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
WAL_DIR="${STEMEDB_WAL_DIR:-${PROJECT_DIR}/data/wal}"
DB_DIR="${STEMEDB_DB_DIR:-${PROJECT_DIR}/data/db}"
TIMESTAMP="$(date +%Y%m%d-%H%M%S)"
readonly SCRIPT_DIR PROJECT_DIR WAL_DIR DB_DIR TIMESTAMP
# Colors: ANSI escape sequences when stdout is a terminal, empty strings
# otherwise so that piped or redirected output stays free of control codes.
RED=''
GREEN=''
YELLOW=''
BLUE=''
NC=''
if [[ -t 1 ]]; then
  RED='\033[0;31m'
  GREEN='\033[0;32m'
  YELLOW='\033[0;33m'
  BLUE='\033[0;34m'
  NC='\033[0m'
fi
# Logging helpers
#
# Each helper prints one tagged line; `echo -e` interprets the color escapes
# held in RED/GREEN/YELLOW/BLUE/NC.
#   info, success  progress messages on stdout
#   warn, fail     diagnostics on stderr; fail also exits with status 1
#
# Diagnostics go to stderr so they stay visible when a caller captures stdout,
# e.g. `retention_seconds=$(parse_duration ...)`: on stdout the failure text
# would end up inside the variable and never reach the user.
info() { echo -e "${BLUE}[INFO]${NC} $*"; }
success() { echo -e "${GREEN}[OK]${NC} $*"; }
warn() { echo -e "${YELLOW}[WARN]${NC} $*" >&2; }
fail() {
  echo -e "${RED}[FAIL]${NC} $*" >&2
  exit 1
}
# Defaults (each one can be overridden by a command-line option)

# Where backups are created, and the optional S3 destination
OUTPUT_DIR="${PROJECT_DIR}/backups"
S3_BUCKET="${AWS_S3_BUCKET:-}"

# Retention window such as "30d"; empty means no retention cleanup
KEEP_LAST=""

# Switches, stored as the literal strings "true" / "false"
WAL_ONLY=false
DRY_RUN=false
UPLOAD_S3=false
# Parse arguments
#
# Consumes every command-line argument and updates OUTPUT_DIR, WAL_ONLY,
# DRY_RUN, KEEP_LAST, UPLOAD_S3 and S3_BUCKET. --help prints usage and exits 0;
# an unknown argument or a value option without its value ends the script
# through fail.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --output | --keep-last | --s3-bucket)
      # Check for the value before touching "$2": under `set -u` a missing
      # value would otherwise abort with a bare "unbound variable" error.
      if [[ $# -lt 2 ]]; then
        fail "Option $1 requires a value (use --help for usage)"
      fi
      case "$1" in
        --output) OUTPUT_DIR="$2" ;;
        --keep-last) KEEP_LAST="$2" ;;
        --s3-bucket) S3_BUCKET="$2" ;;
      esac
      shift 2
      ;;
    --wal-only)
      WAL_ONLY=true
      shift
      ;;
    --dry-run)
      DRY_RUN=true
      shift
      ;;
    --upload-s3)
      UPLOAD_S3=true
      shift
      ;;
    --help | -h)
      echo "Usage: $0 [OPTIONS]"
      echo ""
      echo "Create a timestamped backup of StemeDB data."
      echo ""
      echo "Options:"
      echo "  --output DIR          Output directory (default: backups/)"
      echo "  --wal-only            Backup WAL directory only (skip DB)"
      echo "  --dry-run             Show what would be done without executing"
      echo "  --keep-last DURATION  Delete backups older than duration (e.g., 30d, 7d)"
      echo "  --upload-s3           Upload backup to S3 after creation"
      echo "  --s3-bucket BUCKET    S3 bucket name (default: AWS_S3_BUCKET env var)"
      echo "  --help                Show this help message"
      echo ""
      echo "Environment:"
      echo "  STEMEDB_WAL_DIR       WAL directory (default: data/wal)"
      echo "  STEMEDB_DB_DIR        Database directory (default: data/db)"
      echo "  AWS_S3_BUCKET         S3 bucket for uploads (default: none)"
      echo "  AWS_REGION            AWS region (default: us-east-1)"
      echo ""
      echo "Examples:"
      echo "  $0                                      # Basic backup"
      echo "  $0 --keep-last 30d                      # Backup with 30-day retention"
      echo "  $0 --upload-s3 --s3-bucket my-bucket    # Backup to S3"
      echo "  $0 --dry-run --keep-last 7d             # Preview cleanup"
      exit 0
      ;;
    *)
      fail "Unknown argument: $1 (use --help for usage)"
      ;;
  esac
done
# Destination of this run: a timestamped directory below OUTPUT_DIR, frozen
# once the command line has been parsed.
BACKUP_DIR="${OUTPUT_DIR}/stemedb-backup-${TIMESTAMP}"
readonly BACKUP_DIR
# Cleanup partial backup on failure
#
# EXIT-trap handler. When the script ends with a non-zero status outside of
# dry-run mode, an unfinished BACKUP_DIR is deleted so that no half-copied
# backup is left behind.
#
# A backup that already has its backup-metadata.json (written only after the
# WAL/DB copies have finished) is kept: the failure then came from a later
# step such as retention cleanup or the S3 upload, and deleting the directory
# would contradict the "backup still available locally" warning.
#
# Globals: BACKUP_DIR, DRY_RUN (read)
cleanup() {
  local exit_code=$?

  if [[ $exit_code -eq 0 || "$DRY_RUN" != "false" || ! -d "$BACKUP_DIR" ]]; then
    return 0
  fi

  if [[ -f "${BACKUP_DIR}/backup-metadata.json" ]]; then
    warn "A post-backup step failed, keeping completed backup at ${BACKUP_DIR}"
    return 0
  fi

  warn "Backup failed, removing partial backup at ${BACKUP_DIR}"
  # `:?` refuses to expand an empty path; `--` stops option parsing.
  rm -rf -- "${BACKUP_DIR:?}"
}
trap cleanup EXIT
# Parse duration string (e.g., "30d", "7d") to seconds
#
# Arguments: $1 - a non-negative integer followed by one unit letter
#                 (d=days, h=hours, m=minutes)
# Outputs:   the duration in seconds on stdout
# Exits:     through fail when the unit or the number is invalid. The message
#            is redirected to stderr because callers capture stdout.
parse_duration() {
  local duration="${1:-}"
  local value="${duration%?}"
  local unit="${duration: -1}"
  local factor=0

  case "$unit" in
    d) factor=86400 ;;
    h) factor=3600 ;;
    m) factor=60 ;;
    *) fail "Invalid duration unit: $unit (use d=days, h=hours, m=minutes)" >&2 ;;
  esac

  # Only plain digits may reach the arithmetic below: arithmetic expansion
  # evaluates its operand as an expression, so free-form text could name
  # variables or even run embedded command substitutions.
  if [[ ! "$value" =~ ^[0-9]+$ ]]; then
    fail "Invalid duration: $duration (expected a number followed by d, h or m)" >&2
  fi

  # 10# forces base 10 so values such as "08" are not rejected as octal.
  echo $((10#$value * factor))
}
# Cleanup old backups based on retention policy
#
# Visits the stemedb-backup-* directories directly below OUTPUT_DIR in name
# order and removes those whose modification time is older than KEEP_LAST,
# while never letting the number of remaining backups drop to fewer than three.
#
# The backup count is taken once and decremented for every removal, real or
# simulated, so a dry run reports exactly what a real run would delete. A
# backup whose age cannot be read is kept rather than treated as old.
#
# Globals: KEEP_LAST, OUTPUT_DIR, DRY_RUN (read)
# Outputs: progress lines and a summary through info/warn/success
cleanup_old_backups() {
  local -r min_keep=3

  local retention_seconds
  retention_seconds=$(parse_duration "$KEEP_LAST")

  local cutoff_time
  cutoff_time=$(($(date +%s) - retention_seconds))

  info "Enforcing retention policy: keep backups from last ${KEEP_LAST}"

  # Find all backup directories (NUL-delimited, sorted by name)
  local -a backups=()
  local backup_path
  while IFS= read -r -d '' backup_path; do
    backups+=("$backup_path")
  done < <(find "$OUTPUT_DIR" -maxdepth 1 -type d -name "stemedb-backup-*" -print0 | sort -z)

  local remaining=${#backups[@]}
  local removed_count=0
  local kept_count=0
  local backup_time
  local backup_name

  # The ${arr[@]+...} form keeps an empty array from tripping `set -u` on
  # older bash releases.
  for backup_path in ${backups[@]+"${backups[@]}"}; do
    backup_name=$(basename "$backup_path")

    # GNU stat first, BSD stat as fallback; anything non-numeric is rejected.
    if ! backup_time=$(stat -c %Y "$backup_path" 2>/dev/null || stat -f %m "$backup_path" 2>/dev/null) ||
      [[ ! "$backup_time" =~ ^[0-9]+$ ]]; then
      warn "Cannot determine age of backup, keeping it: ${backup_name}"
      kept_count=$((kept_count + 1))
      continue
    fi

    if [[ $backup_time -ge $cutoff_time ]]; then
      kept_count=$((kept_count + 1))
    elif [[ $remaining -le $min_keep ]]; then
      # Keep at least 3 most recent backups regardless of age
      info "Keeping backup (minimum ${min_keep} retained): ${backup_name}"
      kept_count=$((kept_count + 1))
    elif [[ "$DRY_RUN" == "true" ]]; then
      info "[DRY RUN] Would remove: ${backup_name}"
      remaining=$((remaining - 1))
      removed_count=$((removed_count + 1))
    else
      warn "Removing old backup: ${backup_name}"
      # A failed removal is reported, not fatal: retention is best-effort and
      # must not abort the run that just produced a fresh backup.
      if rm -rf -- "${backup_path:?}"; then
        remaining=$((remaining - 1))
        removed_count=$((removed_count + 1))
      else
        warn "Could not remove old backup: ${backup_name}"
        kept_count=$((kept_count + 1))
      fi
    fi
  done

  if [[ "$DRY_RUN" == "false" ]]; then
    success "Retention: removed ${removed_count}, kept ${kept_count} backups"
  else
    info "[DRY RUN] Would remove: ${removed_count}, would keep: ${kept_count}"
  fi
}
# Upload backup to S3
#
# Syncs BACKUP_DIR to s3://S3_BUCKET/NAME, where NAME is the basename of
# BACKUP_DIR, using the STANDARD_IA storage class, then records S3 metrics.
#
# Globals: S3_BUCKET, BACKUP_DIR, DRY_RUN, AWS_REGION (default us-east-1),
#          TMPDIR (all read)
# Returns: 0 on success or in dry-run mode, 1 when the sync fails; exits
#          through fail when no bucket is configured or the aws CLI is missing
upload_to_s3() {
  if [[ -z "$S3_BUCKET" ]]; then
    fail "S3 bucket not specified (use --s3-bucket or set AWS_S3_BUCKET)"
  fi

  # Check if aws CLI is available
  if ! command -v aws &> /dev/null; then
    fail "AWS CLI not found. Install with: apt install awscli"
  fi

  local backup_name
  backup_name=$(basename "$BACKUP_DIR")
  local s3_path="s3://${S3_BUCKET}/${backup_name}"

  info "Uploading backup to S3..."
  info "Destination: ${s3_path}"

  if [[ "$DRY_RUN" == "true" ]]; then
    info "[DRY RUN] Would upload: ${BACKUP_DIR} -> ${s3_path}"
    return 0
  fi

  # The transfer log goes to a private mktemp file: a fixed name in /tmp could
  # be pre-created or symlinked by another local user. If no temp file can be
  # created the upload still runs, just without a log.
  local log_file
  log_file=$(mktemp "${TMPDIR:-/tmp}/s3-upload.XXXXXX") || log_file=/dev/null

  # Upload with progress, use STANDARD_IA storage class for cost savings.
  # The pipeline reflects a failing `aws` only because the script runs with
  # `set -o pipefail`.
  if aws s3 sync "$BACKUP_DIR" "$s3_path" \
    --storage-class STANDARD_IA \
    --region "${AWS_REGION:-us-east-1}" \
    2>&1 | tee "$log_file"; then
    # Never unlink the /dev/null fallback.
    if [[ "$log_file" != /dev/null ]]; then
      rm -f -- "$log_file"
    fi
    success "Uploaded to S3: ${s3_path}"

    # Write S3 metrics
    write_s3_metrics "$s3_path"
  else
    warn "S3 upload failed (backup still available locally)"
    if [[ "$log_file" != /dev/null ]]; then
      warn "Upload log kept at: ${log_file}"
    fi
    return 1
  fi
}
# Write Prometheus metrics
#
# Writes backup metrics to stemedb_backup.prom inside METRICS_DIR (default:
# /var/lib/node_exporter/textfile_collector), replacing the file's contents.
# In dry-run mode it only logs the target path. If the directory cannot be
# created or the file cannot be touched, it warns and returns 0, so metrics
# export never fails the backup. The metrics visible here are the file counts
# under BACKUP_DIR/wal and BACKUP_DIR/db.
#
# Globals: METRICS_DIR, DRY_RUN, BACKUP_DIR (read)
#
# NOTE(review): the here-document opener on the `cat` line below appears
# truncated in this copy (the redirection and the first metric lines seem to
# be missing) - confirm against the canonical script before editing the
# metric list. Lines from `cat` down to the METRICS terminator are presumably
# file content rather than shell code.
write_backup_metrics() {
local metrics_file="${METRICS_DIR:-/var/lib/node_exporter/textfile_collector}/stemedb_backup.prom"
# A dry run must not touch the filesystem; report the target and stop.
if [[ "$DRY_RUN" == "true" ]]; then
info "[DRY RUN] Would write metrics to: ${metrics_file}"
return 0
fi
# Create directory if it doesn't exist (for local dev)
if ! mkdir -p "$(dirname "$metrics_file")" 2>/dev/null; then
warn "Cannot create metrics directory, skipping metrics export"
return 0
fi
# Check if metrics file is writable
if ! touch "$metrics_file" 2>/dev/null; then
warn "Cannot write to metrics file, skipping metrics export"
return 0
fi
# Current time as epoch seconds (its use is not visible in this copy).
local now
now=$(date +%s)
cat > "$metrics_file" </dev/null | cut -f1 || echo 0)
# HELP stemedb_backup_wal_files Number of WAL files in backup
# TYPE stemedb_backup_wal_files gauge
stemedb_backup_wal_files $(find "${BACKUP_DIR}/wal" -type f 2>/dev/null | wc -l)
# HELP stemedb_backup_db_files Number of DB files in backup
# TYPE stemedb_backup_db_files gauge
stemedb_backup_db_files $(find "${BACKUP_DIR}/db" -type f 2>/dev/null | wc -l)
METRICS
success "Metrics written to: ${metrics_file}"
}
# Append S3 upload metrics to stemedb_backup.prom inside METRICS_DIR (default:
# /var/lib/node_exporter/textfile_collector).
#
# Arguments: $1 - S3 destination path of the uploaded backup
# Returns:   0 with a warning when the metrics file does not exist yet or
#            cannot be touched, so the S3 metrics are skipped, not fatal.
#
# NOTE(review): this copy appears to have lost text at the `cat >>` line
# below - the here-document body, this function's closing brace and the
# opening of main() are not visible. Everything after that line reads as the
# body of main (source validation, dry run, copy, metadata, metrics,
# retention, upload, summary) - confirm against the canonical script.
write_s3_metrics() {
local s3_path="$1"
local metrics_file="${METRICS_DIR:-/var/lib/node_exporter/textfile_collector}/stemedb_backup.prom"
# Check if metrics file exists and is writable
if [[ ! -f "$metrics_file" ]] || ! touch "$metrics_file" 2>/dev/null; then
warn "Cannot write S3 metrics (metrics file not writable)"
return 0
fi
# Append S3 metrics to existing file
cat >> "$metrics_file" </dev/null)" ]]; then
fail "WAL directory is empty: ${WAL_DIR}"
fi
# The DB directory is only required when a full backup was requested.
if [[ "$WAL_ONLY" == "false" ]]; then
if [[ ! -d "$DB_DIR" ]]; then
fail "DB directory not found: ${DB_DIR}"
fi
if [[ -z "$(ls -A "$DB_DIR" 2>/dev/null)" ]]; then
fail "DB directory is empty: ${DB_DIR}"
fi
fi
# Handle dry run
if [[ "$DRY_RUN" == "true" ]]; then
info "[DRY RUN] Would create backup at: ${BACKUP_DIR}"
info "[DRY RUN] WAL source: ${WAL_DIR}"
if [[ "$WAL_ONLY" == "false" ]]; then
info "[DRY RUN] DB source: ${DB_DIR}"
fi
# Retention is previewed too; cleanup_old_backups honours DRY_RUN itself.
if [[ -n "$KEEP_LAST" ]]; then
cleanup_old_backups
fi
if [[ "$UPLOAD_S3" == "true" ]]; then
info "[DRY RUN] Would upload to S3 bucket: ${S3_BUCKET}"
fi
echo ""
echo "=========================================="
echo -e " ${BLUE}Dry run complete (no changes made)${NC}"
echo "=========================================="
# The dry run ends here, before any directory is created or file copied.
return 0
fi
# Create backup directory
mkdir -p "$BACKUP_DIR"
info "Backup directory: ${BACKUP_DIR}"
# Backup WAL (append-only, safe to copy live)
info "Copying WAL directory..."
rsync -a "${WAL_DIR}/" "${BACKUP_DIR}/wal/"
# File count and human-readable size of the copy, used in the summary.
local wal_files
wal_files=$(find "${BACKUP_DIR}/wal" -type f | wc -l)
local wal_size
wal_size=$(du -sh "${BACKUP_DIR}/wal" | cut -f1)
success "WAL: ${wal_files} files, ${wal_size}"
# Backup DB (unless --wal-only)
local db_files=0
local db_size="0"
if [[ "$WAL_ONLY" == "false" ]]; then
info "Copying DB directory..."
rsync -a "${DB_DIR}/" "${BACKUP_DIR}/db/"
db_files=$(find "${BACKUP_DIR}/db" -type f | wc -l)
db_size=$(du -sh "${BACKUP_DIR}/db" | cut -f1)
success "DB: ${db_files} files, ${db_size}"
else
info "Skipping DB (--wal-only)"
fi
# Compute total size
local total_size
total_size=$(du -sh "$BACKUP_DIR" | cut -f1)
# Write metadata
# NOTE(review): the metadata here-document below also looks truncated in this
# copy; only its last field and the terminator are visible - confirm.
cat > "${BACKUP_DIR}/backup-metadata.json" </dev/null | grep -o '"stemedb-api","version":"[^"]*"' | head -1 | cut -d'"' -f6 || echo "unknown")"
}
METADATA
success "Metadata written"
# Write metrics
write_backup_metrics
# Cleanup old backups if retention policy specified
if [[ -n "$KEEP_LAST" ]]; then
cleanup_old_backups
fi
# Upload to S3 if requested
if [[ "$UPLOAD_S3" == "true" ]]; then
upload_to_s3
fi
# Summary
echo ""
echo "=========================================="
echo -e " ${GREEN}Backup complete${NC}"
echo "=========================================="
echo ""
echo " Location: ${BACKUP_DIR}"
echo " WAL files: ${wal_files} (${wal_size})"
if [[ "$WAL_ONLY" == "false" ]]; then
echo " DB files: ${db_files} (${db_size})"
fi
echo " Total: ${total_size}"
if [[ "$UPLOAD_S3" == "true" && -n "$S3_BUCKET" ]]; then
echo " S3 Upload: s3://${S3_BUCKET}/$(basename "$BACKUP_DIR")"
fi
echo ""
echo "Restore with:"
echo " ./scripts/restore-stemedb.sh ${BACKUP_DIR}"
echo ""
}
# Entry point. The top-level option loop has already shifted every argument
# away by this point, so "$@" is empty here; main works from the globals that
# loop set.
main "$@"