#!/usr/bin/env bash
#
# StemeDB Backup Script
#
# Creates a timestamped backup of WAL and database files.
#
# Usage:
#   ./scripts/backup-stemedb.sh                      # Default backup to backups/
#   ./scripts/backup-stemedb.sh --output /mnt/nfs    # Custom output directory
#   ./scripts/backup-stemedb.sh --wal-only           # Backup WAL only (faster)
#
# Run with --help for the full option list.
#
# Exit codes:
#   0 - Backup completed successfully
#   1 - Backup failed (or a requested S3 upload failed; in that case the
#       completed local backup is kept)
#

set -euo pipefail

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
# Assignment and `readonly` are split so that a failing command substitution
# is not masked by the exit status of `readonly` itself.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
readonly SCRIPT_DIR
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
readonly PROJECT_DIR
readonly WAL_DIR="${STEMEDB_WAL_DIR:-${PROJECT_DIR}/data/wal}"
readonly DB_DIR="${STEMEDB_DB_DIR:-${PROJECT_DIR}/data/db}"
TIMESTAMP="$(date +%Y%m%d-%H%M%S)"
readonly TIMESTAMP

# Retention never shrinks the backup set below this many directories.
readonly MIN_BACKUPS_KEPT=3

# Colors (only when stdout is a terminal). ANSI-C quoting stores the real
# escape bytes, so the helpers can use printf '%s' instead of `echo -e`.
if [[ -t 1 ]]; then
  RED=$'\033[0;31m'
  GREEN=$'\033[0;32m'
  YELLOW=$'\033[0;33m'
  BLUE=$'\033[0;34m'
  NC=$'\033[0m'
else
  RED=''
  GREEN=''
  YELLOW=''
  BLUE=''
  NC=''
fi

# ---------------------------------------------------------------------------
# Logging helpers
# ---------------------------------------------------------------------------
# Progress goes to stdout; warnings and fatal errors go to stderr so they stay
# visible when the caller's stdout is captured by a command substitution.
info()    { printf '%s[INFO]%s %s\n' "$BLUE" "$NC" "$*"; }
success() { printf '%s[OK]%s %s\n' "$GREEN" "$NC" "$*"; }
warn()    { printf '%s[WARN]%s %s\n' "$YELLOW" "$NC" "$*" >&2; }
fail()    { printf '%s[FAIL]%s %s\n' "$RED" "$NC" "$*" >&2; exit 1; }

# ---------------------------------------------------------------------------
# Defaults
# ---------------------------------------------------------------------------
OUTPUT_DIR="${PROJECT_DIR}/backups"
WAL_ONLY=false
DRY_RUN=false
KEEP_LAST=""
UPLOAD_S3=false
S3_BUCKET="${AWS_S3_BUCKET:-}"

# Set to "true" once WAL/DB copy and metadata are on disk. From then on the
# EXIT trap must not delete the backup, whatever fails afterwards.
BACKUP_COMPLETE=false

# Print the help text to stdout.
# NOTE(review): the value placeholders (DIR, DURATION, NAME) appear to have
# been stripped from the extracted source; confirm the original wording.
usage() {
  cat <<EOF
Usage: $0 [OPTIONS]

Create a timestamped backup of StemeDB data.

Options:
  --output DIR          Output directory (default: backups/)
  --wal-only            Backup WAL directory only (skip DB)
  --dry-run             Show what would be done without executing
  --keep-last DURATION  Delete backups older than duration (e.g., 30d, 7d)
  --upload-s3           Upload backup to S3 after creation
  --s3-bucket NAME      S3 bucket name (default: AWS_S3_BUCKET env var)
  --help                Show this help message

Environment:
  STEMEDB_WAL_DIR   WAL directory (default: data/wal)
  STEMEDB_DB_DIR    Database directory (default: data/db)
  AWS_S3_BUCKET     S3 bucket for uploads (default: none)
  AWS_REGION        AWS region (default: us-east-1)

Examples:
  $0                                      # Basic backup
  $0 --keep-last 30d                      # Backup with 30-day retention
  $0 --upload-s3 --s3-bucket my-bucket    # Backup to S3
  $0 --dry-run --keep-last 7d             # Preview cleanup
EOF
}

# Abort with a readable message when an option that takes a value is last on
# the command line (otherwise `set -u` dies with a bare "unbound variable").
#   $1 - option name
#   $2 - number of arguments still unparsed, including the option itself
require_value() {
  (( $2 >= 2 )) || fail "Option $1 requires a value (use --help for usage)"
}

# ---------------------------------------------------------------------------
# Parse arguments
# ---------------------------------------------------------------------------
while (( $# > 0 )); do
  case "$1" in
    --output)
      require_value "$1" "$#"
      OUTPUT_DIR="$2"
      shift 2
      ;;
    --wal-only)
      WAL_ONLY=true
      shift
      ;;
    --dry-run)
      DRY_RUN=true
      shift
      ;;
    --keep-last)
      require_value "$1" "$#"
      KEEP_LAST="$2"
      shift 2
      ;;
    --upload-s3)
      UPLOAD_S3=true
      shift
      ;;
    --s3-bucket)
      require_value "$1" "$#"
      S3_BUCKET="$2"
      shift 2
      ;;
    --help|-h)
      usage
      exit 0
      ;;
    *)
      fail "Unknown argument: $1 (use --help for usage)"
      ;;
  esac
done

readonly BACKUP_DIR="${OUTPUT_DIR}/stemedb-backup-${TIMESTAMP}"

# EXIT trap: remove a partially written backup when the script fails.
# A backup that already reached BACKUP_COMPLETE=true is never removed, so a
# later failure (metrics, retention, S3 upload) cannot destroy good data.
cleanup() {
  local exit_code=$?
  if (( exit_code != 0 )) \
    && [[ "$BACKUP_COMPLETE" == "false" && "$DRY_RUN" == "false" && -d "$BACKUP_DIR" ]]; then
    warn "Backup failed, removing partial backup at ${BACKUP_DIR}"
    # ${VAR:?} refuses to expand an empty path; `--` stops option parsing.
    rm -rf -- "${BACKUP_DIR:?}"
  fi
}
trap cleanup EXIT

# Convert a duration string such as "30d", "12h" or "45m" to seconds.
#   $1      - duration: non-negative integer followed by d, h or m
#   stdout  - number of seconds
#   exits 1 - on an unknown unit or a non-numeric value
parse_duration() {
  local duration="${1:-}"
  local value="${duration%?}"
  local unit="${duration: -1}"
  local multiplier

  case "$unit" in
    d) multiplier=86400 ;;
    h) multiplier=3600 ;;
    m) multiplier=60 ;;
    *) fail "Invalid duration unit: $unit (use d=days, h=hours, m=minutes)" ;;
  esac

  # Only plain digits may reach the arithmetic expansion below; anything else
  # would be evaluated there as a variable name or expression.
  [[ "$value" =~ ^[0-9]+$ ]] \
    || fail "Invalid duration value: '${value}' (expected a whole number, e.g. 30d)"

  # 10# forces base 10, so "08d" is eight days rather than an octal error.
  printf '%d\n' "$(( 10#$value * multiplier ))"
}

# Count regular files below a directory; prints 0 if the directory is missing.
#   $1 - directory
count_files() {
  local count
  # `|| true`: with pipefail a failing find would otherwise abort the script,
  # while wc still reports the (zero) count we want.
  count=$(find "$1" -type f 2>/dev/null | wc -l) || true
  # Arithmetic expansion strips the padding some wc implementations emit.
  printf '%d\n' "$(( count ))"
}

# Apply the --keep-last retention policy to OUTPUT_DIR.
# Backups whose mtime is older than the cutoff are removed oldest-first (names
# embed the timestamp, so a name sort is chronological), but at least
# MIN_BACKUPS_KEPT directories always remain. In dry-run mode nothing is
# deleted and the would-be removals are reported.
cleanup_old_backups() {
  local retention_seconds
  retention_seconds=$(parse_duration "$KEEP_LAST")
  local cutoff_time
  cutoff_time=$(( $(date +%s) - retention_seconds ))

  info "Enforcing retention policy: keep backups from last ${KEEP_LAST}"

  # Count once and decrement per removal. Re-reading the directory on every
  # iteration never shrinks in dry-run mode and so over-reports removals.
  local remaining
  remaining=$(find "$OUTPUT_DIR" -maxdepth 1 -type d -name "stemedb-backup-*" 2>/dev/null | wc -l) || true
  remaining=$(( remaining ))

  local removed_count=0
  local kept_count=0
  local backup_path backup_time backup_name

  while IFS= read -r -d '' backup_path; do
    backup_name=$(basename "$backup_path")

    # GNU stat first, BSD stat as fallback.
    if ! backup_time=$(stat -c %Y "$backup_path" 2>/dev/null \
      || stat -f %m "$backup_path" 2>/dev/null); then
      warn "Cannot read modification time, keeping: ${backup_name}"
      kept_count=$((kept_count + 1))
      continue
    fi

    if (( backup_time >= cutoff_time )); then
      kept_count=$((kept_count + 1))
      continue
    fi

    if (( remaining <= MIN_BACKUPS_KEPT )); then
      info "Keeping backup (minimum ${MIN_BACKUPS_KEPT} retained): ${backup_name}"
      kept_count=$((kept_count + 1))
      continue
    fi

    if [[ "$DRY_RUN" == "true" ]]; then
      info "[DRY RUN] Would remove: ${backup_name}"
    else
      warn "Removing old backup: ${backup_name}"
      rm -rf -- "$backup_path"
    fi
    removed_count=$((removed_count + 1))
    remaining=$((remaining - 1))
  done < <(find "$OUTPUT_DIR" -maxdepth 1 -type d -name "stemedb-backup-*" -print0 2>/dev/null | sort -z)

  if [[ "$DRY_RUN" == "false" ]]; then
    success "Retention: removed ${removed_count}, kept ${kept_count} backups"
  else
    info "[DRY RUN] Would remove: ${removed_count}, would keep: ${kept_count}"
  fi
}

# Upload BACKUP_DIR to s3://S3_BUCKET/<backup-dir-name> with `aws s3 sync`.
#   returns 0 - upload succeeded (or dry run)
#   returns 1 - upload failed; the local backup is left untouched
#   exits 1   - no bucket configured or the aws CLI is missing
upload_to_s3() {
  [[ -n "$S3_BUCKET" ]] \
    || fail "S3 bucket not specified (use --s3-bucket or set AWS_S3_BUCKET)"

  command -v aws > /dev/null 2>&1 \
    || fail "AWS CLI not found. Install with: apt install awscli"

  local s3_path
  s3_path="s3://${S3_BUCKET}/$(basename "$BACKUP_DIR")"

  info "Uploading backup to S3..."
  info "Destination: ${s3_path}"

  if [[ "$DRY_RUN" == "true" ]]; then
    info "[DRY RUN] Would upload: ${BACKUP_DIR} -> ${s3_path}"
    return 0
  fi

  # Unpredictable log name: a fixed path in /tmp can be pre-created or
  # symlinked by another local user.
  local log_file
  log_file=$(mktemp "${TMPDIR:-/tmp}/stemedb-s3-upload.XXXXXX") \
    || fail "Cannot create temporary log file for S3 upload"

  # STANDARD_IA storage class for cost savings. tee keeps progress visible
  # while logging it; pipefail makes the pipeline reflect aws's exit status.
  if aws s3 sync "$BACKUP_DIR" "$s3_path" \
    --storage-class STANDARD_IA \
    --region "${AWS_REGION:-us-east-1}" \
    2>&1 | tee "$log_file"; then
    rm -f -- "$log_file"
    success "Uploaded to S3: ${s3_path}"
    write_s3_metrics "$s3_path"
  else
    warn "S3 upload failed (backup still available locally; log: ${log_file})"
    return 1
  fi
}

# Write Prometheus textfile-collector metrics describing the backup.
# Best effort: an unwritable metrics location only produces a warning.
write_backup_metrics() {
  local metrics_file="${METRICS_DIR:-/var/lib/node_exporter/textfile_collector}/stemedb_backup.prom"

  if [[ "$DRY_RUN" == "true" ]]; then
    info "[DRY RUN] Would write metrics to: ${metrics_file}"
    return 0
  fi

  # Create directory if it doesn't exist (for local dev)
  if ! mkdir -p "$(dirname "$metrics_file")" 2>/dev/null; then
    warn "Cannot create metrics directory, skipping metrics export"
    return 0
  fi

  # Check if metrics file is writable
  if ! touch "$metrics_file" 2>/dev/null; then
    warn "Cannot write to metrics file, skipping metrics export"
    return 0
  fi

  local now
  now=$(date +%s)

  # du -sk is available on both GNU and BSD; convert KiB to bytes.
  local size_kb
  size_kb=$(du -sk "$BACKUP_DIR" 2>/dev/null | cut -f1) || size_kb=0

  local wal_count db_count
  wal_count=$(count_files "${BACKUP_DIR}/wal")
  db_count=$(count_files "${BACKUP_DIR}/db")

  # NOTE(review): the first two metrics were lost in extraction. Only the use
  # of a timestamp (`now`) and of a du-based size is evident from the source;
  # confirm the metric names against dashboards/alerts before relying on them.
  cat > "$metrics_file" <<METRICS
# HELP stemedb_backup_last_success_timestamp Unix timestamp of last successful backup
# TYPE stemedb_backup_last_success_timestamp gauge
stemedb_backup_last_success_timestamp ${now}
# HELP stemedb_backup_size_bytes Size of last backup in bytes
# TYPE stemedb_backup_size_bytes gauge
stemedb_backup_size_bytes $(( ${size_kb:-0} * 1024 ))
# HELP stemedb_backup_wal_files Number of WAL files in backup
# TYPE stemedb_backup_wal_files gauge
stemedb_backup_wal_files ${wal_count}
# HELP stemedb_backup_db_files Number of DB files in backup
# TYPE stemedb_backup_db_files gauge
stemedb_backup_db_files ${db_count}
METRICS

  success "Metrics written to: ${metrics_file}"
}

# Append S3 upload metrics to the file written by write_backup_metrics.
#   $1 - S3 destination URI of the upload
# Best effort: a missing or unwritable metrics file only produces a warning.
write_s3_metrics() {
  local s3_path="$1"
  local metrics_file="${METRICS_DIR:-/var/lib/node_exporter/textfile_collector}/stemedb_backup.prom"

  # Check if metrics file exists and is writable
  if [[ ! -f "$metrics_file" ]] || ! touch "$metrics_file" 2>/dev/null; then
    warn "Cannot write S3 metrics (metrics file not writable)"
    return 0
  fi

  # NOTE(review): the heredoc body was lost in extraction; the metric below is
  # a reconstruction. Confirm its name (and whether more S3 metrics existed).
  cat >> "$metrics_file" <<METRICS
# S3 destination: ${s3_path}
# HELP stemedb_backup_s3_upload_timestamp Unix timestamp of last successful S3 upload
# TYPE stemedb_backup_s3_upload_timestamp gauge
stemedb_backup_s3_upload_timestamp $(date +%s)
METRICS
}

# Entry point: validate inputs, copy WAL (and DB), write metadata and metrics,
# then apply retention and the optional S3 upload.
main() {
  # Reject a malformed --keep-last before any data is copied.
  if [[ -n "$KEEP_LAST" ]]; then
    parse_duration "$KEEP_LAST" > /dev/null
  fi

  # NOTE(review): the opening of main() was lost in extraction. The
  # "not found" check mirrors the visible DB check; any start-up banner the
  # original printed here is not reproduced.
  if [[ ! -d "$WAL_DIR" ]]; then
    fail "WAL directory not found: ${WAL_DIR}"
  fi
  if [[ -z "$(ls -A "$WAL_DIR" 2>/dev/null)" ]]; then
    fail "WAL directory is empty: ${WAL_DIR}"
  fi

  if [[ "$WAL_ONLY" == "false" ]]; then
    if [[ ! -d "$DB_DIR" ]]; then
      fail "DB directory not found: ${DB_DIR}"
    fi
    if [[ -z "$(ls -A "$DB_DIR" 2>/dev/null)" ]]; then
      fail "DB directory is empty: ${DB_DIR}"
    fi
  fi

  # Handle dry run
  if [[ "$DRY_RUN" == "true" ]]; then
    info "[DRY RUN] Would create backup at: ${BACKUP_DIR}"
    info "[DRY RUN] WAL source: ${WAL_DIR}"
    if [[ "$WAL_ONLY" == "false" ]]; then
      info "[DRY RUN] DB source: ${DB_DIR}"
    fi
    if [[ -n "$KEEP_LAST" ]]; then
      cleanup_old_backups
    fi
    if [[ "$UPLOAD_S3" == "true" ]]; then
      info "[DRY RUN] Would upload to S3 bucket: ${S3_BUCKET}"
    fi
    printf '\n'
    printf '%s\n' "=========================================="
    printf ' %sDry run complete (no changes made)%s\n' "$BLUE" "$NC"
    printf '%s\n' "=========================================="
    return 0
  fi

  command -v rsync > /dev/null 2>&1 || fail "rsync not found (required to copy backup data)"

  # Create backup directory
  mkdir -p "$BACKUP_DIR"
  info "Backup directory: ${BACKUP_DIR}"

  # Backup WAL (append-only, safe to copy live)
  info "Copying WAL directory..."
  rsync -a "${WAL_DIR}/" "${BACKUP_DIR}/wal/"
  local wal_files
  wal_files=$(count_files "${BACKUP_DIR}/wal")
  local wal_size
  wal_size=$(du -sh "${BACKUP_DIR}/wal" | cut -f1)
  success "WAL: ${wal_files} files, ${wal_size}"

  # Backup DB (unless --wal-only)
  local db_files=0
  local db_size="0"
  if [[ "$WAL_ONLY" == "false" ]]; then
    info "Copying DB directory..."
    rsync -a "${DB_DIR}/" "${BACKUP_DIR}/db/"
    db_files=$(count_files "${BACKUP_DIR}/db")
    db_size=$(du -sh "${BACKUP_DIR}/db" | cut -f1)
    success "DB: ${db_files} files, ${db_size}"
  else
    info "Skipping DB (--wal-only)"
  fi

  # Compute total size
  local total_size
  total_size=$(du -sh "$BACKUP_DIR" | cut -f1)

  # Best-effort version lookup. `|| true` rather than `|| echo unknown`:
  # `head -1` can make the pipeline fail with SIGPIPE after a version has
  # already been captured, which must not append or substitute "unknown".
  # NOTE(review): only the grep/head/cut tail of this pipeline survived
  # extraction; the cargo invocation is a reconstruction.
  local version
  version=$(cargo metadata --format-version 1 --no-deps \
    --manifest-path "${PROJECT_DIR}/Cargo.toml" 2>/dev/null \
    | grep -o '"stemedb-api","version":"[^"]*"' \
    | head -1 \
    | cut -d'"' -f6) || true
  [[ -n "$version" ]] || version="unknown"

  # Write metadata
  # NOTE(review): the JSON body was lost in extraction; key names below are a
  # reconstruction. Confirm them against whatever reads this file (for
  # example ./scripts/restore-stemedb.sh) before merging.
  cat > "${BACKUP_DIR}/backup-metadata.json" <<METADATA
{
  "timestamp": "${TIMESTAMP}",
  "wal_dir": "${WAL_DIR}",
  "db_dir": "${DB_DIR}",
  "wal_only": ${WAL_ONLY},
  "wal_files": ${wal_files},
  "wal_size": "${wal_size}",
  "db_files": ${db_files},
  "db_size": "${db_size}",
  "total_size": "${total_size}",
  "stemedb_version": "${version}"
}
METADATA
  success "Metadata written"

  # The backup is whole from here on; later failures must not delete it.
  BACKUP_COMPLETE=true

  # Write metrics
  write_backup_metrics

  # Cleanup old backups if retention policy specified
  if [[ -n "$KEEP_LAST" ]]; then
    cleanup_old_backups
  fi

  # Upload to S3 if requested. The failure is recorded instead of letting
  # `set -e` abort, so the summary is still printed; the script still exits 1.
  local upload_ok=true
  if [[ "$UPLOAD_S3" == "true" ]]; then
    upload_to_s3 || upload_ok=false
  fi

  # Summary
  printf '\n'
  printf '%s\n' "=========================================="
  printf ' %sBackup complete%s\n' "$GREEN" "$NC"
  printf '%s\n' "=========================================="
  printf '\n'
  printf '%s\n' " Location: ${BACKUP_DIR}"
  printf '%s\n' " WAL files: ${wal_files} (${wal_size})"
  if [[ "$WAL_ONLY" == "false" ]]; then
    printf '%s\n' " DB files: ${db_files} (${db_size})"
  fi
  printf '%s\n' " Total: ${total_size}"
  if [[ "$UPLOAD_S3" == "true" && -n "$S3_BUCKET" && "$upload_ok" == "true" ]]; then
    printf '%s\n' " S3 Upload: s3://${S3_BUCKET}/$(basename "$BACKUP_DIR")"
  fi
  printf '\n'
  printf '%s\n' "Restore with:"
  printf '%s\n' " ./scripts/restore-stemedb.sh ${BACKUP_DIR}"
  printf '\n'

  if [[ "$upload_ok" == "false" ]]; then
    warn "Local backup kept at ${BACKUP_DIR}, but the S3 upload did not succeed"
    return 1
  fi
}

main "$@"