This commit implements comprehensive production hardening across multiple layers to prepare StemeDB for enterprise pilot deployments:

## API Layer
- Add rate limiting middleware with configurable limits per endpoint
- Enhance error handling with detailed context and proper HTTP status codes
- Add security hardening tests for input validation and boundary conditions
- Create store_helpers module for defensive storage access patterns

## Storage & WAL
- Optimize group commit batching for higher throughput
- Add defensive error handling in hybrid backend with proper fallbacks
- Enhance WAL journal durability guarantees with fsync validation
- Improve index store query performance with better caching

## Operations & Deployment
- Add comprehensive operations documentation (deployment, monitoring, DR)
- Create systemd units for backup, WAL archival, and verification
- Add monitoring configs (Prometheus alerts, metrics exporters)
- Implement backup/restore scripts with verification and S3 archival
- Add DR drill automation and runbook procedures
- Create load balancer configs (nginx, envoy) with health checks

## Documentation
- Update CLAUDE.md with operations and troubleshooting guides
- Expand roadmap with production readiness milestones
- Add pilot success criteria and deployment reference architecture
- Document TLS setup, monitoring integration, and incident response

## Configuration
- Add .env.example with all required environment variables
- Document resource sizing for different deployment scales
- Add configuration examples for various deployment topologies

This positions StemeDB for successful enterprise pilots with proper operational discipline, monitoring, backup/DR, and security hardening.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
127 lines
4.0 KiB
Bash
Executable File
127 lines
4.0 KiB
Bash
Executable File
#!/usr/bin/env bash
|
|
#
|
|
# StemeDB Backup & DR Integration Tests (Simplified)
|
|
#
|
|
# Quick validation that P5.3 components work together.
|
|
#
|
|
|
|
# Strict mode: abort on unhandled errors, on unset variables, and on a
# failure in any stage of a pipeline.
set -euo pipefail

# Root of the StemeDB checkout that provides scripts/ and docs/. The
# historical default is kept, but an exported PROJECT_DIR now takes
# precedence so the suite can run from any checkout.
PROJECT_DIR="${PROJECT_DIR:-/home/jml/Workspace/stemedb}"

# Private scratch directory for this run. mktemp chooses an unpredictable
# name and creates the directory itself, unlike a PID-based path in /tmp.
TEST_DIR="$(mktemp -d "${TMPDIR:-/tmp}/stemedb-backup-test-XXXXXX")" || {
  echo "cannot create temporary test directory" >&2
  exit 1
}
|
|
|
|
# ANSI colour escapes. They are stored as literal backslash sequences and
# only turned into real escape bytes when a helper prints them.
NC='\033[0m'
RED='\033[0;31m'
GREEN='\033[0;32m'
BLUE='\033[0;34m'

# Print a blue [INFO] line built from all arguments.
info() {
  printf '%b\n' "${BLUE}[INFO]${NC} $*"
}

# Print a green [PASS] line built from all arguments.
pass() {
  printf '%b\n' "${GREEN}[PASS]${NC} $*"
}

# Print a red [FAIL] line built from all arguments, then end the script
# with exit status 1.
fail() {
  printf '%b\n' "${RED}[FAIL]${NC} $*"
  exit 1
}
|
|
|
|
# Exit hook: remove the scratch tree named by TEST_DIR, whichever way the
# script ends.
cleanup() { rm -rf "$TEST_DIR"; }

trap 'cleanup' EXIT
|
|
|
|
# Opening banner: a blank line, the ruled title, and a closing blank line.
printf '%s\n' \
  "" \
  "==========================================" \
  " P5.3 Backup & DR Tests" \
  "==========================================" \
  ""
|
|
|
|
# Setup: build the scratch layout that the backup scripts are pointed at.
info "Setting up test environment..."
mkdir -p \
  "$TEST_DIR/wal" \
  "$TEST_DIR/db" \
  "$TEST_DIR/backups" \
  "$TEST_DIR/metrics"

# Minimal fixture data. The WAL file starts with the four ASCII bytes "STEM"
# (presumably the WAL magic header; confirm against the WAL format), followed
# by one line of text. The KV file holds just that line.
{
  printf '\x53\x54\x45\x4d'
  echo "test data"
} > "$TEST_DIR/wal/test.wal"
echo "test data" > "$TEST_DIR/db/test.kv"

pass "Test environment ready"
|
|
|
|
# Test 1: Backup creation
# Runs the backup script against the fixture directories and expects exactly
# one stemedb-backup-* directory directly under the output directory.
info "Test 1: Backup creation..."

# The script's output is discarded, so its exit status is checked explicitly.
# Otherwise `set -e` would end the run here without printing any [FAIL] line.
STEMEDB_WAL_DIR="$TEST_DIR/wal" \
STEMEDB_DB_DIR="$TEST_DIR/db" \
METRICS_DIR="$TEST_DIR/metrics" \
  "$PROJECT_DIR/scripts/backup-stemedb.sh" --output "$TEST_DIR/backups" >/dev/null 2>&1 \
  || fail "Backup script exited with an error"

BACKUP_COUNT=$(find "$TEST_DIR/backups" -maxdepth 1 -type d -name "stemedb-backup-*" | wc -l)
if [[ "$BACKUP_COUNT" -eq 1 ]]; then
  pass "Backup created"
else
  fail "Backup not created (found $BACKUP_COUNT backups)"
fi
|
|
|
|
# Test 2: Backup structure
# Picks a backup directory and checks for the metadata file and the wal/ and
# db/ subdirectories. BACKUP stays set for the verification test further down.
info "Test 2: Backup structure..."

# -maxdepth 1 matches the lookup in Test 1, so only top-level backup
# directories are considered.
BACKUP=$(find "$TEST_DIR/backups" -maxdepth 1 -type d -name "stemedb-backup-*" | head -n1)

# An empty result would turn the checks below into tests against "/...", and
# the resulting error would point at the wrong problem.
[[ -n "$BACKUP" ]] || fail "No backup directory found"

[[ -f "$BACKUP/backup-metadata.json" ]] || fail "Missing backup-metadata.json"
[[ -d "$BACKUP/wal" ]] || fail "Missing wal/"
[[ -d "$BACKUP/db" ]] || fail "Missing db/"
pass "Backup structure valid"
|
|
|
|
# Test 3: Metrics export
# The metrics file must exist in the scratch metrics directory and must
# mention the last-success timestamp metric.
info "Test 3: Metrics export..."

if [[ ! -f "$TEST_DIR/metrics/stemedb_backup.prom" ]]; then
  fail "Metrics not exported"
fi

if ! grep -q "stemedb_backup_last_success_timestamp" "$TEST_DIR/metrics/stemedb_backup.prom"; then
  fail "Missing metrics"
fi

pass "Metrics exported"
|
|
|
|
# Test 4: Verification
# Runs the verification script on the backup chosen in Test 2, then checks
# that the metrics file reports a verification status of 1.
info "Test 4: Backup verification..."

METRICS_DIR="$TEST_DIR/metrics" \
  "$PROJECT_DIR/scripts/verify-backup.sh" "$BACKUP" >/dev/null 2>&1 \
  || fail "Verification failed"

# Match the sample line itself: metric name at line start, optional label
# set, then the value 1. The looser "name.*1" form would also accept a
# comment line or a failing sample that has a "1" in a label or timestamp.
# NOTE(review): assumes the Prometheus text exposition layout
# (`name[{labels}] value [timestamp]`); confirm against verify-backup.sh.
grep -Eq '^stemedb_backup_verification_status(\{[^}]*\})?[[:space:]]+1(\.0+)?([[:space:]]|$)' \
  "$TEST_DIR/metrics/stemedb_backup.prom" \
  || fail "Verification status incorrect"

pass "Verification passed"
|
|
|
|
# Test 5: Retention
# Adds three more backups, expects four in total, then runs one more backup
# with --keep-last 1d and checks that retention did not over-delete.
info "Test 5: Retention policy..."

# The one-second pause presumably keeps the generated backup names distinct;
# confirm against backup-stemedb.sh. Each run is checked explicitly because
# its output is discarded and `set -e` would otherwise abort with no message.
for _ in {1..3}; do
  sleep 1
  STEMEDB_WAL_DIR="$TEST_DIR/wal" \
  STEMEDB_DB_DIR="$TEST_DIR/db" \
  METRICS_DIR="$TEST_DIR/metrics" \
    "$PROJECT_DIR/scripts/backup-stemedb.sh" --output "$TEST_DIR/backups" >/dev/null 2>&1 \
    || fail "Backup script exited with an error"
done

# -maxdepth 1 matches the lookup in Test 1: count only top-level backups.
BACKUP_COUNT=$(find "$TEST_DIR/backups" -maxdepth 1 -type d -name "stemedb-backup-*" | wc -l)
[[ "$BACKUP_COUNT" -eq 4 ]] || fail "Expected 4 backups, found $BACKUP_COUNT"

STEMEDB_WAL_DIR="$TEST_DIR/wal" \
STEMEDB_DB_DIR="$TEST_DIR/db" \
METRICS_DIR="$TEST_DIR/metrics" \
  "$PROJECT_DIR/scripts/backup-stemedb.sh" \
  --output "$TEST_DIR/backups" \
  --keep-last 1d >/dev/null 2>&1 \
  || fail "Backup with --keep-last exited with an error"

# Every backup here is only seconds old. This is a lower bound that guards
# against retention removing recent backups.
BACKUP_COUNT_AFTER=$(find "$TEST_DIR/backups" -maxdepth 1 -type d -name "stemedb-backup-*" | wc -l)
[[ "$BACKUP_COUNT_AFTER" -ge 3 ]] || fail "Retention too aggressive"
pass "Retention policy working"
|
|
|
|
# Test 6: Dry run
# A --dry-run invocation must leave the number of backup directories
# unchanged.
info "Test 6: Dry run mode..."

BEFORE=$(find "$TEST_DIR/backups" -maxdepth 1 -type d -name "stemedb-backup-*" | wc -l)

# METRICS_DIR is set like in the other backup invocations so the run stays
# confined to the scratch tree. The exit status is checked explicitly because
# the output is discarded and `set -e` would otherwise abort with no message.
STEMEDB_WAL_DIR="$TEST_DIR/wal" \
STEMEDB_DB_DIR="$TEST_DIR/db" \
METRICS_DIR="$TEST_DIR/metrics" \
  "$PROJECT_DIR/scripts/backup-stemedb.sh" \
  --output "$TEST_DIR/backups" \
  --dry-run >/dev/null 2>&1 \
  || fail "Dry run exited with an error"

AFTER=$(find "$TEST_DIR/backups" -maxdepth 1 -type d -name "stemedb-backup-*" | wc -l)
[[ "$BEFORE" -eq "$AFTER" ]] || fail "Dry run created backup"
pass "Dry run mode working"
|
|
|
|
# Test 7: Alert rules
# Only checks that the Prometheus alert rule file exists in the checkout.
info "Test 7: Alert rules..."

if [[ ! -f "$PROJECT_DIR/docs/operations/deployment/prometheus/backup-alerts.yml" ]]; then
  fail "Alert rules missing"
fi

pass "Alert rules present"
|
|
|
|
# Summary: closing banner, reached only when no earlier check called fail.
printf '\n'
printf '%s\n' "=========================================="
printf '%b\n' " ${GREEN}All tests passed (7/7)${NC}"
printf '%s\n' "=========================================="
printf '\n'
|