This commit implements comprehensive production hardening across multiple layers to prepare StemeDB for enterprise pilot deployments: ## API Layer - Add rate limiting middleware with configurable limits per endpoint - Enhance error handling with detailed context and proper HTTP status codes - Add security hardening tests for input validation and boundary conditions - Create store_helpers module for defensive storage access patterns ## Storage & WAL - Optimize group commit batching for higher throughput - Add defensive error handling in hybrid backend with proper fallbacks - Enhance WAL journal durability guarantees with fsync validation - Improve index store query performance with better caching ## Operations & Deployment - Add comprehensive operations documentation (deployment, monitoring, DR) - Create systemd units for backup, WAL archival, and verification - Add monitoring configs (Prometheus alerts, metrics exporters) - Implement backup/restore scripts with verification and S3 archival - Add DR drill automation and runbook procedures - Create load balancer configs (nginx, envoy) with health checks ## Documentation - Update CLAUDE.md with operations and troubleshooting guides - Expand roadmap with production readiness milestones - Add pilot success criteria and deployment reference architecture - Document TLS setup, monitoring integration, and incident response ## Configuration - Add .env.example with all required environment variables - Document resource sizing for different deployment scales - Add configuration examples for various deployment topologies This positions StemeDB for successful enterprise pilots with proper operational discipline, monitoring, backup/DR, and security hardening. Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
390 lines
17 KiB
Plaintext
390 lines
17 KiB
Plaintext
# Nginx Reverse Proxy Configuration for StemeDB
|
|
#
|
|
# This configuration provides:
|
|
# - TLS 1.3 termination with Let's Encrypt
|
|
# - HTTP → HTTPS redirect
|
|
# - Request size limits (2MB)
|
|
# - Rate limiting (100 req/sec per IP)
|
|
# - Security headers (HSTS, X-Frame-Options)
|
|
# - Health-checked upstream (single-node or cluster)
|
|
# - Admin endpoint restrictions (private RFC 1918 networks by default; narrow to your VPN subnet)
|
|
# - Metrics endpoint restrictions (internal-only)
|
|
#
|
|
# Installation:
|
|
# sudo cp stemedb.conf /etc/nginx/sites-available/
|
|
# sudo ln -s /etc/nginx/sites-available/stemedb.conf /etc/nginx/sites-enabled/
|
|
# sudo nginx -t
|
|
# sudo systemctl reload nginx
|
|
|
|
# ┌───────────────────────────────────────────────────────────┐
|
|
# │ Rate Limiting Zones │
|
|
# └───────────────────────────────────────────────────────────┘
|
|
|
|
# Shared-memory zones referenced by the limit_req / limit_conn directives in
# the HTTPS server block. Each zone is keyed on the client address and is
# 10 MB in size.

# Tracks concurrent connections per client IP (the cap of 10 is set by
# limit_conn in the server block).
limit_conn_zone $binary_remote_addr zone=conn_limit:10m;

# Write-heavy endpoints: 10 requests/second per client IP.
limit_req_zone $binary_remote_addr zone=write_limit:10m rate=10r/s;

# General API requests: 100 requests/second per client IP.
limit_req_zone $binary_remote_addr zone=api_limit:10m rate=100r/s;
|
|
|
|
# ┌───────────────────────────────────────────────────────────┐
|
|
# │ Upstream Configuration │
|
|
# └───────────────────────────────────────────────────────────┘
|
|
|
|
# Backend pool for a single-node deployment. Every proxy_pass in this file
# targets the name "stemedb_backend".
upstream stemedb_backend {
    server localhost:18180;

    # Keep up to 32 idle connections to the backend open for reuse.
    keepalive 32;

    # Active health checks are left disabled because they need the
    # third-party nginx_upstream_check_module:
    # check interval=5000 rise=2 fall=3 timeout=3000;
}
|
|
|
|
# Three-node cluster configuration (comment out single-node above)
|
|
# upstream stemedb_backend {   # keep the name "stemedb_backend": every proxy_pass below targets it
|
|
# # Round-robin (default)
|
|
# server 10.0.1.51:18180 weight=1 max_fails=3 fail_timeout=30s;
|
|
# server 10.0.1.52:18180 weight=1 max_fails=3 fail_timeout=30s;
|
|
# server 10.0.1.53:18180 weight=1 max_fails=3 fail_timeout=30s;
|
|
#
|
|
# # Connection keepalive
|
|
# keepalive 32;
|
|
# }
|
|
|
|
# ┌───────────────────────────────────────────────────────────┐
|
|
# │ HTTP → HTTPS Redirect │
|
|
# └───────────────────────────────────────────────────────────┘
|
|
|
|
# Plain-HTTP listener: answers ACME challenges and redirects everything else.
server {
    server_name stemedb.example.com;

    listen 80;
    listen [::]:80;

    # Let's Encrypt HTTP-01 challenge files are served from the certbot webroot.
    location /.well-known/acme-challenge/ {
        root /var/www/certbot;
    }

    # Any other request is permanently redirected to the same URI over HTTPS.
    location / {
        return 301 https://$server_name$request_uri;
    }
}
|
|
|
|
# ┌───────────────────────────────────────────────────────────┐
|
|
# │ HTTPS Server (Main Configuration) │
|
|
# └───────────────────────────────────────────────────────────┘
|
|
|
|
server {
|
|
listen 443 ssl http2;
listen [::]:443 ssl http2;
server_name stemedb.example.com;

# ─────────────────────────────────────────────────────────
# TLS Configuration
# ─────────────────────────────────────────────────────────

# Let's Encrypt certificates (managed by certbot)
ssl_certificate /etc/letsencrypt/live/stemedb.example.com/fullchain.pem;
ssl_certificate_key /etc/letsencrypt/live/stemedb.example.com/privkey.pem;

# TLS 1.3 only
ssl_protocols TLSv1.3;

# Cipher selection.
# NOTE: ssl_ciphers configures the TLS 1.2-and-below cipher list only. Giving
# it nothing but TLS 1.3 suite names (TLS_AES_256_GCM_SHA384, ...) makes
# OpenSSL reject the list ("no cipher match") and nginx refuses to start, so
# no ssl_ciphers directive is set here. OpenSSL's default TLS 1.3 suites are
# used. To pin or reorder them explicitly, uncomment the line below
# (requires nginx 1.19.4+ built against OpenSSL 1.1.1+):
# ssl_conf_command Ciphersuites TLS_AES_256_GCM_SHA384:TLS_CHACHA20_POLY1305_SHA256:TLS_AES_128_GCM_SHA256;
ssl_prefer_server_ciphers on;

# SSL session cache: 10 MB shared across workers, 10 minute lifetime,
# session tickets disabled.
ssl_session_cache shared:SSL:10m;
ssl_session_timeout 10m;
ssl_session_tickets off;

# OCSP stapling. The resolver is needed so nginx can look up the OCSP
# responder's hostname; chain.pem is used to verify the stapled response.
ssl_stapling on;
ssl_stapling_verify on;
ssl_trusted_certificate /etc/letsencrypt/live/stemedb.example.com/chain.pem;
resolver 8.8.8.8 8.8.4.4 valid=300s;
resolver_timeout 5s;
|
|
|
|
# ─────────────────────────────────────────────────────────
|
|
# Security Headers
|
|
# ─────────────────────────────────────────────────────────
|
|
|
|
# All headers below carry "always" so they are also attached to error
# responses, not just 2xx/3xx.

# HSTS: force HTTPS for one year, covering subdomains, with the preload flag.
add_header Strict-Transport-Security "max-age=31536000; includeSubDomains; preload" always;

# Clickjacking defence: only same-origin pages may frame responses.
add_header X-Frame-Options "SAMEORIGIN" always;

# Forbid MIME type sniffing.
add_header X-Content-Type-Options "nosniff" always;

# Legacy browser XSS filter, in blocking mode.
add_header X-XSS-Protection "1; mode=block" always;

# Referrer policy.
add_header Referrer-Policy "strict-origin-when-cross-origin" always;

# Content Security Policy: same-origin by default, inline scripts/styles
# permitted, images additionally allowed from data: URIs.
add_header Content-Security-Policy "default-src 'self'; script-src 'self' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; img-src 'self' data:; font-src 'self'; connect-src 'self';" always;
|
|
|
|
# ─────────────────────────────────────────────────────────
|
|
# Logging
|
|
# ─────────────────────────────────────────────────────────
|
|
|
|
# Per-site log files: errors at "warn" and above, requests in the
# predefined "combined" format.
error_log /var/log/nginx/stemedb-error.log warn;
access_log /var/log/nginx/stemedb-access.log combined;
|
|
|
|
# ─────────────────────────────────────────────────────────
|
|
# Global Limits
|
|
# ─────────────────────────────────────────────────────────
|
|
|
|
# Max request body size (2MB for assertions)
client_max_body_size 2M;

# Default proxy timeouts for every location (the health check overrides
# these with shorter values).
proxy_connect_timeout 10s;
proxy_send_timeout 30s;
proxy_read_timeout 30s;

# At most 10 concurrent connections per client IP (zone conn_limit).
limit_conn conn_limit 10;

# Reject throttled requests with 429 Too Many Requests. nginx's default
# rejection status for both limit_req and limit_conn is 503; without these
# two directives the "error_page 429" handler in this server block would
# never fire, and throttling would be reported through /50x.html as if the
# backend were down.
limit_req_status 429;
limit_conn_status 429;
|
|
|
|
# ─────────────────────────────────────────────────────────
|
|
# Health Check Endpoint (Public)
|
|
# ─────────────────────────────────────────────────────────
|
|
|
|
# Public health check, proxied to the backend with short timeouts.
location = /v1/health {
    proxy_pass http://stemedb_backend;
    proxy_http_version 1.1;
    proxy_set_header Connection "";

    # No request rate limiting on health checks. limit_req is only active
    # where a "limit_req zone=..." directive is present, and none is set at
    # server level, so this location is unthrottled simply by not declaring
    # one. (There is no "limit_req off" form; nginx rejects it as an
    # invalid parameter.) The server-level limit_conn still applies.

    # Fast timeout for health checks
    proxy_connect_timeout 3s;
    proxy_send_timeout 5s;
    proxy_read_timeout 5s;
}
|
|
|
|
# ─────────────────────────────────────────────────────────
|
|
# Write Endpoints (Stricter Rate Limits)
|
|
# ─────────────────────────────────────────────────────────
|
|
|
|
# Write endpoints: exactly /v1/assert and /v1/retract.
location ~ ^/v1/(assert|retract)$ {
    # Stricter write limit: 10 req/sec per IP, up to 20 excess requests
    # admitted immediately (nodelay); anything beyond that is rejected.
    limit_req zone=write_limit burst=20 nodelay;

    # Writes are not idempotent, so never replay one on another upstream.
    proxy_next_upstream off;

    proxy_http_version 1.1;
    proxy_pass http://stemedb_backend;

    # Empty Connection header so upstream keepalive connections can be reused.
    proxy_set_header Connection "";

    # Pass the original host, client address and scheme to the backend.
    proxy_set_header Host $host;
    proxy_set_header X-Real-IP $remote_addr;
    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    proxy_set_header X-Forwarded-Proto $scheme;
}
|
|
|
|
# ─────────────────────────────────────────────────────────
|
|
# Query Endpoints (Standard Rate Limits)
|
|
# ─────────────────────────────────────────────────────────
|
|
|
|
# Query endpoints: any URI starting with /v1/query.
location /v1/query {
    # Standard API limit: 100 req/sec per IP, up to 200 excess requests
    # admitted immediately (nodelay); anything beyond that is rejected.
    limit_req zone=api_limit burst=200 nodelay;

    # Retry on connection errors, timeouts and 502/503 responses:
    # at most 2 tries within 10 seconds in total.
    proxy_next_upstream error timeout http_502 http_503;
    proxy_next_upstream_tries 2;
    proxy_next_upstream_timeout 10s;

    proxy_http_version 1.1;
    proxy_pass http://stemedb_backend;

    # Empty Connection header so upstream keepalive connections can be reused.
    proxy_set_header Connection "";

    # Pass the original host, client address and scheme to the backend.
    proxy_set_header Host $host;
    proxy_set_header X-Real-IP $remote_addr;
    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    proxy_set_header X-Forwarded-Proto $scheme;
}
|
|
|
|
# ─────────────────────────────────────────────────────────
|
|
# Admin Endpoints (Restricted to Internal Network)
|
|
# ─────────────────────────────────────────────────────────
|
|
|
|
# Admin endpoints.
# ⚠️ CRITICAL: the admin API has NO authentication of its own; the address
# filter below is the only access control. Keep it as narrow as possible.
location /v1/admin/ {
    # Private (RFC 1918) address ranges are allowed ...
    allow 10.0.0.0/8;
    allow 172.16.0.0/12;
    allow 192.168.0.0/16;

    # ... or replace the ranges above with a specific VPN subnet:
    # allow 10.8.0.0/24;

    # Everyone else is refused.
    deny all;

    proxy_http_version 1.1;
    proxy_pass http://stemedb_backend;

    # Empty Connection header so upstream keepalive connections can be reused.
    proxy_set_header Connection "";

    # Pass the original host, client address and scheme to the backend.
    proxy_set_header Host $host;
    proxy_set_header X-Real-IP $remote_addr;
    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    proxy_set_header X-Forwarded-Proto $scheme;
}
|
|
|
|
# ─────────────────────────────────────────────────────────
|
|
# Metrics Endpoint (Restricted to Prometheus)
|
|
# ─────────────────────────────────────────────────────────
|
|
|
|
# Metrics endpoint, reachable only from the Prometheus server.
location /metrics {
    # Only allow from Prometheus server
    allow 10.0.1.100; # Replace with your Prometheus IP

    # Deny all others
    deny all;

    proxy_pass http://stemedb_backend;
    proxy_http_version 1.1;
    proxy_set_header Connection "";

    # No request rate limiting on metrics. limit_req is only active where a
    # "limit_req zone=..." directive is present, and none is set at server
    # level, so omitting it leaves scrapes unthrottled. (There is no
    # "limit_req off" form; nginx rejects it as an invalid parameter.)
}
|
|
|
|
# ─────────────────────────────────────────────────────────
|
|
# Dashboard (Public with Rate Limiting)
|
|
# ─────────────────────────────────────────────────────────
|
|
|
|
# Dashboard and everything not matched by a more specific location.
location / {
    # Apply API rate limit (100 req/sec per IP, burst 200)
    limit_req zone=api_limit burst=200 nodelay;

    proxy_pass http://stemedb_backend;
    proxy_http_version 1.1;
    proxy_set_header Host $host;
    proxy_set_header X-Real-IP $remote_addr;
    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    proxy_set_header X-Forwarded-Proto $scheme;

    # WebSocket support: forward the client's Upgrade header and ask the
    # backend to switch protocols.
    #
    # This block used to also contain `proxy_set_header Connection "";`.
    # A header set to an empty string is never sent to the backend, so that
    # line had no effect next to the "upgrade" value below and only obscured
    # what is actually sent. It has been removed; the headers on the wire
    # are unchanged.
    #
    # NOTE(review): "Connection: upgrade" is sent on every request, not just
    # WebSocket handshakes. To send it only when the client asks for an
    # upgrade, define at http level
    #     map $http_upgrade $connection_upgrade { default upgrade; '' ''; }
    # and use `proxy_set_header Connection $connection_upgrade;` here.
    proxy_set_header Upgrade $http_upgrade;
    proxy_set_header Connection "upgrade";
}
|
|
|
|
# ─────────────────────────────────────────────────────────
|
|
# Static Files (Optional - for custom dashboard assets)
|
|
# ─────────────────────────────────────────────────────────
|
|
|
|
# location /static/ {
|
|
# alias /var/www/stemedb/static/;
|
|
# expires 1y;
|
|
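# add_header Cache-Control "public, immutable";
# # NOTE: add_header directives are inherited from the server level only when
# # a location defines none of its own. If you enable this block, repeat the
# # security headers (HSTS, X-Frame-Options, ...) here or they will be
# # missing from /static/ responses.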
|
|
# }
|
|
|
|
# ─────────────────────────────────────────────────────────
|
|
# Error Pages
|
|
# ─────────────────────────────────────────────────────────
|
|
|
|
# Map error statuses to static pages under /usr/share/nginx/html.
# Covers 403 (forbidden), 429 (rate limit) and 502/503/504 (upstream failure).
error_page 403 /403.html;
error_page 429 /429.html;
error_page 502 503 504 /50x.html;

# The page locations are "internal": reachable only through error_page
# redirects, never by a direct client request.
location = /403.html {
    internal;
    root /usr/share/nginx/html;
}

location = /429.html {
    internal;
    root /usr/share/nginx/html;
}

location = /50x.html {
    internal;
    root /usr/share/nginx/html;
}
|
|
}
|
|
|
|
# ┌───────────────────────────────────────────────────────────┐
|
|
# │ Usage Instructions │
|
|
# └───────────────────────────────────────────────────────────┘
|
|
#
|
|
# 1. Install certbot:
|
|
# sudo apt install certbot python3-certbot-nginx
|
|
#
|
|
# 2. Obtain certificate:
|
|
# sudo certbot --nginx -d stemedb.example.com
|
|
#
|
|
# 3. Copy config:
|
|
# sudo cp stemedb.conf /etc/nginx/sites-available/
|
|
#
|
|
# 4. Update variables:
|
|
# - Replace stemedb.example.com with your domain
|
|
# - Update internal network ranges (10.0.0.0/8)
|
|
# - Update Prometheus IP (10.0.1.100)
|
|
#
|
|
# 5. Enable site:
|
|
# sudo ln -s /etc/nginx/sites-available/stemedb.conf /etc/nginx/sites-enabled/
|
|
#
|
|
# 6. Test config:
|
|
# sudo nginx -t
|
|
#
|
|
# 7. Reload nginx:
|
|
# sudo systemctl reload nginx
|
|
#
|
|
# 8. Test endpoints:
|
|
# curl https://stemedb.example.com/v1/health
|
|
#
|
|
# 9. Set up auto-renewal:
|
|
# sudo crontab -e
|
|
# # Add: 0 3 * * * certbot renew --quiet && systemctl reload nginx
|
|
|
|
# ┌───────────────────────────────────────────────────────────┐
|
|
# │ Monitoring & Troubleshooting │
|
|
# └───────────────────────────────────────────────────────────┘
|
|
#
|
|
# View access logs:
|
|
# sudo tail -f /var/log/nginx/stemedb-access.log
|
|
#
|
|
# View error logs:
|
|
# sudo tail -f /var/log/nginx/stemedb-error.log
|
|
#
|
|
# Check rate limit status:
|
|
# sudo grep "limiting requests" /var/log/nginx/stemedb-error.log
|
|
#
|
|
# Test rate limiting:
|
|
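# seq 1 1000 | xargs -P 10 -I{} curl -s -o /dev/null -w '%{http_code}\n' https://stemedb.example.com/v1/query | sort | uniq -c
# # Target a rate-limited endpoint such as /v1/query; /v1/health is deliberately
# # not rate limited, so it never shows rejections. Keep parallelism at or
# # below 10 so the per-IP connection limit is not hit first.
# # Rejections begin only once the client exceeds 100 req/s and has used up
# # the burst of 200. The rejection status is the one set by limit_req_status
# # (nginx uses 503 when that directive is not set).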
|
|
#
|
|
# Check TLS configuration:
|
|
# openssl s_client -connect stemedb.example.com:443 -tls1_3
|
|
#
|
|
# Test security headers:
|
|
# curl -I https://stemedb.example.com/v1/health
|
|
|
|
# ┌───────────────────────────────────────────────────────────┐
|
|
# │ Production Hardening Checklist │
|
|
# └───────────────────────────────────────────────────────────┘
|
|
#
|
|
# - [ ] Enable ModSecurity WAF (optional)
|
|
# - [ ] Set up fail2ban for DDoS protection
|
|
# - [ ] Configure log rotation (logrotate)
|
|
# - [ ] Set up centralized logging (ELK, Splunk)
|
|
# - [ ] Enable nginx status page (/nginx_status) for monitoring
|
|
# - [ ] Configure backup upstream servers
|
|
# - [ ] Set up nginx Prometheus exporter
|
|
# - [ ] Test certificate renewal process
|
|
# - [ ] Document rate limit thresholds
|
|
# - [ ] Create custom error pages in /usr/share/nginx/html (50x.html, 429.html, 403.html)
|