This commit implements comprehensive production hardening across multiple layers to prepare StemeDB for enterprise pilot deployments: ## API Layer - Add rate limiting middleware with configurable limits per endpoint - Enhance error handling with detailed context and proper HTTP status codes - Add security hardening tests for input validation and boundary conditions - Create store_helpers module for defensive storage access patterns ## Storage & WAL - Optimize group commit batching for higher throughput - Add defensive error handling in hybrid backend with proper fallbacks - Enhance WAL journal durability guarantees with fsync validation - Improve index store query performance with better caching ## Operations & Deployment - Add comprehensive operations documentation (deployment, monitoring, DR) - Create systemd units for backup, WAL archival, and verification - Add monitoring configs (Prometheus alerts, metrics exporters) - Implement backup/restore scripts with verification and S3 archival - Add DR drill automation and runbook procedures - Create load balancer configs (nginx, envoy) with health checks ## Documentation - Update CLAUDE.md with operations and troubleshooting guides - Expand roadmap with production readiness milestones - Add pilot success criteria and deployment reference architecture - Document TLS setup, monitoring integration, and incident response ## Configuration - Add .env.example with all required environment variables - Document resource sizing for different deployment scales - Add configuration examples for various deployment topologies This positions StemeDB for successful enterprise pilots with proper operational discipline, monitoring, backup/DR, and security hardening. Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
403 lines
14 KiB
Rust
403 lines
14 KiB
Rust
//! Log rotation via segment files with global offset addressing.
|
|
//!
|
|
//! Each segment file is named `{base_offset:016x}.wal` where `base_offset` is
|
|
//! the global WAL offset where that segment begins. Reads resolve the correct
|
|
//! segment via binary search, and writes rotate to a new segment when the
|
|
//! current one exceeds `MAX_SEGMENT_SIZE`.
|
|
//!
|
|
//! # Cleanup
|
|
//!
|
|
//! `SegmentManager::cleanup(min_cursor)` deletes segments whose entire range
|
|
//! is below `min_cursor`, freeing disk space after consumers have advanced.
|
|
|
|
use crate::error::{QuarantineError, Result};
|
|
use crate::format::{FileHeader, HEADER_SIZE};
|
|
use std::fs;
|
|
use std::path::{Path, PathBuf};
|
|
use tracing::{debug, info, instrument, warn};
|
|
|
|
/// Default maximum segment size: 1 GiB (2^30 bytes).
pub const DEFAULT_MAX_SEGMENT_SIZE: u64 = 1 << 30;
|
|
|
|
/// A single WAL segment file.
#[derive(Debug, Clone)]
pub struct Segment {
    /// Global WAL offset where this segment starts.
    pub base_offset: u64,
    /// Path to the segment file.
    pub path: PathBuf,
    /// Current file size in bytes.
    pub size: u64,
}

impl Segment {
    /// Number of hex digits in the stem of a segment filename.
    const STEM_LEN: usize = 16;

    /// Format a segment filename from its base offset.
    ///
    /// The name is the offset as 16 zero-padded lowercase hex digits followed
    /// by the `.wal` extension, e.g. `00000000000003e8.wal` for offset 1000.
    pub fn filename(base_offset: u64) -> String {
        format!("{base_offset:016x}.wal")
    }

    /// Parse a base offset from a segment filename.
    ///
    /// Returns `None` unless `name` is exactly 16 ASCII hex digits followed by
    /// `.wal`. Uppercase hex digits are accepted.
    pub fn parse_filename(name: &str) -> Option<u64> {
        let stem = name.strip_suffix(".wal")?;

        // `u64::from_str_radix` tolerates a leading `+`, so a stem such as
        // `+000000000000001` would otherwise be mistaken for a segment file.
        // Insist on hex digits only before handing the stem to the parser.
        let well_formed =
            stem.len() == Self::STEM_LEN && stem.bytes().all(|b| b.is_ascii_hexdigit());
        if !well_formed {
            return None;
        }

        u64::from_str_radix(stem, 16).ok()
    }
}
|
|
|
|
/// Manages multiple WAL segment files.
|
|
pub struct SegmentManager {
|
|
/// Directory containing segment files.
|
|
data_dir: PathBuf,
|
|
/// Segments sorted by base_offset.
|
|
segments: Vec<Segment>,
|
|
/// Maximum size per segment before rotation.
|
|
max_segment_size: u64,
|
|
}
|
|
|
|
impl SegmentManager {
|
|
/// Open an existing segment directory, scanning for segment files.
|
|
#[instrument(skip_all, fields(data_dir = %data_dir.as_ref().display()))]
|
|
pub fn open(data_dir: impl AsRef<Path>, max_segment_size: u64) -> Result<Self> {
|
|
let data_dir = data_dir.as_ref().to_path_buf();
|
|
fs::create_dir_all(&data_dir).map_err(|e| QuarantineError::io(&data_dir, e))?;
|
|
|
|
let mut segments = Vec::new();
|
|
|
|
let entries = fs::read_dir(&data_dir).map_err(|e| QuarantineError::io(&data_dir, e))?;
|
|
for entry in entries {
|
|
let entry = entry.map_err(|e| QuarantineError::io(&data_dir, e))?;
|
|
let name = entry.file_name();
|
|
let name_str = name.to_string_lossy();
|
|
|
|
if let Some(base_offset) = Segment::parse_filename(&name_str) {
|
|
let meta = entry.metadata().map_err(|e| QuarantineError::io(entry.path(), e))?;
|
|
segments.push(Segment { base_offset, path: entry.path(), size: meta.len() });
|
|
}
|
|
}
|
|
|
|
segments.sort_by_key(|s| s.base_offset);
|
|
|
|
debug!(segment_count = segments.len(), "SegmentManager opened");
|
|
let mgr = Self { data_dir, segments, max_segment_size };
|
|
|
|
// Initialize metrics
|
|
mgr.update_metrics();
|
|
|
|
Ok(mgr)
|
|
}
|
|
|
|
/// Rescan the data directory for new segment files.
|
|
///
|
|
/// This is used by read-only journal instances that need to discover
|
|
/// segments created by a separate writer instance.
|
|
#[instrument(skip(self), fields(data_dir = %self.data_dir.display()))]
|
|
pub fn refresh(&mut self) -> Result<()> {
|
|
let mut segments = Vec::new();
|
|
|
|
let entries =
|
|
fs::read_dir(&self.data_dir).map_err(|e| QuarantineError::io(&self.data_dir, e))?;
|
|
for entry in entries {
|
|
let entry = entry.map_err(|e| QuarantineError::io(&self.data_dir, e))?;
|
|
let name = entry.file_name();
|
|
let name_str = name.to_string_lossy();
|
|
|
|
if let Some(base_offset) = Segment::parse_filename(&name_str) {
|
|
let meta = entry.metadata().map_err(|e| QuarantineError::io(entry.path(), e))?;
|
|
segments.push(Segment { base_offset, path: entry.path(), size: meta.len() });
|
|
}
|
|
}
|
|
|
|
segments.sort_by_key(|s| s.base_offset);
|
|
debug!(segment_count = segments.len(), "SegmentManager refreshed");
|
|
self.segments = segments;
|
|
|
|
// Update metrics after refresh
|
|
self.update_metrics();
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// Get all segments, sorted by base_offset.
|
|
pub fn segments(&self) -> &[Segment] {
|
|
&self.segments
|
|
}
|
|
|
|
/// Find the segment containing the given global offset.
|
|
///
|
|
/// Uses binary search: finds the last segment whose `base_offset <= offset`.
|
|
pub fn resolve_segment(&self, offset: u64) -> Option<&Segment> {
|
|
if self.segments.is_empty() {
|
|
return None;
|
|
}
|
|
|
|
// Binary search for the largest base_offset <= offset
|
|
let idx = match self.segments.binary_search_by_key(&offset, |s| s.base_offset) {
|
|
Ok(exact) => exact,
|
|
Err(insert) => {
|
|
if insert == 0 {
|
|
return None; // offset is before all segments
|
|
}
|
|
insert - 1
|
|
}
|
|
};
|
|
|
|
Some(&self.segments[idx])
|
|
}
|
|
|
|
/// Get the current (latest) segment, if any.
|
|
pub fn current_segment(&self) -> Option<&Segment> {
|
|
self.segments.last()
|
|
}
|
|
|
|
/// Check if the current segment needs rotation.
|
|
pub fn needs_rotation(&self, current_segment_size: u64) -> bool {
|
|
current_segment_size >= self.max_segment_size
|
|
}
|
|
|
|
/// Update the cached size of the current (latest) segment.
|
|
///
|
|
/// Call this after appending data to keep the cached size in sync with
|
|
/// the actual file size. This ensures that `read()` operations can use
|
|
/// the cached size for bounds checking without a disk stat call.
|
|
pub fn update_current_segment_size(&mut self, new_size: u64) {
|
|
if let Some(segment) = self.segments.last_mut() {
|
|
segment.size = new_size;
|
|
}
|
|
}
|
|
|
|
/// Create a new segment with the given base offset.
|
|
///
|
|
/// Writes a v2 FileHeader to the new file and adds it to the segment list.
|
|
#[instrument(skip(self), fields(base_offset))]
|
|
pub fn create_segment(&mut self, base_offset: u64) -> Result<&Segment> {
|
|
let filename = Segment::filename(base_offset);
|
|
let path = self.data_dir.join(&filename);
|
|
|
|
// Write header
|
|
let header = FileHeader::new();
|
|
let mut buf = Vec::with_capacity(HEADER_SIZE);
|
|
header.write_to(&mut buf)?;
|
|
fs::write(&path, &buf).map_err(|e| QuarantineError::io(&path, e))?;
|
|
|
|
let segment = Segment { base_offset, path, size: HEADER_SIZE as u64 };
|
|
|
|
self.segments.push(segment);
|
|
|
|
// Update metrics
|
|
self.update_metrics();
|
|
|
|
info!(base_offset, filename, "Created new segment");
|
|
|
|
self.segments.last().ok_or_else(|| {
|
|
QuarantineError::IoGeneric(std::io::Error::other("segment list unexpectedly empty"))
|
|
})
|
|
}
|
|
|
|
/// Delete segments whose entire range is below `min_cursor`.
|
|
///
|
|
/// A segment can be deleted if the *next* segment's base_offset <= min_cursor,
|
|
/// meaning no reads will ever need the deleted segment.
|
|
///
|
|
/// Returns the number of bytes freed.
|
|
#[instrument(skip(self))]
|
|
pub fn cleanup(&mut self, min_cursor: u64) -> Result<u64> {
|
|
let mut freed = 0u64;
|
|
let mut to_remove = Vec::new();
|
|
|
|
for (i, _segment) in self.segments.iter().enumerate() {
|
|
// Can only delete if there's a next segment and it starts at or below min_cursor
|
|
if i + 1 < self.segments.len() && self.segments[i + 1].base_offset <= min_cursor {
|
|
to_remove.push(i);
|
|
}
|
|
}
|
|
|
|
// Remove in reverse order to preserve indices
|
|
for &idx in to_remove.iter().rev() {
|
|
let segment = &self.segments[idx];
|
|
info!(
|
|
base_offset = segment.base_offset,
|
|
size = segment.size,
|
|
path = %segment.path.display(),
|
|
"Deleting old segment"
|
|
);
|
|
match fs::remove_file(&segment.path) {
|
|
Ok(()) => {
|
|
freed += segment.size;
|
|
self.segments.remove(idx);
|
|
}
|
|
Err(e) => {
|
|
warn!(
|
|
error = %e,
|
|
path = %segment.path.display(),
|
|
"Failed to delete segment file, keeping in list"
|
|
);
|
|
}
|
|
}
|
|
}
|
|
|
|
if freed > 0 {
|
|
info!(
|
|
freed_bytes = freed,
|
|
remaining_segments = self.segments.len(),
|
|
"Cleanup complete"
|
|
);
|
|
|
|
// Update metrics after cleanup
|
|
self.update_metrics();
|
|
}
|
|
|
|
Ok(freed)
|
|
}
|
|
|
|
/// Get the data directory path.
|
|
pub fn data_dir(&self) -> &Path {
|
|
&self.data_dir
|
|
}
|
|
|
|
/// Update metrics for disk usage and segment count.
|
|
fn update_metrics(&self) {
|
|
let total_disk_usage: u64 = self.segments.iter().map(|s| s.size).sum();
|
|
metrics::gauge!("stemedb_wal_disk_usage_bytes").set(total_disk_usage as f64);
|
|
metrics::gauge!("stemedb_wal_segments_count").set(self.segments.len() as f64);
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Base offsets of every segment the manager currently tracks, in order.
    fn base_offsets(mgr: &SegmentManager) -> Vec<u64> {
        mgr.segments().iter().map(|s| s.base_offset).collect()
    }

    /// Base offset of the segment that `offset` resolves to, if any.
    fn resolved_base(mgr: &SegmentManager, offset: u64) -> Option<u64> {
        mgr.resolve_segment(offset).map(|s| s.base_offset)
    }

    #[test]
    fn test_segment_name_roundtrip() {
        for offset in [0u64, 1, 255, 65536, 0xDEAD_BEEF, u64::MAX] {
            let encoded = Segment::filename(offset);
            let decoded = Segment::parse_filename(&encoded);
            assert_eq!(decoded, Some(offset), "Roundtrip failed for offset {}", offset);
        }
    }

    #[test]
    fn test_parse_filename_rejects_invalid() {
        let invalid_names = [
            "not_a_wal.txt",
            "short.wal",
            "0000000000000000.log",
            "",
            // Too many hex digits
            "00000000000000000.wal",
        ];
        for name in invalid_names {
            assert_eq!(Segment::parse_filename(name), None);
        }
    }

    #[test]
    fn test_resolve_segment_binary_search() {
        let dir = tempdir().expect("tempdir");
        let mut mgr = SegmentManager::open(dir.path(), DEFAULT_MAX_SEGMENT_SIZE).expect("open");

        // Three segments starting at 0, 1000 and 2000.
        mgr.create_segment(0).expect("seg 0");
        mgr.create_segment(1000).expect("seg 1000");
        mgr.create_segment(2000).expect("seg 2000");

        // (queried offset, base offset of the segment expected to hold it)
        let expectations = [
            (0u64, 0u64),
            (500, 0),
            (999, 0),
            (1000, 1000),
            (1500, 1000),
            (2000, 2000),
            (99999, 2000),
        ];
        for (offset, expected_base) in expectations {
            assert_eq!(resolved_base(&mgr, offset), Some(expected_base));
        }
    }

    #[test]
    fn test_resolve_segment_empty() {
        let dir = tempdir().expect("tempdir");
        let mgr = SegmentManager::open(dir.path(), DEFAULT_MAX_SEGMENT_SIZE).expect("open");
        assert!(mgr.resolve_segment(0).is_none());
    }

    #[test]
    fn test_rotation_creates_new_segment() {
        let dir = tempdir().expect("tempdir");
        // Tiny 1KB threshold keeps the test cheap.
        let mut mgr = SegmentManager::open(dir.path(), 1024).expect("open");

        mgr.create_segment(0).expect("create seg 0");
        assert_eq!(mgr.segments().len(), 1);

        // Pretend segment 0 has grown past the threshold.
        assert!(mgr.needs_rotation(2048));
        assert!(!mgr.needs_rotation(512));

        mgr.create_segment(2048).expect("create seg 2048");
        assert_eq!(mgr.segments().len(), 2);
    }

    #[test]
    fn test_cleanup_deletes_old_segments() {
        let dir = tempdir().expect("tempdir");
        let mut mgr = SegmentManager::open(dir.path(), DEFAULT_MAX_SEGMENT_SIZE).expect("open");

        mgr.create_segment(0).expect("seg 0");
        mgr.create_segment(1000).expect("seg 1000");
        mgr.create_segment(2000).expect("seg 2000");
        assert_eq!(mgr.segments().len(), 3);

        // min_cursor=1500: segment 0 goes, because its successor starts at 1000 <= 1500.
        let freed = mgr.cleanup(1500).expect("cleanup");
        assert!(freed > 0);
        assert_eq!(mgr.segments().len(), 2);
        assert_eq!(mgr.segments()[0].base_offset, 1000);

        // min_cursor=2500: segment 1000 goes, because its successor starts at 2000 <= 2500.
        let freed = mgr.cleanup(2500).expect("cleanup");
        assert!(freed > 0);
        assert_eq!(mgr.segments().len(), 1);
        assert_eq!(mgr.segments()[0].base_offset, 2000);

        // The latest segment survives any cursor value.
        let freed = mgr.cleanup(u64::MAX).expect("cleanup");
        assert_eq!(freed, 0);
        assert_eq!(mgr.segments().len(), 1);
    }

    #[test]
    fn test_segment_manager_scans_existing_files() {
        let dir = tempdir().expect("tempdir");

        // Populate the directory through one manager, then drop it.
        {
            let mut writer =
                SegmentManager::open(dir.path(), DEFAULT_MAX_SEGMENT_SIZE).expect("open");
            writer.create_segment(0).expect("seg 0");
            writer.create_segment(5000).expect("seg 5000");
            writer.create_segment(10000).expect("seg 10000");
        }

        // A fresh manager must rediscover the same segments, in order.
        let reopened = SegmentManager::open(dir.path(), DEFAULT_MAX_SEGMENT_SIZE).expect("reopen");
        assert_eq!(reopened.segments().len(), 3);
        assert_eq!(base_offsets(&reopened), vec![0, 5000, 10000]);
    }

    #[test]
    fn test_segment_file_has_valid_header() {
        let dir = tempdir().expect("tempdir");
        let mut mgr = SegmentManager::open(dir.path(), DEFAULT_MAX_SEGMENT_SIZE).expect("open");
        mgr.create_segment(0).expect("seg 0");

        // A new segment holds exactly one header: magic bytes, then the version.
        let bytes = std::fs::read(&mgr.segments()[0].path).expect("read");
        assert_eq!(bytes.len(), HEADER_SIZE);
        assert_eq!(&bytes[0..4], b"STEM");
        assert_eq!(bytes[4], 2); // version
    }
}
|