stemedb/crates/stemedb-wal/src/segment.rs
jml 3e7eddc074 feat: add enterprise production readiness infrastructure
This commit implements comprehensive production hardening across multiple
layers to prepare StemeDB for enterprise pilot deployments:

## API Layer
- Add rate limiting middleware with configurable limits per endpoint
- Enhance error handling with detailed context and proper HTTP status codes
- Add security hardening tests for input validation and boundary conditions
- Create store_helpers module for defensive storage access patterns

## Storage & WAL
- Optimize group commit batching for higher throughput
- Add defensive error handling in hybrid backend with proper fallbacks
- Enhance WAL journal durability guarantees with fsync validation
- Improve index store query performance with better caching

## Operations & Deployment
- Add comprehensive operations documentation (deployment, monitoring, DR)
- Create systemd units for backup, WAL archival, and verification
- Add monitoring configs (Prometheus alerts, metrics exporters)
- Implement backup/restore scripts with verification and S3 archival
- Add DR drill automation and runbook procedures
- Create load balancer configs (nginx, envoy) with health checks

## Documentation
- Update CLAUDE.md with operations and troubleshooting guides
- Expand roadmap with production readiness milestones
- Add pilot success criteria and deployment reference architecture
- Document TLS setup, monitoring integration, and incident response

## Configuration
- Add .env.example with all required environment variables
- Document resource sizing for different deployment scales
- Add configuration examples for various deployment topologies

This positions StemeDB for successful enterprise pilots with proper
operational discipline, monitoring, backup/DR, and security hardening.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-02-12 06:08:15 +00:00

403 lines
14 KiB
Rust

//! Log rotation via segment files with global offset addressing.
//!
//! Each segment file is named `{base_offset:016x}.wal` where `base_offset` is
//! the global WAL offset where that segment begins. Reads resolve the correct
//! segment via binary search, and writes rotate to a new segment once the
//! current one reaches the configured maximum segment size (see
//! `DEFAULT_MAX_SEGMENT_SIZE`).
//!
//! # Cleanup
//!
//! `SegmentManager::cleanup(min_cursor)` deletes segments whose entire range
//! is below `min_cursor`, freeing disk space after consumers have advanced.
use crate::error::{QuarantineError, Result};
use crate::format::{FileHeader, HEADER_SIZE};
use std::fs;
use std::path::{Path, PathBuf};
use tracing::{debug, info, instrument, warn};
/// Default maximum segment size: 1 GiB (2^30 bytes).
pub const DEFAULT_MAX_SEGMENT_SIZE: u64 = 1024 * 1024 * 1024;
/// A single WAL segment file.
///
/// Plain metadata describing one segment on disk: it stores the file's path,
/// not an open file handle.
#[derive(Debug, Clone)]
pub struct Segment {
/// Global WAL offset where this segment starts.
pub base_offset: u64,
/// Path to the segment file.
pub path: PathBuf,
/// Cached file size in bytes (may lag behind the actual file on disk).
pub size: u64,
}
impl Segment {
    /// Format a segment filename from its base offset.
    ///
    /// The name is the offset rendered as 16 zero-padded lowercase hex digits
    /// followed by `.wal`, so sorting names lexicographically also sorts them
    /// by offset.
    pub fn filename(base_offset: u64) -> String {
        format!("{base_offset:016x}.wal")
    }

    /// Parse a base offset from a segment filename.
    ///
    /// Returns `None` unless `name` is exactly 16 hexadecimal digits followed
    /// by `.wal`. Upper-case hex digits are accepted for compatibility even
    /// though [`Segment::filename`] only produces lower-case names.
    pub fn parse_filename(name: &str) -> Option<u64> {
        let stem = name.strip_suffix(".wal")?;
        // Validate the digits explicitly: `u64::from_str_radix` also accepts a
        // leading `+`, which would otherwise let a stray file such as
        // `+00000000000000f.wal` be picked up as a segment.
        if stem.len() != 16 || !stem.bytes().all(|b| b.is_ascii_hexdigit()) {
            return None;
        }
        u64::from_str_radix(stem, 16).ok()
    }
}
/// Manages multiple WAL segment files.
///
/// Tracks the segment files found in one directory together with the size
/// threshold used to decide when a new segment should be started.
pub struct SegmentManager {
/// Directory containing segment files.
data_dir: PathBuf,
/// Segments sorted by ascending `base_offset`; the last entry is the current segment.
segments: Vec<Segment>,
/// Maximum size in bytes per segment before rotation.
max_segment_size: u64,
}
impl SegmentManager {
    /// Open a segment directory and scan it for existing segment files.
    ///
    /// The directory (and any missing parents) is created first, so opening a
    /// fresh location yields a manager with no segments.
    ///
    /// # Errors
    ///
    /// Returns an I/O error if the directory cannot be created or listed, or
    /// if a segment file's metadata cannot be read.
    #[instrument(skip_all, fields(data_dir = %data_dir.as_ref().display()))]
    pub fn open(data_dir: impl AsRef<Path>, max_segment_size: u64) -> Result<Self> {
        let data_dir = data_dir.as_ref().to_path_buf();
        fs::create_dir_all(&data_dir).map_err(|e| QuarantineError::io(&data_dir, e))?;

        let segments = Self::scan_segments(&data_dir)?;
        debug!(segment_count = segments.len(), "SegmentManager opened");

        let mgr = Self { data_dir, segments, max_segment_size };
        // Publish initial gauge values.
        mgr.update_metrics();
        Ok(mgr)
    }

    /// Rescan the data directory for new segment files.
    ///
    /// This is used by read-only journal instances that need to discover
    /// segments created by a separate writer instance. The segment list is
    /// replaced wholesale, so cached sizes are reset to the on-disk sizes.
    ///
    /// # Errors
    ///
    /// Returns an I/O error if the directory cannot be listed or a segment's
    /// metadata cannot be read; the existing segment list is left untouched
    /// in that case.
    #[instrument(skip(self), fields(data_dir = %self.data_dir.display()))]
    pub fn refresh(&mut self) -> Result<()> {
        let segments = Self::scan_segments(&self.data_dir)?;
        debug!(segment_count = segments.len(), "SegmentManager refreshed");
        self.segments = segments;
        // Gauges must reflect the freshly scanned state.
        self.update_metrics();
        Ok(())
    }

    /// List `data_dir` and return every segment file found, sorted by base offset.
    ///
    /// Entries whose names do not parse as segment filenames are ignored.
    fn scan_segments(data_dir: &Path) -> Result<Vec<Segment>> {
        let entries = fs::read_dir(data_dir).map_err(|e| QuarantineError::io(data_dir, e))?;

        let mut segments = Vec::new();
        for entry in entries {
            let entry = entry.map_err(|e| QuarantineError::io(data_dir, e))?;
            let name = entry.file_name();
            let Some(base_offset) = Segment::parse_filename(&name.to_string_lossy()) else {
                continue;
            };
            let path = entry.path();
            let meta = match entry.metadata() {
                // A file can be deleted between the directory listing and the
                // stat (e.g. another instance running `cleanup`). It no longer
                // exists, so it is simply not part of the result.
                Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
                    debug!(path = %path.display(), "Segment vanished during scan, skipping");
                    continue;
                }
                other => other.map_err(|e| QuarantineError::io(&path, e))?,
            };
            segments.push(Segment { base_offset, path, size: meta.len() });
        }
        segments.sort_by_key(|s| s.base_offset);
        Ok(segments)
    }

    /// Get all segments, sorted by base_offset.
    pub fn segments(&self) -> &[Segment] {
        &self.segments
    }

    /// Find the segment containing the given global offset.
    ///
    /// Uses binary search to find the last segment whose `base_offset <= offset`.
    /// Returns `None` when there are no segments or `offset` lies before the
    /// first one. Offsets past the end resolve to the last segment; no upper
    /// bound is checked here.
    pub fn resolve_segment(&self, offset: u64) -> Option<&Segment> {
        // Number of segments starting at or before `offset`; the one we want
        // is the last of those.
        let candidates = self.segments.partition_point(|s| s.base_offset <= offset);
        candidates.checked_sub(1).and_then(|idx| self.segments.get(idx))
    }

    /// Get the current (latest) segment, if any.
    pub fn current_segment(&self) -> Option<&Segment> {
        self.segments.last()
    }

    /// Check if the current segment needs rotation.
    ///
    /// Returns `true` once `current_segment_size` has reached or exceeded the
    /// configured maximum segment size.
    pub fn needs_rotation(&self, current_segment_size: u64) -> bool {
        current_segment_size >= self.max_segment_size
    }

    /// Update the cached size of the current (latest) segment.
    ///
    /// Call this after appending data to keep the cached size in sync with
    /// the actual file size. This ensures that `read()` operations can use
    /// the cached size for bounds checking without a disk stat call.
    /// Does nothing when there are no segments.
    pub fn update_current_segment_size(&mut self, new_size: u64) {
        if let Some(segment) = self.segments.last_mut() {
            segment.size = new_size;
        }
    }

    /// Create a new segment with the given base offset.
    ///
    /// Writes a v2 FileHeader to the new file and adds it to the segment list,
    /// keeping the list sorted by base offset.
    ///
    /// # Errors
    ///
    /// Returns an I/O error if the header cannot be serialized or written, or
    /// if a file for `base_offset` already exists. An existing segment is
    /// never truncated or overwritten.
    #[instrument(skip(self), fields(base_offset))]
    pub fn create_segment(&mut self, base_offset: u64) -> Result<&Segment> {
        let filename = Segment::filename(base_offset);
        let path = self.data_dir.join(&filename);

        // Serialize the header up front so a serialization failure leaves no file behind.
        let header = FileHeader::new();
        let mut buf: Vec<u8> = Vec::with_capacity(HEADER_SIZE);
        header.write_to(&mut buf)?;

        // `create_new` fails with `AlreadyExists` instead of truncating: an
        // existing segment may hold WAL entries that must not be destroyed.
        let mut file = fs::OpenOptions::new()
            .write(true)
            .create_new(true)
            .open(&path)
            .map_err(|e| QuarantineError::io(&path, e))?;
        let written = std::io::Write::write_all(&mut file, buf.as_slice());
        drop(file);
        if written.is_err() {
            // Best effort: do not leave a header-less file that would block a
            // retry and be picked up by the next directory scan.
            let _ = fs::remove_file(&path);
        }
        written.map_err(|e| QuarantineError::io(&path, e))?;

        let segment = Segment { base_offset, path, size: HEADER_SIZE as u64 };

        // Insert at the sorted position so the binary search in
        // `resolve_segment` stays valid. For the normal rotation case (a new
        // highest offset) this is equivalent to appending.
        let idx = self.segments.partition_point(|s| s.base_offset < base_offset);
        if self.segments.get(idx).is_some_and(|s| s.base_offset == base_offset) {
            // The list still had an entry for a file that no longer existed on
            // disk (otherwise `create_new` would have failed); replace it.
            self.segments[idx] = segment;
        } else {
            self.segments.insert(idx, segment);
        }

        // Update metrics
        self.update_metrics();
        info!(base_offset, filename, "Created new segment");

        // `idx` was just written, so this lookup cannot fail in practice.
        self.segments.get(idx).ok_or_else(|| {
            QuarantineError::IoGeneric(std::io::Error::other("segment list unexpectedly empty"))
        })
    }

    /// Delete segments whose entire range is below `min_cursor`.
    ///
    /// A segment can be deleted if the *next* segment's base_offset <= min_cursor,
    /// meaning no reads will ever need the deleted segment. The last segment
    /// has no successor and is therefore never deleted.
    ///
    /// Segments are removed oldest first, so an interrupted cleanup leaves a
    /// contiguous run of segments on disk. A file that cannot be deleted is
    /// logged and kept in the list; a file that is already missing is dropped
    /// from the list without counting towards the freed bytes.
    ///
    /// Returns the number of bytes freed (based on cached segment sizes).
    /// Individual deletion failures do not produce an `Err`.
    #[instrument(skip(self))]
    pub fn cleanup(&mut self, min_cursor: u64) -> Result<u64> {
        // The list is sorted, so the segments starting at or below the cursor
        // form a prefix; every one of them except the last has a successor
        // that also starts at or below the cursor and is thus fully consumed.
        let started = self.segments.partition_point(|s| s.base_offset <= min_cursor);
        let deletable = started.saturating_sub(1);

        let mut freed = 0u64;
        let mut dropped = 0usize;
        let mut position = 0usize;
        // `retain` visits each element exactly once, in order, so `position`
        // is the element's index in the original list.
        self.segments.retain(|segment| {
            let idx = position;
            position += 1;
            if idx >= deletable {
                return true;
            }

            info!(
                base_offset = segment.base_offset,
                size = segment.size,
                path = %segment.path.display(),
                "Deleting old segment"
            );
            match fs::remove_file(&segment.path) {
                Ok(()) => {
                    freed += segment.size;
                    dropped += 1;
                    false
                }
                Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
                    // Nothing left on disk: keeping the entry would only make
                    // every later cleanup fail on it again.
                    warn!(
                        path = %segment.path.display(),
                        "Segment file already missing, removing from list"
                    );
                    dropped += 1;
                    false
                }
                Err(e) => {
                    warn!(
                        error = %e,
                        path = %segment.path.display(),
                        "Failed to delete segment file, keeping in list"
                    );
                    true
                }
            }
        });

        if dropped > 0 {
            info!(
                freed_bytes = freed,
                remaining_segments = self.segments.len(),
                "Cleanup complete"
            );
            // Segment count changed even if no bytes were accounted as freed.
            self.update_metrics();
        }
        Ok(freed)
    }

    /// Get the data directory path.
    pub fn data_dir(&self) -> &Path {
        &self.data_dir
    }

    /// Update metrics for disk usage and segment count.
    ///
    /// Disk usage is the sum of the cached segment sizes, not a fresh stat of
    /// the files.
    fn update_metrics(&self) {
        let total_disk_usage: u64 = self.segments.iter().map(|s| s.size).sum();
        metrics::gauge!("stemedb_wal_disk_usage_bytes").set(total_disk_usage as f64);
        metrics::gauge!("stemedb_wal_segments_count").set(self.segments.len() as f64);
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Create one segment per offset, panicking with the same `seg <offset>: <error>`
    /// text that `.expect("seg <offset>")` would produce.
    fn create_all(mgr: &mut SegmentManager, offsets: &[u64]) {
        for &offset in offsets {
            mgr.create_segment(offset).unwrap_or_else(|e| panic!("seg {offset}: {e:?}"));
        }
    }

    /// Base offsets of every tracked segment, in list order.
    fn base_offsets(mgr: &SegmentManager) -> Vec<u64> {
        mgr.segments().iter().map(|s| s.base_offset).collect()
    }

    /// `filename` followed by `parse_filename` must give back the original offset.
    #[test]
    fn test_segment_name_roundtrip() {
        for offset in [0u64, 1, 255, 65536, 0xDEAD_BEEF, u64::MAX] {
            let parsed = Segment::parse_filename(&Segment::filename(offset));
            assert_eq!(parsed, Some(offset), "Roundtrip failed for offset {}", offset);
        }
    }

    /// Names with the wrong extension or wrong stem length are not segments.
    #[test]
    fn test_parse_filename_rejects_invalid() {
        let invalid = [
            "not_a_wal.txt",
            "short.wal",
            "0000000000000000.log",
            "",
            // Too many hex digits
            "00000000000000000.wal",
        ];
        let accepted: Vec<&str> = invalid
            .iter()
            .copied()
            .filter(|name| Segment::parse_filename(name).is_some())
            .collect();
        assert_eq!(accepted, Vec::<&str>::new());
    }

    /// Every offset resolves to the segment with the greatest base offset not above it.
    #[test]
    fn test_resolve_segment_binary_search() {
        let dir = tempdir().expect("tempdir");
        let mut mgr = SegmentManager::open(dir.path(), DEFAULT_MAX_SEGMENT_SIZE).expect("open");
        create_all(&mut mgr, &[0, 1000, 2000]);

        // (queried offset, base offset of the segment expected to contain it)
        let cases: [(u64, u64); 7] = [
            (0, 0),
            (500, 0),
            (999, 0),
            (1000, 1000),
            (1500, 1000),
            (2000, 2000),
            (99999, 2000),
        ];
        let resolved: Vec<Option<u64>> = cases
            .iter()
            .map(|&(offset, _)| mgr.resolve_segment(offset).map(|s| s.base_offset))
            .collect();
        let expected: Vec<Option<u64>> = cases.iter().map(|&(_, base)| Some(base)).collect();
        assert_eq!(resolved, expected);
    }

    /// With no segments at all, nothing resolves.
    #[test]
    fn test_resolve_segment_empty() {
        let dir = tempdir().expect("tempdir");
        let mgr = SegmentManager::open(dir.path(), DEFAULT_MAX_SEGMENT_SIZE).expect("open");
        assert_eq!(mgr.resolve_segment(0).map(|s| s.base_offset), None);
    }

    /// Rotation threshold check plus creation of the follow-up segment.
    #[test]
    fn test_rotation_creates_new_segment() {
        let dir = tempdir().expect("tempdir");
        // 1 KB threshold keeps the test small.
        let limit = 1024;
        let mut mgr = SegmentManager::open(dir.path(), limit).expect("open");

        mgr.create_segment(0).expect("create seg 0");
        assert_eq!(mgr.segments().len(), 1);

        // Pretend segment 0 grew to 2048 bytes, i.e. past the threshold.
        assert!(mgr.needs_rotation(2048));
        assert!(!mgr.needs_rotation(512));

        mgr.create_segment(2048).expect("create seg 2048");
        assert_eq!(mgr.segments().len(), 2);
    }

    /// Cleanup removes fully consumed segments but never the last one.
    #[test]
    fn test_cleanup_deletes_old_segments() {
        let dir = tempdir().expect("tempdir");
        let mut mgr = SegmentManager::open(dir.path(), DEFAULT_MAX_SEGMENT_SIZE).expect("open");
        create_all(&mut mgr, &[0, 1000, 2000]);
        assert_eq!(mgr.segments().len(), 3);

        // min_cursor=1500: seg 0 goes because its successor starts at 1000 <= 1500.
        assert!(mgr.cleanup(1500).expect("cleanup") > 0);
        assert_eq!(mgr.segments().len(), 2);
        assert_eq!(mgr.segments().first().map(|s| s.base_offset), Some(1000));

        // min_cursor=2500: seg 1000 goes because its successor starts at 2000 <= 2500.
        assert!(mgr.cleanup(2500).expect("cleanup") > 0);
        assert_eq!(mgr.segments().len(), 1);
        assert_eq!(mgr.segments().first().map(|s| s.base_offset), Some(2000));

        // The last remaining segment survives any cursor value.
        assert_eq!(mgr.cleanup(u64::MAX).expect("cleanup"), 0);
        assert_eq!(mgr.segments().len(), 1);
    }

    /// Reopening a directory rediscovers previously created segments in order.
    #[test]
    fn test_segment_manager_scans_existing_files() {
        let dir = tempdir().expect("tempdir");

        // First manager only exists to put files on disk.
        {
            let mut writer =
                SegmentManager::open(dir.path(), DEFAULT_MAX_SEGMENT_SIZE).expect("open");
            create_all(&mut writer, &[0, 5000, 10000]);
        }

        let reopened = SegmentManager::open(dir.path(), DEFAULT_MAX_SEGMENT_SIZE).expect("reopen");
        assert_eq!(base_offsets(&reopened), vec![0, 5000, 10000]);
    }

    /// A freshly created segment contains exactly one header with magic and version.
    #[test]
    fn test_segment_file_has_valid_header() {
        let dir = tempdir().expect("tempdir");
        let mut mgr = SegmentManager::open(dir.path(), DEFAULT_MAX_SEGMENT_SIZE).expect("open");
        mgr.create_segment(0).expect("seg 0");

        let segment_path = &mgr.segments()[0].path;
        let bytes = std::fs::read(segment_path).expect("read");
        assert_eq!(bytes.len(), HEADER_SIZE);
        assert_eq!(&bytes[..4], b"STEM");
        assert_eq!(bytes[4], 2); // version
    }
}