tidaldb/tidal/src/db/metrics/mod.rs
jordan a0a33f4d9a feat: harden tidal-server for production (Weeks 1–3)
Week 1 — deployment prerequisites:
- Add TIDAL_API_KEY Bearer auth middleware (constant-time comparison)
- Handle SIGTERM alongside ctrl-c for graceful shutdown
- Remove test-utils feature from production tidal-server binary
- Fix standalone Dockerfile; add cluster Dockerfile and docker-compose
- Extract MultiRegionState into state.rs with per-region TidalDb map

Week 2 — operational middleware and observability:
- Add body limit (2MB), request timeout (30s), concurrency limit (100)
- Add SetRequestIdLayer + PropagateRequestIdLayer (x-request-id header)
- Add TraceLayer with structured spans including request ID
- Activate Prometheus /metrics endpoint via --metrics flag
- Add monitoring.md, recovery.md, prometheus-alerts.yaml, grafana-dashboard.json

Week 3 — query latency histograms and middleware integration tests:
- Add QUERY_LATENCY_BOUNDS (100µs–10s) histogram to tidal library
- Instrument retrieve() and search() with tidaldb_retrieve/search_latency_us
- Fix: search() latency now recorded on error paths (was skipped via ?)
- Lib+bin split in tidal-server enabling integration tests
- Add 8 middleware integration tests (auth, body limit, request ID)
- Add 2 Prometheus alert rules and 2 Grafana latency panels

Post-review fixes:
- Fix SIGTERM handler compilation on non-Unix targets (#[cfg(unix)] guard)
- Exempt /health from TimeoutLayer + ConcurrencyLimitLayer (prevents false liveness failures under load)
- Case-insensitive Bearer scheme matching per RFC 7235 §2.1
2026-02-27 20:32:39 -07:00

409 lines
16 KiB
Rust

//! Runtime metrics for tidalDB.
//!
//! [`MetricsState`] is an `Arc`-shared bag of atomics that `TidalDb` updates
//! on every operation. The metrics HTTP server (when the `metrics` feature
//! is enabled) reads from this shared state to serve Prometheus text format.
//!
//! Adding a new counter in future milestones takes four steps:
//! 1. Add an `AtomicU64` field to `MetricsState`
//! 2. Initialize it in `MetricsState::new`
//! 3. Increment it in the relevant `TidalDb` method
//! 4. Emit it from `MetricsState::render_prometheus`
pub(crate) mod histogram;
#[cfg(feature = "metrics")]
pub(crate) use histogram::{
LatencyHistogram, QUERY_LATENCY_BOUNDS, WRITE_LATENCY_BOUNDS, write_metric_line,
};
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
use std::time::Instant;
// ── MetricsState ────────────────────────────────────────────────────────────
/// Shared runtime metrics for a `TidalDb` instance.
///
/// Counters and gauges are stored as atomics, so they can be updated through
/// a shared reference. The struct itself does not derive `Clone`; per the
/// module docs a single instance is shared behind an `Arc`. Thread-safe.
///
/// Most fields exist only when the `metrics` feature is enabled. `opened_at`,
/// `health_ok`, `checkpoint_failures_total` and `control_plane` are always
/// present.
///
/// # Examples
///
/// ```
/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
/// let db = tidaldb::TidalDb::builder().ephemeral().open()?;
/// let metrics = db.metrics();
/// assert!(metrics.uptime_seconds() >= 0.0);
/// assert!((metrics.health_ok_value() - 1.0).abs() < f64::EPSILON);
/// # Ok(())
/// # }
/// ```
pub struct MetricsState {
/// Time the database was opened; the reference point for uptime.
pub(crate) opened_at: Instant,
/// Whether the database is currently healthy.
pub(crate) health_ok: AtomicBool,
// ── Signal system + WAL metrics (M7p4 Task 02) ──────────────────────
/// Bytes of WAL segments not yet compacted.
#[cfg(feature = "metrics")]
pub(crate) wal_lag_bytes: AtomicU64,
/// Total WAL segments compacted since database open.
#[cfg(feature = "metrics")]
pub(crate) wal_compacted_segments_total: AtomicU64,
/// Unix timestamp (nanoseconds) of the last successful checkpoint.
///
/// A value of `0` is treated by `render_prometheus` as "no checkpoint
/// recorded", in which case the reported checkpoint age is 0.
#[cfg(feature = "metrics")]
pub(crate) last_checkpoint_ns: AtomicU64,
/// Number of entries in the signal ledger hot tier.
#[cfg(feature = "metrics")]
pub(crate) signal_hot_entries: AtomicU64,
/// Total signal writes since database open.
#[cfg(feature = "metrics")]
pub(crate) signal_writes_total: AtomicU64,
/// Signal write latency histogram (microseconds).
#[cfg(feature = "metrics")]
pub(crate) signal_write_latency: LatencyHistogram,
/// Retrieve query end-to-end latency histogram (microseconds).
#[cfg(feature = "metrics")]
pub(crate) retrieve_latency: LatencyHistogram,
/// Search query end-to-end latency histogram (microseconds).
#[cfg(feature = "metrics")]
pub(crate) search_latency: LatencyHistogram,
// ── Session + cohort + degradation metrics (m7p4, task-04) ─────────
/// Number of currently active sessions.
#[cfg(feature = "metrics")]
pub(crate) active_sessions: AtomicU64,
/// Total sessions closed since open (cumulative).
#[cfg(feature = "metrics")]
pub(crate) closed_sessions_total: AtomicU64,
/// Total sessions auto-closed due to timeout since open (cumulative).
#[cfg(feature = "metrics")]
pub(crate) session_auto_closed_total: AtomicU64,
/// Total requests rate-limited since open (cumulative).
#[cfg(feature = "metrics")]
pub(crate) rate_limited_total: AtomicU64,
/// Current degradation level (0=Full, 1=ReducedCandidates, 2=CoarseAggregates, 3=NoDiversity).
#[cfg(feature = "metrics")]
pub(crate) degradation_level: AtomicU64,
// ── Index health metrics (m7p4, task-03) ──────────────────────────────
/// Number of Tantivy segments for the items text index.
#[cfg(feature = "metrics")]
pub(crate) tantivy_segment_count: AtomicU64,
/// Number of documents indexed in the items text index.
#[cfg(feature = "metrics")]
pub(crate) tantivy_indexed_docs: AtomicU64,
/// Total byte size of the `USearch` index files (f16 estimate).
#[cfg(feature = "metrics")]
pub(crate) usearch_index_size_bytes: AtomicU64,
/// Number of vectors stored in the `USearch` index.
#[cfg(feature = "metrics")]
pub(crate) usearch_vector_count: AtomicU64,
/// Total cardinality across all bitmap index entries (category + format + creator + tag).
#[cfg(feature = "metrics")]
pub(crate) bitmap_index_cardinality: AtomicU64,
/// Total number of failed periodic signal checkpoints.
///
/// Not feature-gated: this counter exists in every build.
pub(crate) checkpoint_failures_total: AtomicU64,
// ── Replication metrics (m8p2) ──────────────────────────────────────
/// Current replication lag in WAL segments (follower only; 0 on leader).
#[cfg(feature = "metrics")]
pub(crate) replication_lag_seqno: AtomicU64,
// ── M8p5 control plane ───────────────────────────────────────────────
/// Shared control plane for cluster health queries.
///
/// `None` when no control plane has been wired in; `cluster_health` then
/// returns `None` as well.
pub(crate) control_plane: Option<std::sync::Arc<crate::replication::ControlPlane>>,
}
impl MetricsState {
/// Build the metrics state for a database that is being opened now.
///
/// The uptime clock starts at the moment of the call, the health flag starts
/// out `true`, every counter and gauge starts at zero, the latency
/// histograms start empty with their respective bucket bounds, and no
/// control plane is attached.
pub(crate) fn new() -> Self {
    // Every numeric counter and gauge shares the same starting value.
    let zero = || AtomicU64::new(0);

    Self {
        opened_at: Instant::now(),
        health_ok: AtomicBool::new(true),

        // Signal system + WAL.
        #[cfg(feature = "metrics")]
        wal_lag_bytes: zero(),
        #[cfg(feature = "metrics")]
        wal_compacted_segments_total: zero(),
        #[cfg(feature = "metrics")]
        last_checkpoint_ns: zero(),
        #[cfg(feature = "metrics")]
        signal_hot_entries: zero(),
        #[cfg(feature = "metrics")]
        signal_writes_total: zero(),

        // Latency histograms: writes and queries use different bucket bounds.
        #[cfg(feature = "metrics")]
        signal_write_latency: LatencyHistogram::new(WRITE_LATENCY_BOUNDS),
        #[cfg(feature = "metrics")]
        retrieve_latency: LatencyHistogram::new(QUERY_LATENCY_BOUNDS),
        #[cfg(feature = "metrics")]
        search_latency: LatencyHistogram::new(QUERY_LATENCY_BOUNDS),

        // Sessions, rate limiting, degradation.
        #[cfg(feature = "metrics")]
        active_sessions: zero(),
        #[cfg(feature = "metrics")]
        closed_sessions_total: zero(),
        #[cfg(feature = "metrics")]
        session_auto_closed_total: zero(),
        #[cfg(feature = "metrics")]
        rate_limited_total: zero(),
        #[cfg(feature = "metrics")]
        degradation_level: zero(),

        // Index health.
        #[cfg(feature = "metrics")]
        tantivy_segment_count: zero(),
        #[cfg(feature = "metrics")]
        tantivy_indexed_docs: zero(),
        #[cfg(feature = "metrics")]
        usearch_index_size_bytes: zero(),
        #[cfg(feature = "metrics")]
        usearch_vector_count: zero(),
        #[cfg(feature = "metrics")]
        bitmap_index_cardinality: zero(),

        // Always present, regardless of the `metrics` feature.
        checkpoint_failures_total: zero(),

        // Replication.
        #[cfg(feature = "metrics")]
        replication_lag_seqno: zero(),

        control_plane: None,
    }
}
/// Snapshot of cluster health as reported by the attached control plane.
///
/// Returns `None` when no control plane is wired into this instance.
#[must_use]
pub fn cluster_health(&self) -> Option<crate::replication::ClusterHealth> {
    let plane = self.control_plane.as_ref()?;
    Some(plane.health())
}
/// Time elapsed since the database was opened, in fractional seconds.
#[must_use]
pub fn uptime_seconds(&self) -> f64 {
    let since_open = self.opened_at.elapsed();
    since_open.as_secs_f64()
}
/// Health flag as a gauge value: `1.0` when healthy, `0.0` when degraded.
#[must_use]
pub fn health_ok_value(&self) -> f64 {
    let healthy = self.health_ok.load(Ordering::Relaxed);
    // bool -> u8 -> f64 is lossless and yields exactly 0.0 or 1.0.
    f64::from(u8::from(healthy))
}
/// Render Prometheus text exposition format for all metrics.
///
/// The uptime, health and build-info series and the checkpoint failure
/// counter are always emitted. All other series are emitted only when the
/// `metrics` feature is enabled.
///
/// Format: <https://prometheus.io/docs/instrumenting/exposition_formats/>
#[must_use]
#[allow(clippy::too_many_lines)]
pub fn render_prometheus(&self) -> String {
    let uptime = self.uptime_seconds();
    let health = self.health_ok_value();
    let version = env!("CARGO_PKG_VERSION");
    let build_hash = crate::BUILD_HASH;
    let mut out = format!(
        "# HELP tidaldb_uptime_seconds Seconds since database opened.\n\
         # TYPE tidaldb_uptime_seconds gauge\n\
         tidaldb_uptime_seconds{{partition_id=\"0\"}} {uptime:.3}\n\n\
         # HELP tidaldb_health_ok Whether the database is healthy. 1 = ok, 0 = degraded.\n\
         # TYPE tidaldb_health_ok gauge\n\
         tidaldb_health_ok{{partition_id=\"0\"}} {health}\n\n\
         # HELP tidaldb_info Build and version information.\n\
         # TYPE tidaldb_info gauge\n\
         tidaldb_info{{version=\"{version}\",build_hash=\"{build_hash}\",partition_id=\"0\"}} 1\n"
    );
    // Prometheus uses f64 natively; precision loss on u64->f64 is
    // intentional and acceptable for monitoring counters/gauges.
    #[cfg(feature = "metrics")]
    #[allow(clippy::cast_precision_loss)]
    {
        // Wall-clock "now" in nanoseconds since the Unix epoch. A clock set
        // before the epoch yields 0; a value too large for u64 is clamped
        // instead of being silently truncated.
        let now_ns = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .map_or(0, |since_epoch| {
                u64::try_from(since_epoch.as_nanos()).unwrap_or(u64::MAX)
            });
        write_metric_line(
            &mut out,
            "tidaldb_wal_lag_bytes",
            "Bytes of WAL segments not yet compacted",
            "gauge",
            self.wal_lag_bytes.load(Ordering::Relaxed) as f64,
        );
        write_metric_line(
            &mut out,
            "tidaldb_wal_compacted_segments_total",
            "Total WAL segments compacted since open",
            "counter",
            self.wal_compacted_segments_total.load(Ordering::Relaxed) as f64,
        );
        // A stored timestamp of 0 means no checkpoint has been recorded;
        // report an age of 0 in that case. A timestamp ahead of the current
        // clock also saturates to 0 rather than underflowing.
        let last_cp_ns = self.last_checkpoint_ns.load(Ordering::Relaxed);
        let checkpoint_age = if last_cp_ns == 0 {
            0
        } else {
            now_ns.saturating_sub(last_cp_ns)
        };
        write_metric_line(
            &mut out,
            "tidaldb_checkpoint_age_seconds",
            "Seconds since the last successful checkpoint",
            "gauge",
            checkpoint_age as f64 / 1_000_000_000.0,
        );
        write_metric_line(
            &mut out,
            "tidaldb_signal_hot_entries",
            "Number of entries in the signal ledger hot tier",
            "gauge",
            self.signal_hot_entries.load(Ordering::Relaxed) as f64,
        );
        write_metric_line(
            &mut out,
            "tidaldb_signal_writes_total",
            "Total signal writes since database open",
            "counter",
            self.signal_writes_total.load(Ordering::Relaxed) as f64,
        );
        // Latency histograms.
        out.push_str(&self.signal_write_latency.render_prometheus(
            "tidaldb_signal_write_latency_us",
            "Signal write latency in microseconds",
        ));
        out.push_str(&self.retrieve_latency.render_prometheus(
            "tidaldb_retrieve_latency_us",
            "Retrieve query end-to-end latency in microseconds",
        ));
        out.push_str(&self.search_latency.render_prometheus(
            "tidaldb_search_latency_us",
            "Search query end-to-end latency in microseconds",
        ));
        // Index health metrics.
        write_metric_line(
            &mut out,
            "tidaldb_tantivy_segment_count",
            "Number of Tantivy index segments",
            "gauge",
            self.tantivy_segment_count.load(Ordering::Relaxed) as f64,
        );
        write_metric_line(
            &mut out,
            "tidaldb_tantivy_indexed_docs",
            "Number of documents indexed in Tantivy",
            "gauge",
            self.tantivy_indexed_docs.load(Ordering::Relaxed) as f64,
        );
        write_metric_line(
            &mut out,
            "tidaldb_usearch_index_size_bytes",
            "Estimated byte size of USearch vector indexes",
            "gauge",
            self.usearch_index_size_bytes.load(Ordering::Relaxed) as f64,
        );
        write_metric_line(
            &mut out,
            "tidaldb_usearch_vector_count",
            "Number of vectors stored in USearch indexes",
            "gauge",
            self.usearch_vector_count.load(Ordering::Relaxed) as f64,
        );
        write_metric_line(
            &mut out,
            "tidaldb_bitmap_index_cardinality",
            "Total entity IDs across all bitmap indexes",
            "gauge",
            self.bitmap_index_cardinality.load(Ordering::Relaxed) as f64,
        );
        // Session lifecycle metrics.
        write_metric_line(
            &mut out,
            "tidaldb_active_sessions",
            "Number of currently active agent sessions",
            "gauge",
            self.active_sessions.load(Ordering::Relaxed) as f64,
        );
        write_metric_line(
            &mut out,
            "tidaldb_closed_sessions_total",
            "Total agent sessions closed since open",
            "counter",
            self.closed_sessions_total.load(Ordering::Relaxed) as f64,
        );
        write_metric_line(
            &mut out,
            "tidaldb_session_auto_closed_total",
            "Total agent sessions auto-closed due to timeout",
            "counter",
            self.session_auto_closed_total.load(Ordering::Relaxed) as f64,
        );
        // Rate limiting.
        write_metric_line(
            &mut out,
            "tidaldb_rate_limited_total",
            "Total requests rate-limited due to overload",
            "counter",
            self.rate_limited_total.load(Ordering::Relaxed) as f64,
        );
        // Degradation.
        write_metric_line(
            &mut out,
            "tidaldb_degradation_level",
            "Current degradation level (0=full, 1=reduced, 2=coarse, 3=no_diversity)",
            "gauge",
            self.degradation_level.load(Ordering::Relaxed) as f64,
        );
        // Replication lag.
        write_metric_line(
            &mut out,
            "tidaldb_replication_lag_seqno",
            "Replication lag in WAL segments behind the leader",
            "gauge",
            self.replication_lag_seqno.load(Ordering::Relaxed) as f64,
        );
    }
    // Checkpoint failure counter (unconditional -- not feature-gated).
    {
        use std::fmt::Write;
        let failures = self.checkpoint_failures_total.load(Ordering::Relaxed);
        // Formatting into a `String` does not fail, so the result is ignored.
        let _ = write!(
            out,
            "\n# HELP tidaldb_checkpoint_failures_total Total number of failed periodic signal checkpoints\n\
             # TYPE tidaldb_checkpoint_failures_total counter\n\
             tidaldb_checkpoint_failures_total {failures}\n"
        );
    }
    out
}
/// Render JSON for /healthz.
///
/// Produces a single-line JSON object with the keys `status` (`"ok"` or
/// `"degraded"`), `uptime_seconds` (three decimal places), `version` and
/// `build_hash`. Values are interpolated as-is, without JSON escaping.
#[must_use]
pub fn render_healthz(&self) -> String {
    let uptime = self.uptime_seconds();
    let healthy = self.health_ok.load(Ordering::Relaxed);
    let build_hash = crate::BUILD_HASH;
    let version = env!("CARGO_PKG_VERSION");
    let status = if healthy { "ok" } else { "degraded" };
    format!(
        r#"{{"status":"{status}","uptime_seconds":{uptime:.3},"version":"{version}","build_hash":"{build_hash}"}}"#
    )
}
}
#[cfg(test)]
mod tests;