Week 1 — deployment prerequisites: - Add TIDAL_API_KEY Bearer auth middleware (constant-time comparison) - Handle SIGTERM alongside ctrl-c for graceful shutdown - Remove test-utils feature from production tidal-server binary - Fix standalone Dockerfile; add cluster Dockerfile and docker-compose - Extract MultiRegionState into state.rs with per-region TidalDb map Week 2 — operational middleware and observability: - Add body limit (2MB), request timeout (30s), concurrency limit (100) - Add SetRequestIdLayer + PropagateRequestIdLayer (x-request-id header) - Add TraceLayer with structured spans including request ID - Activate Prometheus /metrics endpoint via --metrics flag - Add monitoring.md, recovery.md, prometheus-alerts.yaml, grafana-dashboard.json Week 3 — query latency histograms and middleware integration tests: - Add QUERY_LATENCY_BOUNDS (100µs–10s) histogram to tidal library - Instrument retrieve() and search() with tidaldb_retrieve/search_latency_us - Fix: search() latency now recorded on error paths (was skipped via ?) - Lib+bin split in tidal-server enabling integration tests - Add 8 middleware integration tests (auth, body limit, request ID) - Add 2 Prometheus alert rules and 2 Grafana latency panels Post-review fixes: - Fix SIGTERM handler compilation on non-Unix targets (#[cfg(unix)] guard) - Exempt /health from TimeoutLayer + ConcurrencyLimitLayer (prevents false liveness failures under load) - Case-insensitive Bearer scheme matching per RFC 7235 §2.1
409 lines
16 KiB
Rust
409 lines
16 KiB
Rust
//! Runtime metrics for tidalDB.
//!
//! [`MetricsState`] is an `Arc`-shared bag of atomics that `TidalDb` updates
//! on every operation. The metrics HTTP server (when the `metrics` feature
//! is enabled) reads from this shared state to serve Prometheus text format.
//!
//! Adding a new counter in future milestones takes four steps:
//! 1. Add an `AtomicU64` field to `MetricsState`
//! 2. Initialise it in `MetricsState::new` (the struct literal there lists every field)
//! 3. Increment it in the relevant `TidalDb` method
//! 4. Add it to the output of `MetricsState::render_prometheus`
|
|
|
|
pub(crate) mod histogram;
|
|
#[cfg(feature = "metrics")]
|
|
pub(crate) use histogram::{
|
|
LatencyHistogram, QUERY_LATENCY_BOUNDS, WRITE_LATENCY_BOUNDS, write_metric_line,
|
|
};
|
|
|
|
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
|
|
use std::time::Instant;
|
|
|
|
// ── MetricsState ────────────────────────────────────────────────────────────
|
|
|
|
/// Shared runtime metrics for a `TidalDb` instance.
|
|
///
|
|
/// Cheap to clone (`Arc` inside). Thread-safe.
|
|
///
|
|
/// # Examples
|
|
///
|
|
/// ```
|
|
/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|
/// let db = tidaldb::TidalDb::builder().ephemeral().open()?;
|
|
/// let metrics = db.metrics();
|
|
/// assert!(metrics.uptime_seconds() >= 0.0);
|
|
/// assert!((metrics.health_ok_value() - 1.0).abs() < f64::EPSILON);
|
|
/// # Ok(())
|
|
/// # }
|
|
/// ```
|
|
pub struct MetricsState {
|
|
/// Time the database was opened.
|
|
pub(crate) opened_at: Instant,
|
|
/// Whether the database is currently healthy.
|
|
pub(crate) health_ok: AtomicBool,
|
|
|
|
// ── Signal system + WAL metrics (M7p4 Task 02) ──────────────────────
|
|
/// Bytes of WAL segments not yet compacted.
|
|
#[cfg(feature = "metrics")]
|
|
pub(crate) wal_lag_bytes: AtomicU64,
|
|
|
|
/// Total WAL segments compacted since database open.
|
|
#[cfg(feature = "metrics")]
|
|
pub(crate) wal_compacted_segments_total: AtomicU64,
|
|
|
|
/// Unix timestamp (nanoseconds) of the last successful checkpoint.
|
|
#[cfg(feature = "metrics")]
|
|
pub(crate) last_checkpoint_ns: AtomicU64,
|
|
|
|
/// Number of entries in the signal ledger hot tier.
|
|
#[cfg(feature = "metrics")]
|
|
pub(crate) signal_hot_entries: AtomicU64,
|
|
|
|
/// Total signal writes since database open.
|
|
#[cfg(feature = "metrics")]
|
|
pub(crate) signal_writes_total: AtomicU64,
|
|
|
|
/// Signal write latency histogram (microseconds).
|
|
#[cfg(feature = "metrics")]
|
|
pub(crate) signal_write_latency: LatencyHistogram,
|
|
|
|
/// Retrieve query end-to-end latency histogram (microseconds).
|
|
#[cfg(feature = "metrics")]
|
|
pub(crate) retrieve_latency: LatencyHistogram,
|
|
|
|
/// Search query end-to-end latency histogram (microseconds).
|
|
#[cfg(feature = "metrics")]
|
|
pub(crate) search_latency: LatencyHistogram,
|
|
|
|
// ── Session + cohort + degradation metrics (m7p4, task-04) ─────────
|
|
/// Number of currently active sessions.
|
|
#[cfg(feature = "metrics")]
|
|
pub(crate) active_sessions: AtomicU64,
|
|
/// Total sessions closed since open (cumulative).
|
|
#[cfg(feature = "metrics")]
|
|
pub(crate) closed_sessions_total: AtomicU64,
|
|
/// Total sessions auto-closed due to timeout since open (cumulative).
|
|
#[cfg(feature = "metrics")]
|
|
pub(crate) session_auto_closed_total: AtomicU64,
|
|
/// Total requests rate-limited since open (cumulative).
|
|
#[cfg(feature = "metrics")]
|
|
pub(crate) rate_limited_total: AtomicU64,
|
|
/// Current degradation level (0=Full, 1=ReducedCandidates, 2=CoarseAggregates, 3=NoDiversity).
|
|
#[cfg(feature = "metrics")]
|
|
pub(crate) degradation_level: AtomicU64,
|
|
|
|
// ── Index health metrics (m7p4, task-03) ──────────────────────────────
|
|
/// Number of Tantivy segments for the items text index.
|
|
#[cfg(feature = "metrics")]
|
|
pub(crate) tantivy_segment_count: AtomicU64,
|
|
/// Number of documents indexed in the items text index.
|
|
#[cfg(feature = "metrics")]
|
|
pub(crate) tantivy_indexed_docs: AtomicU64,
|
|
/// Total byte size of the `USearch` index files (f16 estimate).
|
|
#[cfg(feature = "metrics")]
|
|
pub(crate) usearch_index_size_bytes: AtomicU64,
|
|
/// Number of vectors stored in the `USearch` index.
|
|
#[cfg(feature = "metrics")]
|
|
pub(crate) usearch_vector_count: AtomicU64,
|
|
/// Total cardinality across all bitmap index entries (category + format + creator + tag).
|
|
#[cfg(feature = "metrics")]
|
|
pub(crate) bitmap_index_cardinality: AtomicU64,
|
|
|
|
/// Total number of failed periodic signal checkpoints.
|
|
pub(crate) checkpoint_failures_total: AtomicU64,
|
|
|
|
// ── Replication metrics (m8p2) ──────────────────────────────────────
|
|
/// Current replication lag in WAL segments (follower only; 0 on leader).
|
|
#[cfg(feature = "metrics")]
|
|
pub(crate) replication_lag_seqno: AtomicU64,
|
|
|
|
// ── M8p5 control plane ───────────────────────────────────────────────
|
|
/// Shared control plane for cluster health queries.
|
|
pub(crate) control_plane: Option<std::sync::Arc<crate::replication::ControlPlane>>,
|
|
}
|
|
|
|
impl MetricsState {
|
|
pub(crate) fn new() -> Self {
|
|
Self {
|
|
opened_at: Instant::now(),
|
|
health_ok: AtomicBool::new(true),
|
|
#[cfg(feature = "metrics")]
|
|
wal_lag_bytes: AtomicU64::new(0),
|
|
#[cfg(feature = "metrics")]
|
|
wal_compacted_segments_total: AtomicU64::new(0),
|
|
#[cfg(feature = "metrics")]
|
|
last_checkpoint_ns: AtomicU64::new(0),
|
|
#[cfg(feature = "metrics")]
|
|
signal_hot_entries: AtomicU64::new(0),
|
|
#[cfg(feature = "metrics")]
|
|
signal_writes_total: AtomicU64::new(0),
|
|
#[cfg(feature = "metrics")]
|
|
signal_write_latency: LatencyHistogram::new(WRITE_LATENCY_BOUNDS),
|
|
#[cfg(feature = "metrics")]
|
|
retrieve_latency: LatencyHistogram::new(QUERY_LATENCY_BOUNDS),
|
|
#[cfg(feature = "metrics")]
|
|
search_latency: LatencyHistogram::new(QUERY_LATENCY_BOUNDS),
|
|
#[cfg(feature = "metrics")]
|
|
active_sessions: AtomicU64::new(0),
|
|
#[cfg(feature = "metrics")]
|
|
closed_sessions_total: AtomicU64::new(0),
|
|
#[cfg(feature = "metrics")]
|
|
session_auto_closed_total: AtomicU64::new(0),
|
|
#[cfg(feature = "metrics")]
|
|
rate_limited_total: AtomicU64::new(0),
|
|
#[cfg(feature = "metrics")]
|
|
degradation_level: AtomicU64::new(0),
|
|
#[cfg(feature = "metrics")]
|
|
tantivy_segment_count: AtomicU64::new(0),
|
|
#[cfg(feature = "metrics")]
|
|
tantivy_indexed_docs: AtomicU64::new(0),
|
|
#[cfg(feature = "metrics")]
|
|
usearch_index_size_bytes: AtomicU64::new(0),
|
|
#[cfg(feature = "metrics")]
|
|
usearch_vector_count: AtomicU64::new(0),
|
|
#[cfg(feature = "metrics")]
|
|
bitmap_index_cardinality: AtomicU64::new(0),
|
|
checkpoint_failures_total: AtomicU64::new(0),
|
|
#[cfg(feature = "metrics")]
|
|
replication_lag_seqno: AtomicU64::new(0),
|
|
control_plane: None,
|
|
}
|
|
}
|
|
|
|
/// Return the current cluster health snapshot, if a control plane is wired.
|
|
#[must_use]
|
|
pub fn cluster_health(&self) -> Option<crate::replication::ClusterHealth> {
|
|
self.control_plane.as_ref().map(|cp| cp.health())
|
|
}
|
|
|
|
/// Uptime in fractional seconds since the database was opened.
|
|
#[must_use]
|
|
pub fn uptime_seconds(&self) -> f64 {
|
|
self.opened_at.elapsed().as_secs_f64()
|
|
}
|
|
|
|
/// Whether the database reports healthy (1.0) or degraded (0.0).
|
|
#[must_use]
|
|
pub fn health_ok_value(&self) -> f64 {
|
|
if self.health_ok.load(Ordering::Relaxed) {
|
|
1.0
|
|
} else {
|
|
0.0
|
|
}
|
|
}
|
|
|
|
/// Render Prometheus text exposition format for all metrics.
|
|
///
|
|
/// Format: <https://prometheus.io/docs/instrumenting/exposition_formats/>
|
|
#[must_use]
|
|
#[allow(clippy::too_many_lines)]
|
|
pub fn render_prometheus(&self) -> String {
|
|
let uptime = self.uptime_seconds();
|
|
let health = self.health_ok_value();
|
|
let version = env!("CARGO_PKG_VERSION");
|
|
let build_hash = crate::BUILD_HASH;
|
|
|
|
#[allow(unused_mut)]
|
|
let mut out = format!(
|
|
"# HELP tidaldb_uptime_seconds Seconds since database opened.\n\
|
|
# TYPE tidaldb_uptime_seconds gauge\n\
|
|
tidaldb_uptime_seconds{{partition_id=\"0\"}} {uptime:.3}\n\n\
|
|
# HELP tidaldb_health_ok Whether the database is healthy. 1 = ok, 0 = degraded.\n\
|
|
# TYPE tidaldb_health_ok gauge\n\
|
|
tidaldb_health_ok{{partition_id=\"0\"}} {health}\n\n\
|
|
# HELP tidaldb_info Build and version information.\n\
|
|
# TYPE tidaldb_info gauge\n\
|
|
tidaldb_info{{version=\"{version}\",build_hash=\"{build_hash}\",partition_id=\"0\"}} 1\n"
|
|
);
|
|
|
|
// Prometheus uses f64 natively; precision loss on u64->f64 is
|
|
// intentional and acceptable for monitoring counters/gauges.
|
|
#[cfg(feature = "metrics")]
|
|
#[allow(clippy::cast_precision_loss)]
|
|
{
|
|
let now_ns = std::time::SystemTime::now()
|
|
.duration_since(std::time::UNIX_EPOCH)
|
|
.unwrap_or_default()
|
|
.as_nanos() as u64;
|
|
|
|
write_metric_line(
|
|
&mut out,
|
|
"tidaldb_wal_lag_bytes",
|
|
"Bytes of WAL segments not yet compacted",
|
|
"gauge",
|
|
self.wal_lag_bytes.load(Ordering::Relaxed) as f64,
|
|
);
|
|
|
|
write_metric_line(
|
|
&mut out,
|
|
"tidaldb_wal_compacted_segments_total",
|
|
"Total WAL segments compacted since open",
|
|
"counter",
|
|
self.wal_compacted_segments_total.load(Ordering::Relaxed) as f64,
|
|
);
|
|
|
|
let last_cp_ns = self.last_checkpoint_ns.load(Ordering::Relaxed);
|
|
let checkpoint_age = if last_cp_ns > 0 && now_ns > last_cp_ns {
|
|
now_ns - last_cp_ns
|
|
} else {
|
|
0
|
|
};
|
|
write_metric_line(
|
|
&mut out,
|
|
"tidaldb_checkpoint_age_seconds",
|
|
"Seconds since the last successful checkpoint",
|
|
"gauge",
|
|
checkpoint_age as f64 / 1_000_000_000.0,
|
|
);
|
|
|
|
write_metric_line(
|
|
&mut out,
|
|
"tidaldb_signal_hot_entries",
|
|
"Number of entries in the signal ledger hot tier",
|
|
"gauge",
|
|
self.signal_hot_entries.load(Ordering::Relaxed) as f64,
|
|
);
|
|
|
|
write_metric_line(
|
|
&mut out,
|
|
"tidaldb_signal_writes_total",
|
|
"Total signal writes since database open",
|
|
"counter",
|
|
self.signal_writes_total.load(Ordering::Relaxed) as f64,
|
|
);
|
|
|
|
out.push_str(&self.signal_write_latency.render_prometheus(
|
|
"tidaldb_signal_write_latency_us",
|
|
"Signal write latency in microseconds",
|
|
));
|
|
|
|
out.push_str(&self.retrieve_latency.render_prometheus(
|
|
"tidaldb_retrieve_latency_us",
|
|
"Retrieve query end-to-end latency in microseconds",
|
|
));
|
|
|
|
out.push_str(&self.search_latency.render_prometheus(
|
|
"tidaldb_search_latency_us",
|
|
"Search query end-to-end latency in microseconds",
|
|
));
|
|
|
|
// Index health metrics.
|
|
write_metric_line(
|
|
&mut out,
|
|
"tidaldb_tantivy_segment_count",
|
|
"Number of Tantivy index segments",
|
|
"gauge",
|
|
self.tantivy_segment_count.load(Ordering::Relaxed) as f64,
|
|
);
|
|
write_metric_line(
|
|
&mut out,
|
|
"tidaldb_tantivy_indexed_docs",
|
|
"Number of documents indexed in Tantivy",
|
|
"gauge",
|
|
self.tantivy_indexed_docs.load(Ordering::Relaxed) as f64,
|
|
);
|
|
write_metric_line(
|
|
&mut out,
|
|
"tidaldb_usearch_index_size_bytes",
|
|
"Estimated byte size of USearch vector indexes",
|
|
"gauge",
|
|
self.usearch_index_size_bytes.load(Ordering::Relaxed) as f64,
|
|
);
|
|
write_metric_line(
|
|
&mut out,
|
|
"tidaldb_usearch_vector_count",
|
|
"Number of vectors stored in USearch indexes",
|
|
"gauge",
|
|
self.usearch_vector_count.load(Ordering::Relaxed) as f64,
|
|
);
|
|
write_metric_line(
|
|
&mut out,
|
|
"tidaldb_bitmap_index_cardinality",
|
|
"Total entity IDs across all bitmap indexes",
|
|
"gauge",
|
|
self.bitmap_index_cardinality.load(Ordering::Relaxed) as f64,
|
|
);
|
|
|
|
// Session lifecycle metrics.
|
|
write_metric_line(
|
|
&mut out,
|
|
"tidaldb_active_sessions",
|
|
"Number of currently active agent sessions",
|
|
"gauge",
|
|
self.active_sessions.load(Ordering::Relaxed) as f64,
|
|
);
|
|
write_metric_line(
|
|
&mut out,
|
|
"tidaldb_closed_sessions_total",
|
|
"Total agent sessions closed since open",
|
|
"counter",
|
|
self.closed_sessions_total.load(Ordering::Relaxed) as f64,
|
|
);
|
|
write_metric_line(
|
|
&mut out,
|
|
"tidaldb_session_auto_closed_total",
|
|
"Total agent sessions auto-closed due to timeout",
|
|
"counter",
|
|
self.session_auto_closed_total.load(Ordering::Relaxed) as f64,
|
|
);
|
|
// Rate limiting.
|
|
write_metric_line(
|
|
&mut out,
|
|
"tidaldb_rate_limited_total",
|
|
"Total requests rate-limited due to overload",
|
|
"counter",
|
|
self.rate_limited_total.load(Ordering::Relaxed) as f64,
|
|
);
|
|
// Degradation.
|
|
write_metric_line(
|
|
&mut out,
|
|
"tidaldb_degradation_level",
|
|
"Current degradation level (0=full, 1=reduced, 2=coarse, 3=no_diversity)",
|
|
"gauge",
|
|
self.degradation_level.load(Ordering::Relaxed) as f64,
|
|
);
|
|
|
|
// Replication lag.
|
|
write_metric_line(
|
|
&mut out,
|
|
"tidaldb_replication_lag_seqno",
|
|
"Replication lag in WAL segments behind the leader",
|
|
"gauge",
|
|
self.replication_lag_seqno.load(Ordering::Relaxed) as f64,
|
|
);
|
|
}
|
|
|
|
// Checkpoint failure counter (unconditional -- not feature-gated).
|
|
{
|
|
use std::fmt::Write;
|
|
let failures = self.checkpoint_failures_total.load(Ordering::Relaxed);
|
|
let _ = write!(
|
|
out,
|
|
"\n# HELP tidaldb_checkpoint_failures_total Total number of failed periodic signal checkpoints\n\
|
|
# TYPE tidaldb_checkpoint_failures_total counter\n\
|
|
tidaldb_checkpoint_failures_total {failures}\n"
|
|
);
|
|
}
|
|
|
|
out
|
|
}
|
|
|
|
/// Render JSON for /healthz.
|
|
#[must_use]
|
|
pub fn render_healthz(&self) -> String {
|
|
let uptime = self.uptime_seconds();
|
|
let status = if self.health_ok.load(Ordering::Relaxed) {
|
|
"ok"
|
|
} else {
|
|
"degraded"
|
|
};
|
|
let version = env!("CARGO_PKG_VERSION");
|
|
let build_hash = crate::BUILD_HASH;
|
|
format!(
|
|
r#"{{"status":"{status}","uptime_seconds":{uptime:.3},"version":"{version}","build_hash":"{build_hash}"}}"#
|
|
)
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod tests;
|