//! Entity state rebuild from durable storage and periodic checkpoint thread.
|
|
|
|
use std::path::PathBuf;
|
|
use std::sync::Arc;
|
|
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
|
|
use std::time::Duration;
|
|
|
|
use crate::cohort::CohortSignalLedger;
|
|
use crate::query::suggest::SuggestionIndex;
|
|
use crate::schema::{TidalError, Timestamp};
|
|
use crate::signals::{DEFAULT_MAX_SIGNAL_ENTRIES, SignalLedger, trim_cold_entries};
|
|
use crate::storage::{StorageEngine, Tag};
|
|
|
|
use super::metadata::deserialize_metadata;
|
|
use super::metrics::MetricsState;
|
|
use super::storage_box::StorageBox;
|
|
|
|
// ── Index health metrics handles ────────────────────────────────────────────
|
|
|
|
/// Handles to live index instances for periodic metrics refresh.
///
/// When the `metrics` feature is enabled, carries `Arc`/clone references to
/// the text index, embedding registry, and bitmap indexes so the checkpoint
/// thread can read their current sizes without touching `TidalDb` itself.
///
/// When the `metrics` feature is disabled, this is a zero-size type so the
/// function signature remains unchanged and the compiler eliminates all
/// overhead.
///
/// `run_checkpoint_thread` receives this by value and passes it by reference
/// to `refresh_index_metrics`, which is the only reader of the fields.
#[cfg(feature = "metrics")]
pub(super) struct IndexMetricsHandles {
    /// Text index, if present. `refresh_index_metrics` reads its
    /// `index_stats()`; when `None` the text gauges are left untouched.
    pub text_index: Option<Arc<crate::text::TextIndex>>,
    /// Shared embedding slot registry behind an `RwLock`. It is read-locked
    /// during a refresh to obtain its `index_stats()`.
    pub embedding_registry:
        Arc<std::sync::RwLock<crate::storage::vector::registry::EmbeddingSlotRegistry>>,
    /// Bitmap index (category, going by the field name) whose
    /// `total_cardinality()` is summed into the bitmap cardinality gauge.
    pub bitmap_category: crate::storage::indexes::bitmap::BitmapIndex,
    /// Bitmap index (format, going by the field name); summed into the same gauge.
    pub bitmap_format: crate::storage::indexes::bitmap::BitmapIndex,
    /// Bitmap index (creator, going by the field name); summed into the same gauge.
    pub bitmap_creator: crate::storage::indexes::bitmap::BitmapIndex,
    /// Bitmap index (tag, going by the field name); summed into the same gauge.
    pub bitmap_tag: crate::storage::indexes::bitmap::BitmapIndex,
}

/// Field-less stand-in compiled when the `metrics` feature is disabled, so
/// code that names `IndexMetricsHandles` keeps the same signature in both
/// configurations.
#[cfg(not(feature = "metrics"))]
pub(super) struct IndexMetricsHandles;
|
|
|
|
/// Rebuild in-memory entity state from durable storage on restart.
|
|
///
|
|
/// Scans the users keyspace for relationship edges and the items keyspace for
|
|
/// `creator_id` metadata. Populates:
|
|
/// 1. `user_state.blocked` from `RelationshipType::Blocks` edges
|
|
/// 2. `user_state.seen` (hidden items) from `RelationshipType::Hide` edges
|
|
/// 3. `user_state.follows` from `RelationshipType::Follows` edges
|
|
/// 4. `creator_items` bitmap from items with `creator_id` metadata
|
|
/// 5. `interaction_ledger` from `RelationshipType::InteractionWeight` edges
|
|
///
|
|
/// For ephemeral mode, all engines are empty, so this is effectively a no-op.
|
|
pub(super) fn rebuild_entity_state(
|
|
storage: &StorageBox,
|
|
user_state: &crate::entities::UserStateIndex,
|
|
creator_items: &crate::entities::CreatorItemsBitmap,
|
|
interaction_ledger: &crate::entities::InteractionLedger,
|
|
) -> crate::Result<()> {
|
|
use crate::entities::relationship::{
|
|
RelationshipType, deserialize_relationship_value, parse_relationship_to,
|
|
};
|
|
use crate::storage::keys::parse_key;
|
|
|
|
// Scan the users keyspace for all relationship edges.
|
|
// The relationship key format is:
|
|
// [from_entity_id: 8 BE][0x00][Tag::Rel (0x04)][rel_type: 1][to_entity_id: 8 BE]
|
|
// We scan with an empty prefix to get all keys, then filter for Tag::Rel.
|
|
let mut rel_count = 0u64;
|
|
for entry in storage.users_engine().scan_prefix(&[]) {
|
|
let (key, value) = entry.map_err(TidalError::from)?;
|
|
|
|
// Only process relationship keys (Tag::Rel = 0x04).
|
|
if let Some((from_id, Tag::Rel, suffix)) = parse_key(&key) {
|
|
// suffix = [rel_type: 1 byte][to_entity_id: 8 BE]
|
|
if suffix.is_empty() {
|
|
continue;
|
|
}
|
|
let rel_type_byte = suffix[0];
|
|
let Some(rel_type) = RelationshipType::from_byte(rel_type_byte) else {
|
|
continue;
|
|
};
|
|
let Some(to_id) = parse_relationship_to(&key) else {
|
|
continue;
|
|
};
|
|
let from_id_u64 = from_id.as_u64();
|
|
|
|
match rel_type {
|
|
RelationshipType::Blocks => {
|
|
user_state.add_block_creator(from_id_u64, to_id.as_u64());
|
|
rel_count += 1;
|
|
}
|
|
RelationshipType::Hide => {
|
|
#[allow(clippy::cast_possible_truncation)]
|
|
user_state.add_hide(from_id_u64, to_id.as_u64() as u32);
|
|
rel_count += 1;
|
|
}
|
|
RelationshipType::Follows => {
|
|
// Forward: user -> followed creator
|
|
user_state.add_follow(from_id_u64, to_id.as_u64());
|
|
// Reverse: creator -> follower users
|
|
user_state.add_creator_follower(to_id.as_u64(), from_id_u64);
|
|
rel_count += 1;
|
|
}
|
|
RelationshipType::InteractionWeight => {
|
|
// Reconstruct interaction weight from the stored edge value.
|
|
if let Some((weight, ts_nanos)) = deserialize_relationship_value(&value) {
|
|
interaction_ledger.record(from_id_u64, to_id.as_u64(), weight, ts_nanos);
|
|
rel_count += 1;
|
|
}
|
|
}
|
|
RelationshipType::Mute => {
|
|
// Mute edges do not have in-memory state (yet).
|
|
rel_count += 1;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Scan items keyspace for creator_id metadata to rebuild creator_items bitmap.
|
|
let mut item_count = 0u64;
|
|
let item_scan_start = std::time::Instant::now();
|
|
for entry in storage.items_engine().scan_prefix(&[]) {
|
|
let (key, value) = entry.map_err(TidalError::from)?;
|
|
|
|
if let Some((entity_id, Tag::Meta, _suffix)) = parse_key(&key) {
|
|
let meta = deserialize_metadata(&value);
|
|
if let Some(creator_str) = meta.get("creator_id")
|
|
&& let Ok(creator_id) = creator_str.parse::<u64>()
|
|
{
|
|
#[allow(clippy::cast_possible_truncation)]
|
|
creator_items.add_item(creator_id, entity_id.as_u64() as u32);
|
|
item_count += 1;
|
|
if item_count.is_multiple_of(10_000) {
|
|
tracing::info!(rebuilt = item_count, "entity state rebuild in progress");
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if item_count > 0 {
|
|
tracing::info!(
|
|
rebuilt = item_count,
|
|
elapsed_ms = item_scan_start.elapsed().as_millis(),
|
|
"entity state item scan complete"
|
|
);
|
|
}
|
|
|
|
if rel_count > 0 || item_count > 0 {
|
|
tracing::info!(
|
|
relationships = rel_count,
|
|
creator_items = item_count,
|
|
"entity state rebuilt from durable storage"
|
|
);
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// Rebuild `SuggestionIndex` title terms from durable item metadata on restart.
|
|
///
|
|
/// Scans the items keyspace for `Tag::Meta` keys, deserializes metadata, and
|
|
/// calls `suggestion_index.index_title(title)` for each item that has a `"title"`
|
|
/// field. This ensures autocomplete works correctly after a restart without
|
|
/// requiring all items to be re-written.
|
|
///
|
|
/// For ephemeral mode the engine is empty, so this is a no-op.
|
|
pub(super) fn rebuild_suggestion_index(storage: &StorageBox, suggestion_index: &SuggestionIndex) {
|
|
let mut indexed = 0u64;
|
|
for entry in storage.items_engine().scan_prefix(&[]) {
|
|
let Ok((key, value)) = entry else { continue };
|
|
if let Some((_entity_id, Tag::Meta, _suffix)) = crate::storage::keys::parse_key(&key) {
|
|
let meta = deserialize_metadata(&value);
|
|
if let Some(title) = meta.get("title") {
|
|
suggestion_index.index_title(title);
|
|
indexed += 1;
|
|
}
|
|
}
|
|
}
|
|
if indexed > 0 {
|
|
tracing::info!(
|
|
items = indexed,
|
|
"suggestion index rebuilt from durable storage"
|
|
);
|
|
}
|
|
}
|
|
|
|
/// Background thread body: checkpoint signal state to storage every 30 seconds.
|
|
///
|
|
/// Checkpoints both the global signal ledger and the cohort signal ledger
|
|
/// atomically (each writes its own `WriteBatch`). The cohort checkpoint uses
|
|
/// the same storage engine and the same `CheckpointMeta` as the global ledger.
|
|
///
|
|
/// After each successful checkpoint, compacts WAL segments that are fully
|
|
/// covered by the checkpoint. Compaction failure is non-fatal: a warning is
|
|
/// logged and the next checkpoint cycle will retry.
|
|
///
|
|
/// Polls the shutdown flag every 500ms so the thread exits promptly when
|
|
/// `shutdown_inner()` is called. Only runs in persistent mode (ephemeral opens
|
|
/// never spawn this thread).
|
|
///
|
|
/// The `Arc` arguments are intentionally passed by value: the thread must own
|
|
/// them for its entire lifetime (references cannot satisfy the `'static` bound
|
|
/// required by `std::thread::spawn`).
|
|
#[allow(clippy::needless_pass_by_value, clippy::too_many_arguments)]
|
|
pub(super) fn run_checkpoint_thread(
|
|
shutdown: Arc<AtomicBool>,
|
|
ledger: Arc<SignalLedger>,
|
|
cohort_ledger: Arc<CohortSignalLedger>,
|
|
storage: Box<dyn StorageEngine + Send + Sync>,
|
|
last_wal_seq: Arc<AtomicU64>,
|
|
wal_dir: Option<PathBuf>,
|
|
metrics: Arc<MetricsState>,
|
|
index_handles: IndexMetricsHandles,
|
|
) {
|
|
const CHECKPOINT_INTERVAL: Duration = Duration::from_secs(30);
|
|
/// Index health metrics (Tantivy, `USearch`, bitmap) refresh every 10s -- 3x more
|
|
/// frequent than checkpoints so operators get near-real-time index visibility.
|
|
const INDEX_METRICS_INTERVAL: Duration = Duration::from_secs(10);
|
|
const POLL_INTERVAL: Duration = Duration::from_millis(500);
|
|
|
|
let mut elapsed = Duration::ZERO;
|
|
let mut index_metrics_elapsed = Duration::ZERO;
|
|
loop {
|
|
std::thread::sleep(POLL_INTERVAL);
|
|
if shutdown.load(Ordering::Acquire) {
|
|
break;
|
|
}
|
|
elapsed += POLL_INTERVAL;
|
|
index_metrics_elapsed += POLL_INTERVAL;
|
|
|
|
// Refresh index health metrics every 10s (faster than checkpoint).
|
|
#[cfg(feature = "metrics")]
|
|
if index_metrics_elapsed >= INDEX_METRICS_INTERVAL {
|
|
index_metrics_elapsed = Duration::ZERO;
|
|
refresh_index_metrics(&index_handles, &metrics);
|
|
}
|
|
|
|
if elapsed >= CHECKPOINT_INTERVAL {
|
|
elapsed = Duration::ZERO;
|
|
|
|
// Update signal hot entries gauge.
|
|
#[cfg(feature = "metrics")]
|
|
{
|
|
metrics
|
|
.signal_hot_entries
|
|
.store(ledger.entries().len() as u64, Ordering::Relaxed);
|
|
}
|
|
|
|
// (index health metrics refreshed every 10s in the block above)
|
|
|
|
// Trim signal ledger if over the memory budget (5M entries ~5.4 GB).
|
|
let entry_count = ledger.entries().len();
|
|
if entry_count > DEFAULT_MAX_SIGNAL_ENTRIES {
|
|
tracing::info!(
|
|
entry_count,
|
|
max_entries = DEFAULT_MAX_SIGNAL_ENTRIES,
|
|
"signal ledger exceeds memory budget — trimming cold entries"
|
|
);
|
|
let evicted = trim_cold_entries(ledger.entries(), DEFAULT_MAX_SIGNAL_ENTRIES);
|
|
tracing::info!(
|
|
evicted,
|
|
remaining = ledger.entries().len(),
|
|
"signal ledger trim complete"
|
|
);
|
|
}
|
|
|
|
let seq = last_wal_seq.load(Ordering::Relaxed);
|
|
let meta = crate::signals::checkpoint::CheckpointMeta {
|
|
checkpoint_time_ns: Timestamp::now().as_nanos(),
|
|
wal_sequence: seq,
|
|
payload_hash: [0u8; 32], // computed by checkpoint()
|
|
};
|
|
if let Err(e) = ledger.checkpoint(storage.as_ref(), meta) {
|
|
tracing::error!(error = %e, "periodic signal checkpoint failed");
|
|
metrics
|
|
.checkpoint_failures_total
|
|
.fetch_add(1, Ordering::Relaxed);
|
|
} else {
|
|
tracing::debug!("periodic signal checkpoint written");
|
|
|
|
// Update checkpoint age metric.
|
|
#[cfg(feature = "metrics")]
|
|
{
|
|
let now_ns = std::time::SystemTime::now()
|
|
.duration_since(std::time::UNIX_EPOCH)
|
|
.unwrap_or_default()
|
|
.as_nanos() as u64;
|
|
metrics.last_checkpoint_ns.store(now_ns, Ordering::Relaxed);
|
|
}
|
|
|
|
// Compact WAL segments covered by the checkpoint.
|
|
// This runs AFTER the checkpoint is durable, so deleted
|
|
// segments are guaranteed to be redundant.
|
|
if let Some(ref dir) = wal_dir
|
|
&& seq > 0
|
|
{
|
|
match crate::wal::compaction::compact_wal(dir, seq) {
|
|
Ok(result) => {
|
|
#[cfg(feature = "metrics")]
|
|
{
|
|
metrics
|
|
.wal_compacted_segments_total
|
|
.fetch_add(result.segments_deleted as u64, Ordering::Relaxed);
|
|
}
|
|
let _ = result; // suppress unused warning when metrics disabled
|
|
}
|
|
Err(e) => {
|
|
tracing::warn!(error = %e, "WAL compaction after checkpoint failed");
|
|
}
|
|
}
|
|
|
|
// Update WAL lag bytes: sum remaining segment file sizes.
|
|
#[cfg(feature = "metrics")]
|
|
{
|
|
let lag = compute_wal_lag_bytes(dir);
|
|
metrics.wal_lag_bytes.store(lag, Ordering::Relaxed);
|
|
}
|
|
}
|
|
}
|
|
// Checkpoint cohort signal state with the same meta.
|
|
if cohort_ledger.entry_count() > 0
|
|
&& let Err(e) = cohort_ledger.checkpoint(storage.as_ref(), meta)
|
|
{
|
|
tracing::error!(error = %e, "periodic cohort checkpoint failed");
|
|
}
|
|
}
|
|
}
|
|
|
|
// Suppress unused-variable warnings when metrics feature is disabled.
|
|
let _ = &metrics;
|
|
let _ = &index_handles;
|
|
}
|
|
|
|
/// Copy current index statistics into the `MetricsState` gauges.
///
/// `run_checkpoint_thread` invokes this on its index-metrics timer
/// (`INDEX_METRICS_INTERVAL`, 10 s), i.e. more often than the 30 s checkpoint
/// cycle. Three sources are sampled:
///
/// * the text index, if one is present: segment count and indexed document
///   count;
/// * the embedding registry: vector count and index size in bytes, skipped
///   for this round when the registry's read lock returns an error;
/// * the four bitmap indexes: their `total_cardinality()` values added
///   together into a single gauge.
///
/// Every store uses `Relaxed` ordering: these are monitoring gauges, a
/// slightly stale value is acceptable, and no other thread depends on the
/// freshness of any individual gauge.
#[cfg(feature = "metrics")]
fn refresh_index_metrics(handles: &IndexMetricsHandles, metrics: &MetricsState) {
    // Text index gauges.
    if let Some(text_index) = handles.text_index.as_ref() {
        let (segment_count, doc_count) = text_index.index_stats();
        metrics
            .tantivy_segment_count
            .store(segment_count as u64, Ordering::Relaxed);
        metrics
            .tantivy_indexed_docs
            .store(doc_count, Ordering::Relaxed);
    }

    // Embedding registry gauges; left unchanged if the read lock errors.
    if let Ok(registry_guard) = handles.embedding_registry.read() {
        let (vector_count, size_bytes) = registry_guard.index_stats();
        metrics
            .usearch_vector_count
            .store(vector_count, Ordering::Relaxed);
        metrics
            .usearch_index_size_bytes
            .store(size_bytes, Ordering::Relaxed);
    }

    // Bitmap gauge: one combined cardinality across all four indexes.
    let category = handles.bitmap_category.total_cardinality();
    let format = handles.bitmap_format.total_cardinality();
    let creator = handles.bitmap_creator.total_cardinality();
    let tag = handles.bitmap_tag.total_cardinality();
    metrics
        .bitmap_index_cardinality
        .store(category + format + creator + tag, Ordering::Relaxed);
}
|
|
|
|
/// Total on-disk size, in bytes, of the WAL segment files listed for `wal_dir`.
///
/// This is a best-effort monitoring figure, so no error is ever surfaced:
/// if the segment listing fails the result is 0, and a segment whose
/// metadata cannot be read contributes 0 bytes to the total.
#[cfg(feature = "metrics")]
fn compute_wal_lag_bytes(wal_dir: &std::path::Path) -> u64 {
    match crate::wal::segment::list_segments(wal_dir) {
        Ok(segments) => {
            let mut total_bytes = 0u64;
            for (_, segment_path) in segments.iter() {
                // An unreadable file counts as empty; the total stays usable.
                total_bytes += std::fs::metadata(segment_path).map_or(0, |meta| meta.len());
            }
            total_bytes
        }
        Err(_) => 0,
    }
}
|