tidaldb/tidal/src/replication/reconcile.rs
jordan f4cfd6c81f feat: complete M8 replication primitives + forage enhancements + docs
Milestone 8 (phases 1-4):
- Shard-aware WAL segment naming, BatchHeader v2, ShardRouter
- Transport trait, InProcessTransport, WalShipper, FollowerDb
- HLC, PNCounter, LWWRegister, CrdtSignalState, ReconciliationEngine
- Session replication bridge with SeqNo/HWM, idempotency store

Forage application:
- Multi-source discovery engine with MAB exploration
- Embedding-based label system, server handlers, UI refresh

Other:
- QUICKSTART.md, README.md, milestone-8 planning docs
- Hard negative union semantics, RLHF export enhancements
- Recovery benchmark and visibility test expansions
- Split 8 oversized source files per CODING_GUIDELINES §9

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-24 13:17:19 -07:00

341 lines
12 KiB
Rust

//! Reconciliation engine for deterministic merge after network partitions.
//!
//! When two `TidalDB` nodes diverge during a partition, each accumulates
//! independent signal events and hard-negative decisions. After the partition
//! heals, the `ReconciliationEngine` produces a deterministic `MergePlan`
//! from their diverged `StateSnapshot`s and applies it to the local state.
//!
//! # Merge semantics
//!
//! - **Signal states:** CRDT-merged per `(entity, signal_type)`. Each node's
//! contribution is summed (disjoint events); timestamps are max'd.
//! - **Hard negatives:** LWW-resolved per `(user, item)` by HLC timestamp.
//! The most recent hide or unhide wins deterministically.
//!
//! # Idempotency
//!
//! Applying a `MergePlan` is idempotent: applying the same plan twice
//! produces identical state. This is critical for at-least-once delivery
//! guarantees during reconnection.
use std::collections::{HashMap, HashSet};
use std::sync::Arc;
use crate::entities::HardNegIndex;
use crate::replication::crdt::{CrdtSignalState, LWWRegister};
use crate::schema::EntityId;
use crate::signals::{SignalLedger, SignalTypeId};
// ---------------------------------------------------------------------------
// HardNegAction
// ---------------------------------------------------------------------------
/// The decision recorded for one `(user, item)` hard-negative slot.
///
/// Values of this type are the payload of an `LWWRegister<HardNegAction>`.
/// `ReconciliationEngine::plan` reads the surviving value out of the merged
/// register, and `ReconciliationEngine::apply` turns it into an `add`
/// (`Hide`) or a `remove` (`Unhide`) on the local `HardNegIndex`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum HardNegAction {
/// The user explicitly hid, muted, or blocked this item.
Hide,
/// The user reversed a previous hide (explicit un-hide).
Unhide,
}
// ---------------------------------------------------------------------------
// StateSnapshot
// ---------------------------------------------------------------------------
/// A snapshot of one node's CRDT state, used as input to reconciliation.
///
/// Produced by `TidalDb::take_crdt_snapshot()` or constructed manually in
/// tests. Contains the per-key CRDT state for all entities and hard negatives
/// that participated in diverged writes.
///
/// Both maps are private: entries are written through `add_signal_state` /
/// `add_hardneg_register` and read back through the accessor methods. The
/// derived `Default` yields a snapshot with no entries.
#[derive(Debug, Clone, Default)]
pub struct StateSnapshot {
/// CRDT signal state, keyed by `(entity, signal_type)`.
signal_states: HashMap<(EntityId, SignalTypeId), CrdtSignalState>,
/// LWW hard-negative register, keyed by `(user, item)`.
hardneg_registers: HashMap<(EntityId, EntityId), LWWRegister<HardNegAction>>,
}
impl StateSnapshot {
/// Create an empty snapshot.
#[must_use]
pub fn new() -> Self {
Self::default()
}
/// Insert a signal state entry.
pub fn add_signal_state(
&mut self,
entity_id: EntityId,
signal_type_id: SignalTypeId,
state: CrdtSignalState,
) {
self.signal_states
.insert((entity_id, signal_type_id), state);
}
/// Insert a hard-negative register entry.
pub fn add_hardneg_register(
&mut self,
user_id: EntityId,
item_id: EntityId,
register: LWWRegister<HardNegAction>,
) {
self.hardneg_registers.insert((user_id, item_id), register);
}
/// Number of signal state entries.
#[must_use]
pub fn signal_count(&self) -> usize {
self.signal_states.len()
}
/// Number of hard-negative register entries.
#[must_use]
pub fn hardneg_count(&self) -> usize {
self.hardneg_registers.len()
}
/// Iterate over all signal state keys.
pub fn signal_keys(&self) -> impl Iterator<Item = (EntityId, SignalTypeId)> + '_ {
self.signal_states.keys().copied()
}
/// Get signal state for a key.
#[must_use]
pub fn signal_state(&self, key: (EntityId, SignalTypeId)) -> Option<&CrdtSignalState> {
self.signal_states.get(&key)
}
/// Iterate over all hard-negative keys.
pub fn hardneg_keys(&self) -> impl Iterator<Item = (EntityId, EntityId)> + '_ {
self.hardneg_registers.keys().copied()
}
/// Get hard-negative register for a key.
#[must_use]
pub fn hardneg_register(
&self,
key: (EntityId, EntityId),
) -> Option<&LWWRegister<HardNegAction>> {
self.hardneg_registers.get(&key)
}
}
// ---------------------------------------------------------------------------
// MergePlan operations
// ---------------------------------------------------------------------------
/// One entry of a `MergePlan`: the merged CRDT state for a single
/// `(entity, signal_type)` pair.
///
/// `ReconciliationEngine::apply` hands these three fields to
/// `SignalLedger::apply_crdt_state`.
#[derive(Debug, Clone)]
pub struct SignalMergeOp {
/// The entity whose signal state is being merged.
pub entity_id: EntityId,
/// The signal type being merged.
pub signal_type_id: SignalTypeId,
/// The state to install locally: the CRDT merge of both sides when both
/// snapshots had the key, otherwise a copy of the only side that had it.
pub merged_state: CrdtSignalState,
}
/// One entry of a `MergePlan`: the resolved hard-negative decision for a
/// single `(user, item)` pair.
#[derive(Debug, Clone)]
pub struct HardNegResolutionOp {
/// The user whose hard-negative is being resolved.
pub user_id: EntityId,
/// The item targeted by the hard-negative.
pub item_id: EntityId,
/// Winning action after LWW resolution. `None` means the resolved register
/// held no value (no hard negative was ever written).
///
/// `ReconciliationEngine::apply` treats `None` exactly like
/// `Some(HardNegAction::Unhide)`: the item is removed from the index.
pub action: Option<HardNegAction>,
}
// ---------------------------------------------------------------------------
// MergePlan
// ---------------------------------------------------------------------------
/// The reconciliation plan: a list of operations to apply.
///
/// Produced by `ReconciliationEngine::plan()` and consumed by
/// `ReconciliationEngine::apply()`. Applying the plan is intended to be
/// idempotent -- applying it twice should produce identical state.
///
/// Note that `plan()` emits an operation for every key that appears in
/// *either* snapshot, not only for keys whose values differ.
#[derive(Debug, Clone)]
pub struct MergePlan {
/// Signal merge operations (one per `(entity, signal_type)` pair present
/// in at least one of the two snapshots).
pub signal_merges: Vec<SignalMergeOp>,
/// Hard-negative resolution operations (one per `(user, item)` pair
/// present in at least one of the two snapshots).
pub hardneg_resolutions: Vec<HardNegResolutionOp>,
}
impl MergePlan {
    /// Count every operation in the plan: signal merges plus hard-negative
    /// resolutions.
    #[must_use]
    pub const fn operation_count(&self) -> usize {
        let signal_ops = self.signal_merges.len();
        let hardneg_ops = self.hardneg_resolutions.len();
        signal_ops + hardneg_ops
    }

    /// Whether the plan contains no operations at all.
    ///
    /// For plans built by `ReconciliationEngine::plan` this means both input
    /// snapshots had no entries, because that method emits an operation for
    /// every key found in either snapshot.
    #[must_use]
    pub const fn is_empty(&self) -> bool {
        self.operation_count() == 0
    }
}
// ---------------------------------------------------------------------------
// ReconciliationEngine
// ---------------------------------------------------------------------------
/// Produces and applies reconciliation plans for partitioned shards.
///
/// The engine is bound to a local `SignalLedger` and `HardNegIndex`, both
/// held through shared `Arc` handles. It does not own or modify the remote
/// state -- the caller provides snapshots, `plan` computes the merge from
/// them, and `apply` writes the result into the local ledger and index only.
///
/// # Usage
///
/// ```ignore
/// let engine = ReconciliationEngine::new(
///     Arc::clone(&signal_ledger),
///     Arc::clone(&hard_neg_index),
/// );
/// let plan = engine.plan(&local_snapshot, &remote_snapshot);
/// engine.apply(&plan)?;
/// ```
pub struct ReconciliationEngine {
// Local ledger that receives merged signal states in `apply`.
signal_ledger: Arc<SignalLedger>,
// Local index that receives hide/unhide resolutions in `apply`.
hard_neg_index: Arc<HardNegIndex>,
}
impl ReconciliationEngine {
    /// Bind a new engine to the local signal ledger and hard-negative index
    /// that `apply` will write into.
    #[must_use]
    pub const fn new(signal_ledger: Arc<SignalLedger>, hard_neg_index: Arc<HardNegIndex>) -> Self {
        Self {
            signal_ledger,
            hard_neg_index,
        }
    }

    /// Compute the merge plan for two diverged state snapshots.
    ///
    /// - Signal states: for every `(entity, signal_type)` key found in either
    ///   snapshot, the plan carries the result of `CrdtSignalState::merge`
    ///   when both sides have the key, or a copy of the single side otherwise.
    /// - Hard negatives: for every `(user, item)` key found in either
    ///   snapshot, the plan carries the value held by the register after
    ///   `LWWRegister::merge` (or by the single side's register).
    ///
    /// Nothing is dropped: state present on only one side is carried over
    /// unchanged. Neither snapshot nor any local state is modified here.
    ///
    /// The order of operations inside each list follows `HashSet` iteration
    /// and is therefore unspecified; every key appears at most once per list.
    #[must_use]
    pub fn plan(&self, local: &StateSnapshot, remote: &StateSnapshot) -> MergePlan {
        // -- Signal merges: union of keys from both sides, de-duplicated --
        let all_signal_keys: HashSet<(EntityId, SignalTypeId)> =
            local.signal_keys().chain(remote.signal_keys()).collect();
        let mut signal_merges = Vec::with_capacity(all_signal_keys.len());
        signal_merges.extend(all_signal_keys.into_iter().filter_map(
            |(entity_id, signal_type_id)| {
                let key = (entity_id, signal_type_id);
                let merged_state = match (local.signal_state(key), remote.signal_state(key)) {
                    (Some(ours), Some(theirs)) => {
                        let mut combined = ours.clone();
                        combined.merge(theirs);
                        combined
                    }
                    (Some(only), None) | (None, Some(only)) => only.clone(),
                    // Cannot happen: the key was taken from one of the two snapshots.
                    (None, None) => return None,
                };
                Some(SignalMergeOp {
                    entity_id,
                    signal_type_id,
                    merged_state,
                })
            },
        ));

        // -- Hard-negative resolutions: same union-of-keys approach --
        let all_hardneg_keys: HashSet<(EntityId, EntityId)> =
            local.hardneg_keys().chain(remote.hardneg_keys()).collect();
        let mut hardneg_resolutions = Vec::with_capacity(all_hardneg_keys.len());
        hardneg_resolutions.extend(all_hardneg_keys.into_iter().filter_map(
            |(user_id, item_id)| {
                let key = (user_id, item_id);
                let winner = match (local.hardneg_register(key), remote.hardneg_register(key)) {
                    (Some(ours), Some(theirs)) => {
                        let mut combined = ours.clone();
                        combined.merge(theirs);
                        combined
                    }
                    (Some(only), None) | (None, Some(only)) => only.clone(),
                    // Cannot happen: the key was taken from one of the two snapshots.
                    (None, None) => return None,
                };
                Some(HardNegResolutionOp {
                    user_id,
                    item_id,
                    action: winner.get().cloned(),
                })
            },
        ));

        MergePlan {
            signal_merges,
            hardneg_resolutions,
        }
    }

    /// Write a merge plan into the local ledger and hard-negative index.
    ///
    /// All signal merges are applied first, then all hard-negative
    /// resolutions. The operation is intended to be idempotent: applying the
    /// same plan twice should leave identical state (for signals this relies
    /// on `SignalLedger::apply_crdt_state`, which is not visible here).
    ///
    /// # Errors
    ///
    /// Returns an error if any signal type in the plan is unknown to the
    /// ledger's schema. The first error from
    /// `SignalLedger::apply_crdt_state` is propagated immediately: signal
    /// merges already applied are not rolled back, and no hard-negative
    /// resolution is applied in that case.
    pub fn apply(&self, plan: &MergePlan) -> crate::Result<()> {
        // Signals first; `?` stops at the first failing operation.
        for SignalMergeOp {
            entity_id,
            signal_type_id,
            merged_state,
        } in &plan.signal_merges
        {
            self.signal_ledger
                .apply_crdt_state(*entity_id, *signal_type_id, merged_state)?;
        }

        // Then hard negatives: each resolution becomes an add or a remove.
        for resolution in &plan.hardneg_resolutions {
            let user = resolution.user_id.as_u64();
            // RoaringBitmap uses u32; EntityId wraps u64. Truncation is safe
            // because HardNegIndex was designed for item IDs that fit in u32
            // (RoaringBitmap constraint).
            // NOTE(review): an id above `u32::MAX` would alias a different
            // item after this cast -- confirm the write path rules that out.
            #[allow(clippy::cast_possible_truncation)]
            let item = resolution.item_id.as_u64() as u32;

            // An empty register (`None`) is handled like an explicit unhide:
            // the item must NOT be in the hard-negative set.
            let hide = match &resolution.action {
                Some(HardNegAction::Hide) => true,
                Some(HardNegAction::Unhide) | None => false,
            };
            if hide {
                self.hard_neg_index.add(user, item);
            } else {
                self.hard_neg_index.remove(user, item);
            }
        }

        Ok(())
    }
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
// Unit tests live in the sibling file `reconcile_tests.rs` and are compiled
// only for test builds. The `allow` relaxes the `unwrap`, exact float
// comparison, and precision-losing cast lints for that module.
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::float_cmp, clippy::cast_precision_loss)]
#[path = "reconcile_tests.rs"]
mod tests;