Milestone 8 (phases 1-4): - Shard-aware WAL segment naming, BatchHeader v2, ShardRouter - Transport trait, InProcessTransport, WalShipper, FollowerDb - HLC, PNCounter, LWWRegister, CrdtSignalState, ReconciliationEngine - Session replication bridge with SeqNo/HWM, idempotency store Forage application: - Multi-source discovery engine with MAB exploration - Embedding-based label system, server handlers, UI refresh Other: - QUICKSTART.md, README.md, milestone-8 planning docs - Hard negative union semantics, RLHF export enhancements - Recovery benchmark and visibility test expansions - Split 8 oversized source files per CODING_GUIDELINES §9 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
341 lines
12 KiB
Rust
341 lines
12 KiB
Rust
//! Reconciliation engine for deterministic merge after network partitions.
//!
//! When two `TidalDB` nodes diverge during a partition, each accumulates
//! independent signal events and hard-negative decisions. After the partition
//! heals, the `ReconciliationEngine` produces a deterministic `MergePlan`
//! from their diverged `StateSnapshot`s and applies it to the local state.
//!
//! # Merge semantics
//!
//! - **Signal states:** CRDT-merged per `(entity, signal_type)`. Each node's
//!   contribution is summed (disjoint events); timestamps take the maximum.
//! - **Hard negatives:** LWW-resolved per `(user, item)` by HLC timestamp.
//!   The most recent hide or unhide wins deterministically.
//!
//! # Idempotency
//!
//! Applying a `MergePlan` is idempotent: applying the same plan twice
//! produces identical state. This is critical for at-least-once delivery
//! guarantees during reconnection.
|
|
|
|
use std::collections::{HashMap, HashSet};
|
|
use std::sync::Arc;
|
|
|
|
use crate::entities::HardNegIndex;
|
|
use crate::replication::crdt::{CrdtSignalState, LWWRegister};
|
|
use crate::schema::EntityId;
|
|
use crate::signals::{SignalLedger, SignalTypeId};
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// HardNegAction
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/// The decision recorded for a single `(user, item)` hard-negative entry.
///
/// Values of this type live inside an `LWWRegister<HardNegAction>`; when two
/// registers for the same key are reconciled, the HLC timestamp decides which
/// action wins.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum HardNegAction {
    /// The item was explicitly hidden, muted, or blocked by the user.
    Hide,
    /// An earlier hide was explicitly reversed by the user.
    Unhide,
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// StateSnapshot
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/// A snapshot of CRDT state for reconciliation.
|
|
///
|
|
/// Produced by `TidalDb::take_crdt_snapshot()` or constructed manually in
|
|
/// tests. Contains the per-key CRDT state for all entities and hard negatives
|
|
/// that participated in diverged writes.
|
|
#[derive(Debug, Clone, Default)]
|
|
pub struct StateSnapshot {
|
|
/// Per-(entity, `signal_type`) CRDT signal state.
|
|
signal_states: HashMap<(EntityId, SignalTypeId), CrdtSignalState>,
|
|
/// Per-(user, item) LWW hard-negative register.
|
|
hardneg_registers: HashMap<(EntityId, EntityId), LWWRegister<HardNegAction>>,
|
|
}
|
|
|
|
impl StateSnapshot {
|
|
/// Create an empty snapshot.
|
|
#[must_use]
|
|
pub fn new() -> Self {
|
|
Self::default()
|
|
}
|
|
|
|
/// Insert a signal state entry.
|
|
pub fn add_signal_state(
|
|
&mut self,
|
|
entity_id: EntityId,
|
|
signal_type_id: SignalTypeId,
|
|
state: CrdtSignalState,
|
|
) {
|
|
self.signal_states
|
|
.insert((entity_id, signal_type_id), state);
|
|
}
|
|
|
|
/// Insert a hard-negative register entry.
|
|
pub fn add_hardneg_register(
|
|
&mut self,
|
|
user_id: EntityId,
|
|
item_id: EntityId,
|
|
register: LWWRegister<HardNegAction>,
|
|
) {
|
|
self.hardneg_registers.insert((user_id, item_id), register);
|
|
}
|
|
|
|
/// Number of signal state entries.
|
|
#[must_use]
|
|
pub fn signal_count(&self) -> usize {
|
|
self.signal_states.len()
|
|
}
|
|
|
|
/// Number of hard-negative register entries.
|
|
#[must_use]
|
|
pub fn hardneg_count(&self) -> usize {
|
|
self.hardneg_registers.len()
|
|
}
|
|
|
|
/// Iterate over all signal state keys.
|
|
pub fn signal_keys(&self) -> impl Iterator<Item = (EntityId, SignalTypeId)> + '_ {
|
|
self.signal_states.keys().copied()
|
|
}
|
|
|
|
/// Get signal state for a key.
|
|
#[must_use]
|
|
pub fn signal_state(&self, key: (EntityId, SignalTypeId)) -> Option<&CrdtSignalState> {
|
|
self.signal_states.get(&key)
|
|
}
|
|
|
|
/// Iterate over all hard-negative keys.
|
|
pub fn hardneg_keys(&self) -> impl Iterator<Item = (EntityId, EntityId)> + '_ {
|
|
self.hardneg_registers.keys().copied()
|
|
}
|
|
|
|
/// Get hard-negative register for a key.
|
|
#[must_use]
|
|
pub fn hardneg_register(
|
|
&self,
|
|
key: (EntityId, EntityId),
|
|
) -> Option<&LWWRegister<HardNegAction>> {
|
|
self.hardneg_registers.get(&key)
|
|
}
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// MergePlan operations
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/// A merge operation for a single signal counter.
|
|
#[derive(Debug, Clone)]
|
|
pub struct SignalMergeOp {
|
|
/// The entity whose signal state is being merged.
|
|
pub entity_id: EntityId,
|
|
/// The signal type being merged.
|
|
pub signal_type_id: SignalTypeId,
|
|
/// The CRDT-merged state (union of both nodes' contributions).
|
|
pub merged_state: CrdtSignalState,
|
|
}
|
|
|
|
/// A resolution for a single hard-negative register.
|
|
#[derive(Debug, Clone)]
|
|
pub struct HardNegResolutionOp {
|
|
/// The user whose hard-negative is being resolved.
|
|
pub user_id: EntityId,
|
|
/// The item targeted by the hard-negative.
|
|
pub item_id: EntityId,
|
|
/// Winning action after LWW resolution. `None` means no hard negative
|
|
/// was ever written (both sides were empty).
|
|
pub action: Option<HardNegAction>,
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// MergePlan
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/// The reconciliation plan: a list of operations to apply.
|
|
///
|
|
/// Produced by `ReconciliationEngine::plan()`. Applying the plan is
|
|
/// idempotent -- applying it twice produces identical state.
|
|
#[derive(Debug, Clone)]
|
|
pub struct MergePlan {
|
|
/// Signal merge operations (one per diverged entity-signal pair).
|
|
pub signal_merges: Vec<SignalMergeOp>,
|
|
/// Hard-negative resolution operations (one per diverged user-item pair).
|
|
pub hardneg_resolutions: Vec<HardNegResolutionOp>,
|
|
}
|
|
|
|
impl MergePlan {
|
|
/// Total number of operations in this plan.
|
|
#[must_use]
|
|
pub const fn operation_count(&self) -> usize {
|
|
self.signal_merges.len() + self.hardneg_resolutions.len()
|
|
}
|
|
|
|
/// Whether this plan has no operations (snapshots were identical).
|
|
#[must_use]
|
|
pub const fn is_empty(&self) -> bool {
|
|
self.signal_merges.is_empty() && self.hardneg_resolutions.is_empty()
|
|
}
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// ReconciliationEngine
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/// Produces and applies reconciliation plans for partitioned shards.
|
|
///
|
|
/// The engine is bound to a local `SignalLedger` and `HardNegIndex`.
|
|
/// It does not own or modify the remote state -- the caller provides
|
|
/// snapshots and the engine computes a deterministic merge.
|
|
///
|
|
/// # Usage
|
|
///
|
|
/// ```ignore
|
|
/// let engine = ReconciliationEngine::new(
|
|
/// Arc::clone(&signal_ledger),
|
|
/// Arc::clone(&hard_neg_index),
|
|
/// );
|
|
/// let plan = engine.plan(&local_snapshot, &remote_snapshot);
|
|
/// engine.apply(&plan)?;
|
|
/// ```
|
|
pub struct ReconciliationEngine {
|
|
signal_ledger: Arc<SignalLedger>,
|
|
hard_neg_index: Arc<HardNegIndex>,
|
|
}
|
|
|
|
impl ReconciliationEngine {
|
|
/// Create a new engine bound to the given ledger and hard-neg index.
|
|
#[must_use]
|
|
pub const fn new(signal_ledger: Arc<SignalLedger>, hard_neg_index: Arc<HardNegIndex>) -> Self {
|
|
Self {
|
|
signal_ledger,
|
|
hard_neg_index,
|
|
}
|
|
}
|
|
|
|
/// Produce a deterministic merge plan from two diverged state snapshots.
|
|
///
|
|
/// - Signal states: union of both snapshots, CRDT-merged per
|
|
/// `(entity, signal_type)`.
|
|
/// - Hard negatives: LWW-resolved per `(user, item)` by HLC timestamp.
|
|
///
|
|
/// Entities/signals present on only one side are included unchanged
|
|
/// (no data loss -- single-sided state is still valid state).
|
|
#[must_use]
|
|
pub fn plan(&self, local: &StateSnapshot, remote: &StateSnapshot) -> MergePlan {
|
|
// -- Signal merges --
|
|
let signal_keys: HashSet<(EntityId, SignalTypeId)> =
|
|
local.signal_keys().chain(remote.signal_keys()).collect();
|
|
|
|
let mut signal_merges = Vec::with_capacity(signal_keys.len());
|
|
for key in signal_keys {
|
|
let local_state = local.signal_state(key);
|
|
let remote_state = remote.signal_state(key);
|
|
|
|
let merged = match (local_state, remote_state) {
|
|
(Some(l), Some(r)) => {
|
|
let mut m = l.clone();
|
|
m.merge(r);
|
|
m
|
|
}
|
|
(Some(l), None) => l.clone(),
|
|
(None, Some(r)) => r.clone(),
|
|
(None, None) => continue, // unreachable: key came from one of the iterators
|
|
};
|
|
|
|
signal_merges.push(SignalMergeOp {
|
|
entity_id: key.0,
|
|
signal_type_id: key.1,
|
|
merged_state: merged,
|
|
});
|
|
}
|
|
|
|
// -- Hard-negative resolutions --
|
|
let neg_keys: HashSet<(EntityId, EntityId)> =
|
|
local.hardneg_keys().chain(remote.hardneg_keys()).collect();
|
|
|
|
let mut hardneg_resolutions = Vec::with_capacity(neg_keys.len());
|
|
for key in neg_keys {
|
|
let local_reg = local.hardneg_register(key);
|
|
let remote_reg = remote.hardneg_register(key);
|
|
|
|
let resolved = match (local_reg, remote_reg) {
|
|
(Some(l), Some(r)) => {
|
|
let mut m = l.clone();
|
|
m.merge(r);
|
|
m
|
|
}
|
|
(Some(l), None) => l.clone(),
|
|
(None, Some(r)) => r.clone(),
|
|
(None, None) => continue, // unreachable
|
|
};
|
|
|
|
hardneg_resolutions.push(HardNegResolutionOp {
|
|
user_id: key.0,
|
|
item_id: key.1,
|
|
action: resolved.get().cloned(),
|
|
});
|
|
}
|
|
|
|
MergePlan {
|
|
signal_merges,
|
|
hardneg_resolutions,
|
|
}
|
|
}
|
|
|
|
/// Apply a merge plan to the local state.
|
|
///
|
|
/// Idempotent: applying the same plan twice produces identical state.
|
|
///
|
|
/// # Errors
|
|
///
|
|
/// Returns an error if any signal type in the plan is unknown to the
|
|
/// ledger's schema.
|
|
pub fn apply(&self, plan: &MergePlan) -> crate::Result<()> {
|
|
// Apply signal merges.
|
|
for op in &plan.signal_merges {
|
|
self.signal_ledger.apply_crdt_state(
|
|
op.entity_id,
|
|
op.signal_type_id,
|
|
&op.merged_state,
|
|
)?;
|
|
}
|
|
|
|
// Apply hard-negative resolutions.
|
|
for op in &plan.hardneg_resolutions {
|
|
// RoaringBitmap uses u32; EntityId wraps u64. Truncation is safe
|
|
// because HardNegIndex was designed for item IDs that fit in u32
|
|
// (RoaringBitmap constraint).
|
|
#[allow(clippy::cast_possible_truncation)]
|
|
let item_id = op.item_id.as_u64() as u32;
|
|
let user_id = op.user_id.as_u64();
|
|
|
|
match &op.action {
|
|
Some(HardNegAction::Hide) => {
|
|
self.hard_neg_index.add(user_id, item_id);
|
|
}
|
|
Some(HardNegAction::Unhide) | None => {
|
|
// Unhide or empty register: ensure the item is NOT in the
|
|
// hard-negative set.
|
|
self.hard_neg_index.remove(user_id, item_id);
|
|
}
|
|
}
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Tests
|
|
// ---------------------------------------------------------------------------
|
|
|
|
// Unit tests are compiled only for test builds and are loaded from
// `reconcile_tests.rs` via `#[path]`. The clippy lints listed below are
// relaxed for that module alone; the rest of this file stays under the
// crate's normal lint settings.
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::float_cmp, clippy::cast_precision_loss)]
#[path = "reconcile_tests.rs"]
mod tests;
|