tidaldb/tidal/tests/m8p2_replication.rs
jordan f4cfd6c81f feat: complete M8 replication primitives + forage enhancements + docs
Milestone 8 (phases 1-4):
- Shard-aware WAL segment naming, BatchHeader v2, ShardRouter
- Transport trait, InProcessTransport, WalShipper, FollowerDb
- HLC, PNCounter, LWWRegister, CrdtSignalState, ReconciliationEngine
- Session replication bridge with SeqNo/HWM, idempotency store

Forage application:
- Multi-source discovery engine with MAB exploration
- Embedding-based label system, server handlers, UI refresh

Other:
- QUICKSTART.md, README.md, milestone-8 planning docs
- Hard negative union semantics, RLHF export enhancements
- Recovery benchmark and visibility test expansions
- Split 8 oversized source files per CODING_GUIDELINES §9

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-24 13:17:19 -07:00

450 lines
14 KiB
Rust

//! M8p2 WAL Replication integration tests.
//!
//! Tests the full replication pipeline: leader writes signals, segments are
//! shipped (or directly injected) to a follower, and the follower's ledger
//! reflects the replicated signals. Also verifies follower write rejection
//! and replication lag gauge.
#![allow(clippy::unwrap_used)]
use std::collections::HashMap;
use std::sync::Arc;
use std::time::Duration;
use tidaldb::db::config::{NodeConfig, NodeRole};
use tidaldb::replication::lag::ReplicationLagGauge;
use tidaldb::replication::shard::ShardId;
use tidaldb::replication::state::ReplicationState;
use tidaldb::replication::transport::{Transport, TransportError, WalSegmentPayload};
use tidaldb::replication::{InProcessTransportFactory, WalSegmentId};
use tidaldb::schema::{DecaySpec, EntityId, EntityKind, SchemaBuilder, Timestamp, Window};
use tidaldb::signals::{NoopWalWriter, SignalLedger};
use tidaldb::wal::format::batch::{EventRecord, encode_batch};
use tidaldb::{TidalDb, TidalError};
/// Build the single-signal schema shared by the tests in this file.
///
/// The schema declares one item-level signal named `"view"` with exponential
/// decay (seven-day half-life), a single all-time window, and velocity
/// tracking switched off.
///
/// # Panics
///
/// Panics if the builder rejects the schema.
fn make_schema() -> tidaldb::schema::Schema {
    // Seven days, expressed in seconds.
    let one_week = Duration::from_secs(7 * 24 * 3600);
    let view_decay = DecaySpec::Exponential {
        half_life: one_week,
    };

    let mut schema_builder = SchemaBuilder::new();
    // Whatever `add()` hands back is intentionally discarded; only the
    // registration on `schema_builder` matters here.
    let _ = schema_builder
        .signal("view", EntityKind::Item, view_decay)
        .windows(&[Window::AllTime])
        .velocity(false)
        .add();

    schema_builder.build().expect("schema must be valid")
}
/// Look up the signal type ID that `schema` assigns to `"view"`.
///
/// A scratch ledger backed by a no-op WAL writer is built solely to perform
/// the lookup and is dropped when this function returns.
///
/// Signal type IDs are deterministic (alphabetically sorted, starting at 0),
/// so for a schema whose only signal type is "view" the ID is always 0.
///
/// # Panics
///
/// Panics if the schema does not define a `"view"` signal.
fn resolve_view_type_id(schema: &tidaldb::schema::Schema) -> tidaldb::signals::SignalTypeId {
    let scratch_ledger = SignalLedger::new(schema.clone(), Box::new(NoopWalWriter));
    let lookup = scratch_ledger.resolve_signal_type("view");
    lookup.unwrap()
}
/// Open an ephemeral `TidalDb` configured with the follower role.
///
/// Every `NodeConfig` field other than `role` keeps its default value.
///
/// # Panics
///
/// Panics if the database fails to open.
fn open_follower(schema: tidaldb::schema::Schema) -> TidalDb {
    let cluster = NodeConfig {
        role: NodeRole::Follower,
        ..NodeConfig::default()
    };

    let opened = TidalDb::builder()
        .ephemeral()
        .with_schema(schema)
        .with_cluster(cluster)
        .open();

    opened.expect("follower should open")
}
/// Open an ephemeral `TidalDb` configured with the leader role.
///
/// Every `NodeConfig` field other than `role` keeps its default value.
///
/// # Panics
///
/// Panics if the database fails to open.
fn open_leader(schema: tidaldb::schema::Schema) -> TidalDb {
    let cluster = NodeConfig {
        role: NodeRole::Leader,
        ..NodeConfig::default()
    };

    let opened = TidalDb::builder()
        .ephemeral()
        .with_schema(schema)
        .with_cluster(cluster)
        .open();

    opened.expect("leader should open")
}
// ── Test 1: Follower rejects write calls ─────────────────────────────────
/// A signal write issued against a follower must fail with
/// `TidalError::ReadOnly`.
#[test]
fn follower_rejects_signal_write() {
    let follower = open_follower(make_schema());

    // Attempt the write first, then inspect the failure separately.
    let attempt = follower.signal("view", EntityId::new(1), 1.0, Timestamp::now());
    let err = attempt.expect_err("follower should reject signal writes");

    assert!(
        matches!(err, TidalError::ReadOnly(_)),
        "expected ReadOnly error, got: {err}"
    );

    follower.close().unwrap();
}
/// An item write (with metadata) issued against a follower must fail with
/// `TidalError::ReadOnly`.
#[test]
fn follower_rejects_write_item() {
    let follower = open_follower(make_schema());

    // A single-entry metadata map is enough to exercise the write path.
    let mut meta = HashMap::new();
    meta.insert("title".to_string(), "test".to_string());

    let attempt = follower.write_item_with_metadata(EntityId::new(1), &meta);
    let err = attempt.expect_err("follower should reject item writes");

    assert!(
        matches!(err, TidalError::ReadOnly(_)),
        "expected ReadOnly error, got: {err}"
    );

    follower.close().unwrap();
}
// ── Test 2: Leader accepts writes ────────────────────────────────────────
/// A leader accepts a signal write, and the written entity then has a decay
/// score for that signal.
#[test]
fn leader_accepts_signal_write() {
    let leader = open_leader(make_schema());

    let write = leader.signal("view", EntityId::new(1), 1.0, Timestamp::now());
    write.expect("leader should accept signal writes");

    // Read the score back for the entity that was just written.
    let lookup = leader.read_decay_score(EntityId::new(1), "view", 0);
    let score = lookup.expect("read should succeed");
    assert!(score.is_some(), "signal should have been recorded");

    leader.close().unwrap();
}
// ── Test 3: Direct payload injection into follower ledger ────────────────
/// Feeds one hand-encoded WAL batch to a follower through a channel-backed
/// transport and checks that both the follower's ledger and its replication
/// state reflect it.
#[test]
fn payload_injection_updates_follower_ledger() {
    /// Receive-only transport: `recv_segment` yields whatever is pushed into
    /// the channel and returns `None` once every sender has been dropped.
    /// `send_segment` accepts and discards its payload.
    struct OneShotTransport {
        rx: crossbeam::channel::Receiver<WalSegmentPayload>,
    }
    impl Transport for OneShotTransport {
        fn send_segment(
            &self,
            _to: ShardId,
            _payload: WalSegmentPayload,
        ) -> Result<(), TransportError> {
            Ok(())
        }
        fn recv_segment(&self) -> Option<WalSegmentPayload> {
            self.rx.recv().ok()
        }
        fn local_shard(&self) -> ShardId {
            ShardId::SINGLE
        }
    }

    let schema = make_schema();
    let follower = open_follower(schema.clone());
    // Resolve the signal type ID using a standalone ledger (same schema).
    let type_id = resolve_view_type_id(&schema);
    let state = follower.replication_state().clone();

    // The batch record stores the signal type in a `u8`; convert with a check
    // so an out-of-range ID fails loudly instead of being truncated.
    let wire_type =
        u8::try_from(type_id.as_u16()).expect("signal type id must fit in the u8 batch field");

    // Build a WAL batch payload.
    let events = vec![EventRecord {
        entity_id: 42,
        signal_type: wire_type,
        weight: 3.0,
        timestamp_nanos: 1_000_000_000,
    }];
    let bytes = encode_batch(&events, 1, 1).unwrap();

    // `apply_payload` is private, so the payload is delivered the same way
    // production segments arrive: through a `Transport` handed to
    // `start_replication`.
    let (tx, rx) = crossbeam::channel::bounded(4);
    let transport = Arc::new(OneShotTransport { rx });
    follower.start_replication(Arc::clone(&transport)).unwrap();

    // Send the payload.
    tx.send(WalSegmentPayload {
        id: WalSegmentId::new(tidaldb::replication::RegionId::SINGLE, ShardId::SINGLE, 1),
        bytes,
        event_count: 1,
    })
    .unwrap();

    // Wait for the receiver with a bounded poll rather than a fixed sleep: a
    // fixed interval is flaky on a loaded machine and wasteful on a fast one.
    // Both observable effects are polled so the loop does not depend on the
    // order in which the receiver publishes them. On timeout the assertions
    // below report the failure.
    let deadline = std::time::Instant::now() + Duration::from_secs(5);
    loop {
        let applied = state.applied_seqno(ShardId::SINGLE) == Some(1);
        let visible = matches!(
            follower.read_decay_score(EntityId::new(42), "view", 0),
            Ok(Some(_))
        );
        if (applied && visible) || std::time::Instant::now() >= deadline {
            break;
        }
        std::thread::sleep(Duration::from_millis(5));
    }

    // Verify the follower's ledger was updated.
    let score = follower
        .read_decay_score(EntityId::new(42), "view", 0)
        .expect("read should succeed");
    assert!(score.is_some(), "signal should be visible on follower");

    // Verify replication state advanced.
    let applied = state.applied_seqno(ShardId::SINGLE);
    assert_eq!(applied, Some(1), "replication state should have advanced");

    // Shutdown: drop sender so receiver exits.
    drop(tx);
    follower.close().unwrap();
}
// ── Test 4: Idempotent replay ────────────────────────────────────────────
/// Delivers the same segment (seqno 1) twice to a receiver spawned with
/// `spawn_receiver` and checks that the replication state ends at seqno 1 and
/// that the entity exists in the ledger.
#[test]
fn replay_is_idempotent() {
    /// Receive-only transport: `recv_segment` yields whatever is pushed into
    /// the channel and returns `None` once every sender has been dropped.
    /// `send_segment` accepts and discards its payload.
    struct MultiTransport {
        rx: crossbeam::channel::Receiver<WalSegmentPayload>,
    }
    impl Transport for MultiTransport {
        fn send_segment(
            &self,
            _to: ShardId,
            _payload: WalSegmentPayload,
        ) -> Result<(), TransportError> {
            Ok(())
        }
        fn recv_segment(&self) -> Option<WalSegmentPayload> {
            self.rx.recv().ok()
        }
        fn local_shard(&self) -> ShardId {
            ShardId::SINGLE
        }
    }

    let schema = make_schema();
    let ledger = Arc::new(SignalLedger::new(schema, Box::new(NoopWalWriter)));
    let state = Arc::new(ReplicationState::single());
    let type_id = ledger.resolve_signal_type("view").unwrap();

    // The batch record stores the signal type in a `u8`; convert with a check
    // so an out-of-range ID fails loudly instead of being truncated.
    let wire_type =
        u8::try_from(type_id.as_u16()).expect("signal type id must fit in the u8 batch field");

    let events = vec![EventRecord {
        entity_id: 10,
        signal_type: wire_type,
        weight: 5.0,
        timestamp_nanos: 1_000_000_000,
    }];
    let bytes = encode_batch(&events, 1, 1).unwrap();

    // Build a transport that delivers the same segment twice.
    let (tx, rx) = crossbeam::channel::bounded(4);
    let transport = Arc::new(MultiTransport { rx });
    let handle = tidaldb::replication::spawn_receiver(
        Arc::clone(&transport),
        Arc::clone(&ledger),
        Arc::clone(&state),
    );

    // Send the same segment twice.
    for _ in 0..2 {
        tx.send(WalSegmentPayload {
            id: WalSegmentId::new(tidaldb::replication::RegionId::SINGLE, ShardId::SINGLE, 1),
            bytes: bytes.clone(),
            event_count: 1,
        })
        .unwrap();
    }

    // Give the receiver time to consume both copies before it is shut down.
    // NOTE(review): if `join()` is guaranteed to drain buffered segments once
    // the sender is dropped, this sleep is redundant — confirm and remove.
    std::thread::sleep(Duration::from_millis(100));
    drop(tx);
    handle.join();

    // The applied seqno must still be 1 after the duplicate delivery.
    assert_eq!(state.applied_seqno(ShardId::SINGLE), Some(1));

    // Read the hot tier directly.
    // NOTE(review): the intent is that the entity carries weight 5.0 (one
    // application) rather than 10.0 (two), but only existence is asserted
    // here; the accumulated weight is not inspected.
    let entry = ledger.entries().get(&(EntityId::new(10), type_id));
    assert!(entry.is_some(), "entity should exist in ledger");
}
// ── Test 5: InProcessTransport end-to-end ────────────────────────────────
/// Sends one segment from shard 0 to shard 1 over the in-process transport
/// and checks that shard 1 receives it with seqno, event count, and bytes
/// intact.
#[test]
fn in_process_transport_delivers_segment() {
    let shards = vec![ShardId(0), ShardId(1)];
    let mut transports = InProcessTransportFactory::new(&shards).build();
    let t0 = transports.remove(&ShardId(0)).unwrap();
    let t1 = transports.remove(&ShardId(1)).unwrap();

    let schema = make_schema();
    let ledger = Arc::new(SignalLedger::new(schema, Box::new(NoopWalWriter)));
    let type_id = ledger.resolve_signal_type("view").unwrap();

    // The batch record stores the signal type in a `u8`; convert with a check
    // so an out-of-range ID fails loudly instead of being truncated.
    let wire_type =
        u8::try_from(type_id.as_u16()).expect("signal type id must fit in the u8 batch field");

    // Shard 0 sends a segment to shard 1.
    let events = vec![EventRecord {
        entity_id: 99,
        signal_type: wire_type,
        weight: 2.0,
        timestamp_nanos: 500,
    }];
    let bytes = encode_batch(&events, 1, 42).unwrap();
    t0.send_segment(
        ShardId(1),
        WalSegmentPayload {
            id: WalSegmentId::new(tidaldb::replication::RegionId::SINGLE, ShardId(0), 42),
            // Cloned because the original bytes are compared against below.
            bytes: bytes.clone(),
            event_count: 1,
        },
    )
    .unwrap();

    // Shard 1 receives.
    let payload = t1
        .recv_segment()
        .expect("shard 1 should receive the segment");
    assert_eq!(payload.id.seqno, 42);
    assert_eq!(payload.event_count, 1);
    assert_eq!(payload.bytes, bytes);

    // Drop both transports to clean up.
    drop(t0);
    drop(t1);
}
// ── Test 6: ReplicationLagGauge ──────────────────────────────────────────
/// Walks the gauge through four stages — idle, leader ahead, follower
/// partially caught up, follower fully caught up — checking the reported lag
/// at each stage.
#[test]
fn replication_lag_gauge_tracks_lag() {
    let follower_state = Arc::new(ReplicationState::single());
    let lag = ReplicationLagGauge::new(ShardId::SINGLE, Arc::clone(&follower_state));

    // Stage 0: nothing written, nothing applied.
    assert_eq!(lag.lag_segments(), 0);

    // Stage 1: leader advances to seqno 10; follower has applied nothing.
    lag.update_leader_seqno(10);
    assert_eq!(lag.lag_segments(), 10);

    // Stage 2: follower applies up to seqno 7, leaving 3 outstanding.
    follower_state.advance(ShardId::SINGLE, 7);
    assert_eq!(lag.lag_segments(), 3);

    // Stage 3: follower reaches seqno 10; no lag remains.
    follower_state.advance(ShardId::SINGLE, 10);
    assert_eq!(lag.lag_segments(), 0);
}
// ── Test 7: Full pipeline: leader -> transport -> follower ───────────────
/// Writes two signals on a leader, ships an equivalent hand-built batch to a
/// follower through a channel-backed transport, and checks that the follower
/// exposes both entities and reports the batch's last seqno as applied.
#[test]
fn full_pipeline_leader_to_follower() {
    /// Receive-only transport: `recv_segment` yields whatever is pushed into
    /// the channel and returns `None` once every sender has been dropped.
    /// `send_segment` accepts and discards its payload.
    struct PipeTransport {
        rx: crossbeam::channel::Receiver<WalSegmentPayload>,
    }
    impl Transport for PipeTransport {
        fn send_segment(
            &self,
            _to: ShardId,
            _payload: WalSegmentPayload,
        ) -> Result<(), TransportError> {
            Ok(())
        }
        fn recv_segment(&self) -> Option<WalSegmentPayload> {
            self.rx.recv().ok()
        }
        fn local_shard(&self) -> ShardId {
            ShardId::SINGLE
        }
    }

    let schema = make_schema();

    // Open leader and follower.
    let leader = open_leader(schema.clone());
    let follower = open_follower(schema.clone());

    // Resolve type ID using a standalone ledger (same schema).
    let type_id = resolve_view_type_id(&schema);
    let follower_state = follower.replication_state().clone();

    // The batch record stores the signal type in a `u8`; convert with a check
    // so an out-of-range ID fails loudly instead of being truncated.
    let wire_type =
        u8::try_from(type_id.as_u16()).expect("signal type id must fit in the u8 batch field");

    // Wire up a channel-based transport for the follower.
    let (tx, rx) = crossbeam::channel::bounded(16);
    let transport = Arc::new(PipeTransport { rx });
    follower.start_replication(Arc::clone(&transport)).unwrap();

    // Write signals on the leader.
    let ts = Timestamp::from_nanos(2_000_000_000);
    leader.signal("view", EntityId::new(100), 1.0, ts).unwrap();
    leader.signal("view", EntityId::new(101), 2.0, ts).unwrap();

    // Simulate the shipper: the batch below is built by hand to mirror the
    // two leader writes (it is not read back from the leader) and is then
    // sent to the follower via the transport.
    let events = vec![
        EventRecord {
            entity_id: 100,
            signal_type: wire_type,
            weight: 1.0,
            timestamp_nanos: 2_000_000_000,
        },
        EventRecord {
            entity_id: 101,
            signal_type: wire_type,
            weight: 2.0,
            timestamp_nanos: 2_000_000_000,
        },
    ];
    let batch_bytes = encode_batch(&events, 1, 1).unwrap();
    tx.send(WalSegmentPayload {
        id: WalSegmentId::new(tidaldb::replication::RegionId::SINGLE, ShardId::SINGLE, 1),
        bytes: batch_bytes,
        event_count: 2,
    })
    .unwrap();

    // Wait for the follower with a bounded poll rather than a fixed sleep: a
    // fixed interval is flaky on a loaded machine and wasteful on a fast one.
    // All observable effects are polled so the loop does not depend on the
    // order in which the receiver publishes them. On timeout the assertions
    // below report the failure.
    let deadline = std::time::Instant::now() + Duration::from_secs(5);
    loop {
        let applied = follower_state.applied_seqno(ShardId::SINGLE) == Some(2);
        let visible = [100, 101].into_iter().all(|id| {
            matches!(
                follower.read_decay_score(EntityId::new(id), "view", 0),
                Ok(Some(_))
            )
        });
        if (applied && visible) || std::time::Instant::now() >= deadline {
            break;
        }
        std::thread::sleep(Duration::from_millis(5));
    }

    // Verify the follower has the signals.
    let score_100 = follower
        .read_decay_score(EntityId::new(100), "view", 0)
        .unwrap();
    let score_101 = follower
        .read_decay_score(EntityId::new(101), "view", 0)
        .unwrap();
    assert!(
        score_100.is_some(),
        "entity 100 should be visible on follower"
    );
    assert!(
        score_101.is_some(),
        "entity 101 should be visible on follower"
    );

    // Verify replication state.
    // Batch has 2 events starting at seq 1, so last seq = 1 + 2 - 1 = 2.
    let applied = follower_state.applied_seqno(ShardId::SINGLE);
    assert_eq!(
        applied,
        Some(2),
        "replication state should reflect applied batch"
    );

    // Cleanup: dropping the sender lets the follower's receiver exit.
    drop(tx);
    leader.close().unwrap();
    follower.close().unwrap();
}