tidaldb/tidal/tests/m8p5_multitenancy.rs
jordan 98bdc18a49 feat: add iknowyou app + complete M8 replication extensions + Aeries agents/skills
- applications/iknowyou: new Next.js chat application with persona-aware conversations,
  briefing API, cohort logic, vLLM streaming, and sidebar navigation
- tidal M8: add replication control plane (control.rs), tenant migration state machine
  (migration.rs), tenant/upgrade coordinators, cluster/fault test harnesses
- tidal M8 tests: expand m8p2/m8p3/m8p4 test suites; add m8p5_multitenancy and m8_uat
- tidal db: split replication_ops out of db/mod.rs (was 647 lines, now 574)
- .claude: add kai-park, kaya-osei, mira-vasquez agents; add aeries-design-architect,
  aeries-fullstack-engineer, aeries-product-visionary skills
- docs: update ROADMAP.md

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-24 21:09:11 -07:00

345 lines
12 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//! m8p5 integration tests: Control Plane, Multi-Tenancy, and Routing.
//! All tests are sync (`#[test]`). No `tokio::test`.
#![allow(clippy::unwrap_used)]
use std::sync::{Arc, RwLock};
use tidaldb::replication::state::ReplicationState;
use tidaldb::replication::{
ClusterTopology, ControlPlane, MigrationState, RegionId, ReplicationLagGauge, ShardAssignment,
ShardId, TenantConfig, TenantId, TenantMigration, TenantRouter, UpgradePhase,
};
use tidaldb::schema::EntityId;
/// Builds a [`TenantRouter`] / [`ControlPlane`] pair on top of
/// `ClusterTopology::single()`.
///
/// The router and the control plane are handed the same
/// `Arc<RwLock<ClusterTopology>>`. The control plane's lag gauge is created
/// for `ShardId::SINGLE` over `ReplicationState::single()`.
fn make_router_and_cp() -> (Arc<TenantRouter>, Arc<ControlPlane>) {
    let topology = Arc::new(RwLock::new(ClusterTopology::single()));
    let router = Arc::new(TenantRouter::new(Arc::clone(&topology)));
    let lag_gauge = Arc::new(ReplicationLagGauge::new(
        ShardId::SINGLE,
        Arc::new(ReplicationState::single()),
    ));
    let control_plane = ControlPlane::new(topology, Arc::clone(&router), lag_gauge);
    (router, Arc::new(control_plane))
}
/// Builds a [`TenantRouter`] / [`ControlPlane`] pair over a two-shard topology:
/// shard 0 in region 0 and shard 1 in region 1.
///
/// As in the single-shard helper, the lag gauge is created for
/// `ShardId::SINGLE` over `ReplicationState::single()`.
fn make_two_shard_router_and_cp() -> (Arc<TenantRouter>, Arc<ControlPlane>) {
    let shard_zero = ShardAssignment {
        shard_id: ShardId(0),
        region_id: RegionId(0),
    };
    let shard_one = ShardAssignment {
        shard_id: ShardId(1),
        region_id: RegionId(1),
    };
    let topology = Arc::new(RwLock::new(ClusterTopology {
        shards: vec![shard_zero, shard_one],
    }));

    let router = Arc::new(TenantRouter::new(Arc::clone(&topology)));
    let lag_gauge = Arc::new(ReplicationLagGauge::new(
        ShardId::SINGLE,
        Arc::new(ReplicationState::single()),
    ));
    let control_plane = ControlPlane::new(topology, Arc::clone(&router), lag_gauge);
    (router, Arc::new(control_plane))
}
// ─── Rate Limiting ──────────────────────────────────────────────────────────
/// A tenant registered with `max_signals_per_sec = Some(100)` is expected to
/// accept 200 back-to-back acquisitions (the test assumes burst capacity is
/// twice the rate) and to reject the next one with `TidalError::QuotaExceeded`.
#[test]
fn test_tenant_rate_limiting() {
    use tidaldb::TidalError;

    let (router, _cp) = make_router_and_cp();
    let tenant_cfg = TenantConfig {
        max_signals_per_sec: Some(100),
        ..TenantConfig::default_tenant()
    };
    router.register_tenant(tenant_cfg);

    let limiter = router.rate_limiter_for(TenantId::DEFAULT).expect("limiter");

    // Drain the whole burst allowance; every single acquisition must succeed.
    for i in 0..200 {
        let outcome = limiter.try_acquire();
        assert!(
            outcome.is_ok(),
            "acquisition #{i} should succeed (burst capacity = 200)"
        );
    }

    // The bucket is now empty, so the next attempt must be refused.
    let rejection = limiter.try_acquire().expect_err("should be quota exceeded");
    assert!(matches!(rejection, TidalError::QuotaExceeded(_)));
}
// ─── Noisy Neighbor ──────────────────────────────────────────────────────────
/// Exhausting one tenant's rate limiter must not affect another tenant's.
///
/// Both tenants are registered with `max_signals_per_sec = Some(50)`. Tenant A
/// is hit with 100 acquisitions (results ignored), after which A must be
/// rejected while B can still acquire.
#[test]
fn test_noisy_neighbor_isolation() {
    let (router, _cp) = make_router_and_cp();

    let tenant_a = TenantConfig {
        max_signals_per_sec: Some(50),
        ..TenantConfig::default_tenant()
    };
    router.register_tenant(tenant_a);

    let tenant_b = TenantConfig {
        max_signals_per_sec: Some(50),
        ..TenantConfig::unlimited(TenantId(1), "tenant-b")
    };
    router.register_tenant(tenant_b);

    let limiter_a = router
        .rate_limiter_for(TenantId::DEFAULT)
        .expect("limiter A");
    let limiter_b = router.rate_limiter_for(TenantId(1)).expect("limiter B");

    // Burn through tenant A's allowance (the test assumes 2x burst = 100 tokens).
    // Individual outcomes are deliberately ignored here.
    (0..100).for_each(|_| {
        let _ = limiter_a.try_acquire();
    });

    let a_outcome = limiter_a.try_acquire();
    assert!(a_outcome.is_err(), "tenant A should be rate limited");

    let b_outcome = limiter_b.try_acquire();
    assert!(
        b_outcome.is_ok(),
        "tenant B should not be affected by tenant A's exhaustion"
    );
}
// ─── Residency Policy ────────────────────────────────────────────────────────
/// A tenant whose `required_regions` is `[RegionId(1)]` must have every probed
/// entity (ids 0..100) routed to shard 1 / region 1 in a two-shard topology.
#[test]
fn test_tenant_residency_policy() {
    let shard_zero = ShardAssignment {
        shard_id: ShardId(0),
        region_id: RegionId(0),
    };
    let shard_one = ShardAssignment {
        shard_id: ShardId(1),
        region_id: RegionId(1),
    };
    let topology = Arc::new(RwLock::new(ClusterTopology {
        shards: vec![shard_zero, shard_one],
    }));
    let router = Arc::new(TenantRouter::new(Arc::clone(&topology)));

    // No quotas at all; the only constraint is the region requirement.
    router.register_tenant(TenantConfig {
        tenant_id: TenantId(42),
        max_signals_per_sec: None,
        max_entities: None,
        max_storage_bytes: None,
        required_regions: vec![RegionId(1)],
        label: String::from("region-1-only"),
    });

    (0u64..100).for_each(|i| {
        let routed = router.route(TenantId(42), EntityId::new(i)).expect("route");
        assert_eq!(
            routed.shard_id,
            ShardId(1),
            "entity {i} routed to wrong shard"
        );
        assert_eq!(routed.region_id, RegionId(1));
    });
}
// ─── Migration State Machine ─────────────────────────────────────────────────
/// Walks a [`TenantMigration`] through its states in order and checks that
/// each out-of-order call is rejected.
///
/// Sequence exercised: `Idle` -> `PreparingTarget` -> `DualWrite` ->
/// `Finalizing` -> `Complete`. Out-of-order calls must fail with
/// `TidalError::InvalidState`; calling `finalize(50)` after
/// `enter_dual_write(100)` must fail with `TidalError::NotReady`.
#[test]
fn test_migration_state_machine() {
    use tidaldb::TidalError;

    // Shared predicate for the repeated "wrong state" rejections below.
    let is_invalid_state = |e: &TidalError| matches!(e, TidalError::InvalidState(_));

    let (router, cp) = make_router_and_cp();
    let migration = TenantMigration::new(
        TenantId(10),
        ShardId::SINGLE,
        ShardId(1),
        Arc::clone(&cp),
        Arc::clone(&router),
    );
    assert_eq!(migration.current_state(), MigrationState::Idle);

    // Rejected: dual-write cannot be entered straight from Idle.
    let premature_dual_write = migration.enter_dual_write(0).expect_err("should fail");
    assert!(is_invalid_state(&premature_dual_write));

    // Idle -> PreparingTarget
    assert_eq!(migration.prepare_target(50).expect("prepare_target"), 50);
    let after_prepare = migration.current_state();
    assert!(matches!(
        after_prepare,
        MigrationState::PreparingTarget {
            last_shipped_seqno: 50
        }
    ));

    // Rejected: preparing the target a second time.
    let repeated_prepare = migration.prepare_target(99).expect_err("should fail");
    assert!(is_invalid_state(&repeated_prepare));

    // PreparingTarget -> DualWrite
    assert_eq!(
        migration.enter_dual_write(100).expect("enter_dual_write"),
        100
    );
    let after_dual_write = migration.current_state();
    assert!(matches!(
        after_dual_write,
        MigrationState::DualWrite { cutover_seqno: 100 }
    ));

    // Rejected with NotReady (not InvalidState): target is behind the cutover.
    let err = migration.finalize(50).expect_err("should fail");
    assert!(
        matches!(err, TidalError::NotReady(_)),
        "expected NotReady, got: {err}"
    );

    // DualWrite -> Finalizing
    migration.finalize(150).expect("finalize");
    let after_finalize = migration.current_state();
    assert!(matches!(after_finalize, MigrationState::Finalizing { .. }));

    // Rejected: finalizing a second time.
    let repeated_finalize = migration.finalize(200).expect_err("should fail");
    assert!(is_invalid_state(&repeated_finalize));

    // Finalizing -> Complete (GC window = 0)
    migration.gc_source(0).expect("gc_source");
    assert_eq!(migration.current_state(), MigrationState::Complete);

    // Rejected: nothing may follow Complete.
    let after_complete = migration.prepare_target(1).expect_err("should fail");
    assert!(is_invalid_state(&after_complete));
}
/// `gc_source` must refuse to run while the requested GC window has not yet
/// elapsed, reporting `TidalError::InvalidState`.
#[test]
fn test_gc_source_rejects_before_window_elapses() {
    use tidaldb::TidalError;

    let (router, cp) = make_router_and_cp();
    let migration = TenantMigration::new(
        TenantId(11),
        ShardId::SINGLE,
        ShardId(1),
        Arc::clone(&cp),
        Arc::clone(&router),
    );

    // Drive the migration up to the point where `gc_source` is the next step.
    migration.prepare_target(10).unwrap();
    migration.enter_dual_write(20).unwrap();
    migration.finalize(25).unwrap();

    // `finalize` ran a moment ago, so a long window cannot have elapsed yet.
    // NOTE(review): 600_000_000_000 is meant as 10 minutes, which implies the
    // argument is in nanoseconds — confirm against `gc_source`.
    let gc_attempt = migration.gc_source(600_000_000_000);
    let err = gc_attempt.expect_err("should be rejected");
    assert!(
        matches!(err, TidalError::InvalidState(_)),
        "expected InvalidState for GC window not elapsed, got: {err}"
    );
}
// ─── Dual-Write Routing ───────────────────────────────────────────────────────
/// While a tenant is in dual-write mode, `write_assignments` must return
/// exactly two assignments covering both the source and the target shard.
#[test]
fn test_dual_write_routing_returns_both_shards() {
    let (router, _cp) = make_two_shard_router_and_cp();

    router.set_dual_write(TenantId(5), ShardId(0), ShardId(1));
    let dual_write_active = router.is_dual_write(TenantId(5));
    assert!(dual_write_active);

    let assignments = router
        .write_assignments(TenantId(5), EntityId::new(42))
        .expect("write_assignments");
    assert_eq!(assignments.len(), 2, "dual-write must return 2 assignments");

    // Membership probe over the returned assignments' shard ids.
    let has_shard = |wanted: ShardId| assignments.iter().any(|a| a.shard_id == wanted);
    assert!(has_shard(ShardId(0)), "source shard must be in assignments");
    assert!(has_shard(ShardId(1)), "target shard must be in assignments");
}
/// Once `finalize_migration` has run, the tenant leaves dual-write mode and
/// both reads (`route`) and writes (`write_assignments`) go to the target shard only.
#[test]
fn test_finalize_migration_pins_to_target() {
    let (router, _cp) = make_two_shard_router_and_cp();
    router.set_dual_write(TenantId(7), ShardId(0), ShardId(1));
    router.finalize_migration(TenantId(7), ShardId(1));

    // Dual-write mode must be switched off again.
    let still_dual_write = router.is_dual_write(TenantId(7));
    assert!(!still_dual_write);

    // The tenant must now be pinned to the target shard.
    let pinned = router.pinned_shard(TenantId(7));
    assert_eq!(pinned, Some(ShardId(1)));

    // Every routed entity lands on the target shard.
    (0u64..20).for_each(|entity| {
        let routed = router.route(TenantId(7), EntityId::new(entity)).unwrap();
        assert_eq!(
            routed.shard_id,
            ShardId(1),
            "post-migration routing must pin to target shard"
        );
    });

    // Writes are no longer fanned out: a single assignment, the target shard.
    let writes = router
        .write_assignments(TenantId(7), EntityId::new(99))
        .unwrap();
    assert_eq!(writes.len(), 1);
    assert_eq!(writes[0].shard_id, ShardId(1));
}
// ─── Rolling Upgrade ─────────────────────────────────────────────────────────
/// Exercises a drain / rejoin cycle on a two-shard cluster.
///
/// Checks that draining shard 0 moves the coordinator to
/// `UpgradePhase::Draining`, that a second concurrent drain is rejected with
/// `TidalError::InvalidState`, and that after rejoining shard 0 the
/// coordinator is `Ready` again and shard 1 can be drained.
#[test]
fn test_rolling_upgrade_drain_rejoin() {
    use tidaldb::replication::RollingUpgradeCoordinator;
    use tidaldb::TidalError;

    // Two shards, so one can be drained while the other keeps serving.
    let (_router, cp) = make_two_shard_router_and_cp();
    let coordinator = RollingUpgradeCoordinator::new(Arc::clone(&cp));

    assert_eq!(coordinator.current_phase(), UpgradePhase::Ready);
    let drained_before = coordinator.is_drained(ShardId(0));
    assert!(!drained_before);

    coordinator.drain(ShardId(0)).expect("drain shard 0");
    let drained_after = coordinator.is_drained(ShardId(0));
    assert!(drained_after);
    let phase_while_draining = coordinator.current_phase();
    assert!(matches!(
        phase_while_draining,
        UpgradePhase::Draining {
            shard_id: ShardId(0)
        }
    ));

    // A second drain while one is already in progress must be refused.
    let concurrent_drain = coordinator.drain(ShardId(1)).expect_err("should fail");
    assert!(matches!(concurrent_drain, TidalError::InvalidState(_)));

    coordinator.rejoin(ShardId(0)).expect("rejoin");
    let drained_after_rejoin = coordinator.is_drained(ShardId(0));
    assert!(!drained_after_rejoin);
    assert_eq!(coordinator.current_phase(), UpgradePhase::Ready);

    // With shard 0 back, the other shard may now be drained.
    coordinator.drain(ShardId(1)).expect("drain shard 1");
    let other_drained = coordinator.is_drained(ShardId(1));
    assert!(other_drained);
}
/// On a single-shard cluster, draining `ShardId::SINGLE` must be rejected with
/// `TidalError::InvalidState`.
#[test]
fn test_drain_single_node_cluster_is_refused() {
    use tidaldb::replication::RollingUpgradeCoordinator;
    use tidaldb::TidalError;

    // Single-shard topology: there is no other shard to take over.
    let (_router, cp) = make_router_and_cp();
    let coordinator = RollingUpgradeCoordinator::new(Arc::clone(&cp));

    let drain_attempt = coordinator.drain(ShardId::SINGLE);
    let err = drain_attempt.expect_err("must refuse to drain the only node");
    assert!(
        matches!(err, TidalError::InvalidState(_)),
        "expected InvalidState, got: {err}"
    );
}