# Task 06: Multi-Tenancy Integration Tests

## Delivers

Integration test suite in `tidal/tests/m8p5_multitenancy.rs` verifying: per-tenant rate limiting, noisy-neighbor isolation, tenant residency routing, tenant migration with zero downtime, and rolling upgrade.

## Complexity: M

## Dependencies

- Tasks 01–05 complete

## Technical Design

```rust
// tidal/tests/m8p5_multitenancy.rs
use std::sync::Arc;
use std::time::Duration;

use tidaldb::replication::{
    ClusterTopology, ControlPlane, InProcessTransportFactory, RollingUpgradeCoordinator,
    ShardAssignment, TenantConfig, TenantId, TenantMigration, TenantRouter,
};
// NOTE(review): `ShardId`, `RegionId`, `ShardRouter`, `EntityId`, `Timestamp`, and
// `TidalError` are used below, but this design does not specify their module paths;
// import them from wherever Tasks 01–05 export them. The helpers
// `setup_migration_cluster` and `setup_three_node_cluster` must also be provided by
// this test file. `ControlPlane` and `InProcessTransportFactory` are presumably used
// by those helpers; drop them if not, because `-D warnings` rejects unused imports.

/// Builds a three-shard topology in which shard `n` is assigned to region `n`.
fn three_shard_topology() -> ClusterTopology {
    ClusterTopology {
        shards: vec![
            ShardAssignment { shard_id: ShardId(0), region_id: RegionId(0) },
            ShardAssignment { shard_id: ShardId(1), region_id: RegionId(1) },
            ShardAssignment { shard_id: ShardId(2), region_id: RegionId(2) },
        ],
    }
}

/// Rate limiting: a tenant configured at 100 signals/sec can burst up to
/// 200 signals (2x the rate) and is throttled on the next one.
#[test]
fn test_tenant_rate_limiting() {
    let topology = Arc::new(three_shard_topology());
    let shard_router = Arc::new(ShardRouter::new(Arc::clone(&topology)));
    let tenant_router = Arc::new(TenantRouter::new(shard_router, topology));

    let tenant = TenantId(1);
    tenant_router.register_tenant(TenantConfig {
        tenant_id: tenant,
        max_signals_per_sec: Some(100),
        max_entities: None,
        max_storage_bytes: None,
        required_regions: vec![],
        label: "test-tenant".into(),
    });

    let limiter = tenant_router.rate_limiter_for(tenant).unwrap();

    // Drain the bucket: 200 immediate acquires.
    let mut allowed = 0;
    let mut throttled = 0;
    for _ in 0..200 {
        match limiter.try_acquire() {
            Ok(()) => allowed += 1,
            Err(TidalError::QuotaExceeded(_)) => throttled += 1,
            _ => panic!("unexpected error"),
        }
    }

    // At 100 signals/sec the bucket holds a 2-second burst (200 tokens), so all
    // 200 acquires above must succeed.
    assert_eq!(throttled, 0, "burst capacity should absorb 200 signals");
    assert_eq!(allowed, 200, "all 200 burst signals should be admitted");

    // One more should fail.
    assert!(
        matches!(limiter.try_acquire(), Err(TidalError::QuotaExceeded(_))),
        "bucket should be empty after 200 signals"
    );
}

/// Noisy neighbor: tenant A at full burst doesn't affect tenant B.
#[test]
fn test_noisy_neighbor_isolation() {
    let topology = Arc::new(three_shard_topology());
    let shard_router = Arc::new(ShardRouter::new(Arc::clone(&topology)));
    let tenant_router = Arc::new(TenantRouter::new(shard_router, topology));

    let tenant_a = TenantId(1);
    let tenant_b = TenantId(2);

    tenant_router.register_tenant(TenantConfig {
        tenant_id: tenant_a,
        max_signals_per_sec: Some(10), // low limit
        max_entities: None,
        max_storage_bytes: None,
        required_regions: vec![],
        label: "noisy-tenant".into(),
    });
    tenant_router.register_tenant(TenantConfig {
        tenant_id: tenant_b,
        max_signals_per_sec: Some(10_000), // high limit
        max_entities: None,
        max_storage_bytes: None,
        required_regions: vec![],
        label: "good-tenant".into(),
    });

    let limiter_a = tenant_router.rate_limiter_for(tenant_a).unwrap();
    let limiter_b = tenant_router.rate_limiter_for(tenant_b).unwrap();

    // Exhaust tenant A's bucket. Results are deliberately ignored: only the side
    // effect on A's limiter matters here.
    for _ in 0..1000 {
        let _ = limiter_a.try_acquire();
    }

    // Tenant B should not be affected.
    for _ in 0..100 {
        assert!(
            limiter_b.try_acquire().is_ok(),
            "tenant B should not be throttled by tenant A's exhaustion"
        );
    }
}

/// Residency policy: tenant configured to stay in region 1 only routes there.
#[test]
fn test_tenant_residency_policy() {
    let topology = Arc::new(three_shard_topology());
    let shard_router = Arc::new(ShardRouter::new(Arc::clone(&topology)));
    let tenant_router = Arc::new(TenantRouter::new(shard_router, topology));

    let tenant = TenantId(10);
    tenant_router.register_tenant(TenantConfig {
        tenant_id: tenant,
        max_signals_per_sec: None,
        max_entities: None,
        max_storage_bytes: None,
        required_regions: vec![RegionId(1)], // EU residency
        label: "eu-tenant".into(),
    });

    // All entities for this tenant should route to shard 1 (region 1).
    for i in 0u64..100 {
        let assignment = tenant_router.route(tenant, EntityId::new(i)).unwrap();
        assert_eq!(
            assignment.region_id,
            RegionId(1),
            "entity {} should be in region 1 per residency policy",
            i
        );
    }
}

/// Tenant migration: move tenant 1 from shard 0 to shard 2 with zero downtime.
#[tokio::test]
async fn test_tenant_migration_zero_downtime() {
    let (db0, db2, factory) = setup_migration_cluster().await;
    let tenant = TenantId(1);

    // Write 100 signals to tenant 1 on shard 0 before migration.
    for i in 0..100u64 {
        db0.signal_for_tenant(tenant, "view", EntityId::new(i + 10), 1.0, Timestamp::now())
            .unwrap();
    }

    let migration = TenantMigration::new(
        tenant,
        ShardId(0),
        ShardId(2),
        db0.control_plane().clone(),
        db0.tenant_router().clone(),
        factory.transport(RegionId(0)),
    );

    // Phase 1: ship existing WAL to target.
    migration.prepare_target().await.unwrap();

    // Phase 2: enter dual-write; write 50 more signals.
    migration.enter_dual_write().await.unwrap();
    for i in 100..150u64 {
        db0.signal_for_tenant(tenant, "view", EntityId::new(i + 10), 1.0, Timestamp::now())
            .unwrap();
    }

    // Phase 3: finalize.
    // NOTE(review): the fixed 100 ms pause presumably lets in-flight dual-writes reach
    // the target before finalizing; a fixed sleep can be flaky on slow CI — confirm
    // whether `finalize` already waits for catch-up.
    tokio::time::sleep(Duration::from_millis(100)).await;
    migration.finalize().await.unwrap();

    // All 150 signals should be present on shard 2 (new home).
    let count_on_target = db2.total_signal_count_for_tenant(tenant, "view").unwrap();
    assert_eq!(count_on_target, 150, "all signals must be on target shard after migration");

    // Phase 4: GC (use 0 window for test).
    migration.gc_source(0).unwrap();
    let count_on_source = db0.total_signal_count_for_tenant(tenant, "view").unwrap();
    assert_eq!(count_on_source, 0, "source shard must have no tenant data after GC");
}

/// Rolling upgrade: drain node, "upgrade", rejoin; signals written during
/// the upgrade are present on the rejoined node.
#[tokio::test]
async fn test_rolling_upgrade_no_data_loss() {
    // The followers and the transport factory are not used directly, but the bindings
    // keep them alive for the duration of the test.
    let (db_leader, _db_followers, _factory) = setup_three_node_cluster().await;

    let coordinator = RollingUpgradeCoordinator::new(
        db_leader.control_plane().clone(),
        db_leader.wal_shipper().clone(),
    );

    // Drain follower 0.
    coordinator.drain(ShardId(1)).await.unwrap();

    // Write 200 signals during the "upgrade window".
    for i in 0..200u64 {
        db_leader.signal("view", EntityId::new(i + 1), 1.0, Timestamp::now()).unwrap();
    }

    // Rejoin (simulated: follower is already running, just re-enables routing).
    coordinator.rejoin(ShardId(1)).await.unwrap();

    // All 200 signals must be present on the rejoined follower. Zero replication lag
    // is used as the proxy for that.
    // NOTE(review): this assumes `rejoin` returns only after the follower has caught
    // up — confirm.
    let lag = db_leader.control_plane().lag_for(ShardId(1));
    assert_eq!(lag, 0, "no replication lag after rejoin");
}
```

## Acceptance Criteria

- [ ] `test_tenant_rate_limiting`: 200-signal burst absorbed (burst capacity is 2× the 100 signals/sec rate), 201st signal returns `QuotaExceeded` within 1ms
- [ ] `test_noisy_neighbor_isolation`: exhausting tenant A's rate limiter has no effect on tenant B
- [ ] `test_tenant_residency_policy`: all 100 entities for an EU-residency tenant route to region 1
- [ ] `test_tenant_migration_zero_downtime`: all 150 signals present on target shard after migration; source has 0 after GC
- [ ] `test_rolling_upgrade_no_data_loss`: 200 signals written during drain window present on rejoined follower (verified via zero replication lag)
- [ ] All 5 tests pass in `cargo test --test m8p5_multitenancy`
- [ ] `cargo clippy -- -D warnings` and `cargo fmt` pass