//! Milestone 7 Task 08: Hard Negative Crash Invariant Tests.
//!
//! Proves that after any close + reopen cycle, RETRIEVE never returns hidden
//! items or blocked-creator content. The invariant under test:
//!
//! If user U has hidden item X or blocked creator C, then after close + reopen,
//! `RETRIEVE ... FOR USER @U` must NEVER return item X or items from creator C.
//!
//! Design note: In-memory indexes (universe bitmap, category/format/range) are
//! NOT persisted and must be rebuilt on reopen. After reopen, we call
//! `repopulate_items` to re-write item metadata into the in-memory indexes.
//! The hide/block relationship state IS persisted (fjall users keyspace) and
//! is rebuilt by `rebuild_entity_state` on open. This test verifies that the
//! rebuilt hide/block state correctly excludes items from RETRIEVE results.
//!
//! Tests:
//! 1. Hidden items never returned after restart.
//! 2. Blocked creator content never returned after restart.
//! 3. Combined hide + block after restart.
//! 4. Property test: random hides/blocks survive restart.
//! 5. Hard negatives from direct hide survive restart.
#![allow( clippy::unwrap_used, clippy::cast_precision_loss, clippy::too_many_lines )] use std::collections::{HashMap, HashSet}; use std::time::Duration; use proptest::prelude::*; use tempfile::tempdir; use tidaldb::TidalDb; use tidaldb::entities::RelationshipType; use tidaldb::query::retrieve::Retrieve; use tidaldb::schema::{DecaySpec, EntityId, EntityKind, SchemaBuilder, Timestamp, Window}; // ── Schema ────────────────────────────────────────────────────────────────── fn invariant_schema() -> tidaldb::schema::Schema { let mut builder = SchemaBuilder::new(); for &(name, half_life_days) in &[ ("view", 7), ("like", 14), ("skip", 1), ("hide", 1), ("block", 1), ] { let _ = builder .signal( name, EntityKind::Item, DecaySpec::Exponential { half_life: Duration::from_secs(half_life_days * 24 * 3600), }, ) .windows(&[Window::AllTime]) .velocity(false) .add(); } builder.build().expect("invariant schema must be valid") } // ── Helpers ───────────────────────────────────────────────────────────────── /// Creator assignment function: `creator_id = (item_id % 5) + 1`. const fn default_creator(id: u64) -> u64 { (id % 5) + 1 } /// Build item metadata for a given item ID and creator assignment function. fn item_metadata(id: u64, creator_fn: &F) -> HashMap where F: Fn(u64) -> u64, { let creator_id = creator_fn(id); let mut meta = HashMap::new(); meta.insert("title".to_string(), format!("Item {id}")); meta.insert("category".to_string(), "jazz".to_string()); meta.insert("format".to_string(), "audio".to_string()); meta.insert("creator_id".to_string(), creator_id.to_string()); meta } /// Write items with `creator_id` metadata and a view signal so they appear in /// RETRIEVE results. Items are assigned to creators via `creator_fn(item_id)`. 
fn write_items(db: &TidalDb, item_ids: &[u64], now: Timestamp, creator_fn: &F) where F: Fn(u64) -> u64, { for &id in item_ids { let meta = item_metadata(id, creator_fn); db.write_item_with_metadata(EntityId::new(id), &meta) .unwrap(); // Signal so the item has ranking weight. db.signal("view", EntityId::new(id), 1.0, now).unwrap(); } } /// Repopulate in-memory indexes after reopen. /// /// In-memory indexes (universe bitmap, category/format/range bitmaps) are NOT /// persisted. After reopen, the item data exists in fjall storage but the /// in-memory indexes are empty. This function re-writes item metadata to /// repopulate the indexes, making items discoverable by RETRIEVE. /// /// Signal state IS restored from checkpoint + WAL replay, so we do not need /// to re-signal items. fn repopulate_items(db: &TidalDb, item_ids: &[u64], creator_fn: &F) where F: Fn(u64) -> u64, { for &id in item_ids { let meta = item_metadata(id, creator_fn); db.write_item_with_metadata(EntityId::new(id), &meta) .unwrap(); } } /// RETRIEVE with FOR USER, using the "new" profile (sorts by entity ID desc, /// no signal requirements). Returns the set of item IDs in results. fn retrieve_for_user(db: &TidalDb, user_id: u64, limit: usize) -> HashSet { let query = Retrieve::builder() .profile("new") .for_user(user_id) .limit(limit) .build() .unwrap(); let results = db.retrieve(&query).unwrap(); results.items.iter().map(|r| r.entity_id.as_u64()).collect() } /// Open a persistent DB at the given directory with the invariant schema. 
fn open_persistent(dir: &std::path::Path) -> TidalDb { TidalDb::builder() .with_data_dir(dir) .with_schema(invariant_schema()) .open() .expect("persistent open should succeed") } // ── Test 1: Hidden items never returned after restart ─────────────────────── #[test] fn hidden_items_never_returned_after_restart() { let dir = tempdir().unwrap(); let now = Timestamp::now(); let user_id = 1001u64; let hidden_ids: HashSet = [3, 7, 15, 22].into_iter().collect(); let item_ids: Vec = (1..=30).collect(); // Phase 1: populate + hide + verify + close. { let db = open_persistent(dir.path()); write_items(&db, &item_ids, now, &default_creator); // Write user so user-context filtering is active. let user_meta = HashMap::new(); db.write_user(EntityId::new(user_id), &user_meta).unwrap(); // Hide specific items. for &hid in &hidden_ids { db.write_relationship( EntityId::new(user_id), RelationshipType::Hide, EntityId::new(hid), 1.0, now, ) .unwrap(); } // Pre-crash verification: hidden items must not appear. let pre_results = retrieve_for_user(&db, user_id, 100); for &hid in &hidden_ids { assert!( !pre_results.contains(&hid), "pre-crash: hidden item {hid} should not appear in results" ); } assert!( !pre_results.is_empty(), "pre-crash: should return some items" ); db.close().unwrap(); } // Phase 2: reopen + repopulate indexes + verify invariant holds. { let db = open_persistent(dir.path()); // Repopulate in-memory indexes (universe bitmap etc.) so items are // discoverable by RETRIEVE. Hide/block state was already rebuilt from // durable relationship edges by rebuild_entity_state. repopulate_items(&db, &item_ids, &default_creator); let post_results = retrieve_for_user(&db, user_id, 100); for &hid in &hidden_ids { assert!( !post_results.contains(&hid), "post-restart: hidden item {hid} must not appear in results" ); } assert!( !post_results.is_empty(), "post-restart: should return some items" ); // Verify all returned items are in the valid range and not hidden. 
for &id in &post_results { assert!( (1..=30).contains(&id), "post-restart: unexpected item id {id}" ); assert!( !hidden_ids.contains(&id), "post-restart: hidden item {id} leaked into results" ); } db.close().unwrap(); } } // ── Test 2: Blocked creator content never returned after restart ───────────── #[test] fn blocked_creator_content_never_returned_after_restart() { let dir = tempdir().unwrap(); let now = Timestamp::now(); let user_id = 1001u64; let blocked_creator = 3u64; let item_ids: Vec = (1..=30).collect(); // Phase 1: populate + block creator 3 + verify + close. { let db = open_persistent(dir.path()); // creator_id = (item_id % 5) + 1, so creator 3 owns items where // (item_id % 5) + 1 == 3, i.e. item_id % 5 == 2: items 2, 7, 12, 17, 22, 27. write_items(&db, &item_ids, now, &default_creator); let user_meta = HashMap::new(); db.write_user(EntityId::new(user_id), &user_meta).unwrap(); // Block creator 3. db.write_relationship( EntityId::new(user_id), RelationshipType::Blocks, EntityId::new(blocked_creator), 1.0, now, ) .unwrap(); // Pre-crash verification. let pre_results = retrieve_for_user(&db, user_id, 100); let creator3_items: HashSet = (1..=30) .filter(|&id| default_creator(id) == blocked_creator) .collect(); for &cid in &creator3_items { assert!( !pre_results.contains(&cid), "pre-crash: item {cid} from blocked creator {blocked_creator} should not appear" ); } assert!( !pre_results.is_empty(), "pre-crash: should return non-blocked items" ); db.close().unwrap(); } // Phase 2: reopen + repopulate + verify. 
{ let db = open_persistent(dir.path()); repopulate_items(&db, &item_ids, &default_creator); let post_results = retrieve_for_user(&db, user_id, 100); let creator3_items: HashSet = (1..=30) .filter(|&id| default_creator(id) == blocked_creator) .collect(); for &cid in &creator3_items { assert!( !post_results.contains(&cid), "post-restart: item {cid} from blocked creator {blocked_creator} must not appear" ); } assert!( !post_results.is_empty(), "post-restart: should return non-blocked items" ); // Verify all returned items are from non-blocked creators. for &id in &post_results { let creator_id = default_creator(id); assert_ne!( creator_id, blocked_creator, "post-restart: item {id} from blocked creator {blocked_creator} leaked" ); } db.close().unwrap(); } } // ── Test 3: Combined hide and block after restart ─────────────────────────── #[test] fn combined_hide_and_block_after_restart() { let dir = tempdir().unwrap(); let now = Timestamp::now(); let user_id = 1001u64; let hidden_ids: HashSet = [5, 10, 25].into_iter().collect(); let blocked_creator = 2u64; let item_ids: Vec = (1..=30).collect(); // Phase 1: populate + hide items + block creator + verify + close. { let db = open_persistent(dir.path()); write_items(&db, &item_ids, now, &default_creator); let user_meta = HashMap::new(); db.write_user(EntityId::new(user_id), &user_meta).unwrap(); // Hide specific items. for &hid in &hidden_ids { db.write_relationship( EntityId::new(user_id), RelationshipType::Hide, EntityId::new(hid), 1.0, now, ) .unwrap(); } // Block creator 2: owns items where (id % 5) + 1 == 2, i.e. id % 5 == 1: // items 1, 6, 11, 16, 21, 26. db.write_relationship( EntityId::new(user_id), RelationshipType::Blocks, EntityId::new(blocked_creator), 1.0, now, ) .unwrap(); // Pre-crash verification. 
let pre_results = retrieve_for_user(&db, user_id, 100); let creator2_items: HashSet = (1..=30) .filter(|&id| default_creator(id) == blocked_creator) .collect(); let all_excluded: HashSet = hidden_ids.union(&creator2_items).copied().collect(); for &ex in &all_excluded { assert!( !pre_results.contains(&ex), "pre-crash: excluded item {ex} should not appear" ); } assert!( !pre_results.is_empty(), "pre-crash: should return some items" ); db.close().unwrap(); } // Phase 2: reopen + repopulate + verify. { let db = open_persistent(dir.path()); repopulate_items(&db, &item_ids, &default_creator); let post_results = retrieve_for_user(&db, user_id, 100); let creator2_items: HashSet = (1..=30) .filter(|&id| default_creator(id) == blocked_creator) .collect(); let all_excluded: HashSet = hidden_ids.union(&creator2_items).copied().collect(); for &ex in &all_excluded { assert!( !post_results.contains(&ex), "post-restart: excluded item {ex} must not appear in results" ); } assert!( !post_results.is_empty(), "post-restart: should return some items" ); // Every returned item must be from a non-blocked creator and not hidden. for &id in &post_results { let creator_id = default_creator(id); assert_ne!( creator_id, blocked_creator, "post-restart: item {id} from blocked creator leaked" ); assert!( !hidden_ids.contains(&id), "post-restart: hidden item {id} leaked" ); } db.close().unwrap(); } } // ── Test 4: Property test -- random hides/blocks survive restart ───────────── proptest! { // 100 cases = spec minimum from task-08; parameter space is small so each // case completes in ~1s, giving ~100s total -- acceptable for CI. 
#![proptest_config(ProptestConfig { cases: 100, max_shrink_iters: 50, ..Default::default() })] #[test] fn no_phantom_items_after_restart( hide_count in 0usize..8, blocked_creator_idx in 0u64..5, ) { let dir = tempdir().unwrap(); let now = Timestamp::now(); let user_id = 1001u64; let total_items = 30u64; let item_ids: Vec = (1..=total_items).collect(); // Deterministically select which items to hide: first `hide_count` items. let hidden_ids: HashSet = (1..=total_items) .filter(|id| (*id as usize) <= hide_count) .collect(); // Creator to block: one of creators 1-5. let blocked_creator = blocked_creator_idx + 1; // Phase 1: populate, hide, block, close. { let db = open_persistent(dir.path()); write_items(&db, &item_ids, now, &default_creator); let user_meta = HashMap::new(); db.write_user(EntityId::new(user_id), &user_meta).unwrap(); for &hid in &hidden_ids { db.write_relationship( EntityId::new(user_id), RelationshipType::Hide, EntityId::new(hid), 1.0, now, ) .unwrap(); } db.write_relationship( EntityId::new(user_id), RelationshipType::Blocks, EntityId::new(blocked_creator), 1.0, now, ) .unwrap(); db.close().unwrap(); } // Phase 2: reopen + repopulate + verify. { let db = open_persistent(dir.path()); repopulate_items(&db, &item_ids, &default_creator); let results = retrieve_for_user(&db, user_id, 100); // No hidden items must appear. for &hid in &hidden_ids { prop_assert!( !results.contains(&hid), "hidden item {hid} appeared after restart" ); } // No items from blocked creator must appear. 
let blocked_items: HashSet = (1..=total_items) .filter(|&id| default_creator(id) == blocked_creator) .collect(); for &bid in &blocked_items { prop_assert!( !results.contains(&bid), "item {bid} from blocked creator {blocked_creator} appeared after restart" ); } // Results should be non-empty (at least some items are neither hidden // nor from the blocked creator -- unless all 30 are excluded, which is // possible with hide_count=7 + blocked_creator covering 6 items = 13 // excluded, leaving 17 items). let total_excluded = hidden_ids.len() + blocked_items.len() - hidden_ids.intersection(&blocked_items).count(); if total_excluded < total_items as usize { prop_assert!( !results.is_empty(), "expected non-empty results when {} items are excluded out of {}", total_excluded, total_items ); } db.close().unwrap(); } } } // ── Test 5: Hard negatives from session survive restart ────────────────────── #[test] fn hard_negatives_from_session_survive_restart() { let dir = tempdir().unwrap(); let now = Timestamp::now(); let user_id = 1001u64; let hidden_via_hide = [4u64, 8, 16]; let item_ids: Vec = (1..=20).collect(); let creator_fn = |id: u64| (id % 3) + 1; // Phase 1: populate, hide items via write_relationship, close. { let db = open_persistent(dir.path()); write_items(&db, &item_ids, now, &creator_fn); let user_meta = HashMap::new(); db.write_user(EntityId::new(user_id), &user_meta).unwrap(); // Hide items via the RelationshipType::Hide path (this is the durable // path used by sessions when they hide items). for &hid in &hidden_via_hide { db.write_relationship( EntityId::new(user_id), RelationshipType::Hide, EntityId::new(hid), 1.0, now, ) .unwrap(); } // Verify pre-close. 
let pre_results = retrieve_for_user(&db, user_id, 100); for &hid in &hidden_via_hide { assert!( !pre_results.contains(&hid), "pre-close: hidden item {hid} should not appear" ); } assert!( !pre_results.is_empty(), "pre-close: should return some items" ); db.close().unwrap(); } // Phase 2: reopen + repopulate + verify hidden items stay hidden. { let db = open_persistent(dir.path()); repopulate_items(&db, &item_ids, &creator_fn); let post_results = retrieve_for_user(&db, user_id, 100); for &hid in &hidden_via_hide { assert!( !post_results.contains(&hid), "post-restart: hidden item {hid} must not appear in results" ); } assert!( !post_results.is_empty(), "post-restart: should return some items" ); // Verify the actual user state was rebuilt correctly. let hidden_bitmap = db.user_state().hidden_items(user_id); for &hid in &hidden_via_hide { #[allow(clippy::cast_possible_truncation)] let hid_u32 = hid as u32; assert!( hidden_bitmap.contains(hid_u32), "post-restart: user_state hidden_items should contain {hid}" ); } db.close().unwrap(); } }