tidaldb/tidal/tests/m7_crash_invariant.rs
2026-02-23 22:41:16 -07:00

601 lines
20 KiB
Rust

//! Milestone 7 Task 08: Hard Negative Crash Invariant Tests.
//!
//! Proves that after any close + reopen cycle, RETRIEVE never returns hidden
//! items or blocked-creator content. The invariant under test:
//!
//! If user U has hidden item X or blocked creator C, then after close + reopen,
//! `RETRIEVE ... FOR USER @U` must NEVER return item X or items from creator C.
//!
//! Design note: In-memory indexes (universe bitmap, category/format/range) are
//! NOT persisted and must be rebuilt on reopen. After reopen, we call
//! `repopulate_items` to re-write item metadata into the in-memory indexes.
//! The hide/block relationship state IS persisted (fjall users keyspace) and
//! is rebuilt by `rebuild_entity_state` on open. This test verifies that the
//! rebuilt hide/block state correctly excludes items from RETRIEVE results.
//!
//! Tests:
//! 1. Hidden items never returned after restart.
//! 2. Blocked creator content never returned after restart.
//! 3. Combined hide + block after restart.
//! 4. Property test: random hides/blocks survive restart.
//! 5. Hard negatives written through the durable `RelationshipType::Hide` path (the one sessions use) survive restart.
#![allow(
clippy::unwrap_used,
clippy::cast_precision_loss,
clippy::too_many_lines
)]
use std::collections::{HashMap, HashSet};
use std::time::Duration;
use proptest::prelude::*;
use tempfile::tempdir;
use tidaldb::TidalDb;
use tidaldb::entities::RelationshipType;
use tidaldb::query::retrieve::Retrieve;
use tidaldb::schema::{DecaySpec, EntityId, EntityKind, SchemaBuilder, Timestamp, Window};
// ── Schema ──────────────────────────────────────────────────────────────────
/// Builds the schema shared by every test in this file.
///
/// Five item-level signals are registered (`view`, `like`, `skip`, `hide`,
/// `block`). Each one uses exponential decay with its own half-life, a single
/// all-time window, and `velocity(false)`.
///
/// # Panics
///
/// Panics if the builder rejects the finished schema.
fn invariant_schema() -> tidaldb::schema::Schema {
    const SECS_PER_DAY: u64 = 24 * 3600;
    // (signal name, exponential half-life in days)
    const SIGNAL_HALF_LIVES: [(&str, u64); 5] = [
        ("view", 7),
        ("like", 14),
        ("skip", 1),
        ("hide", 1),
        ("block", 1),
    ];

    let mut schema = SchemaBuilder::new();
    for &(signal_name, days) in &SIGNAL_HALF_LIVES {
        let decay = DecaySpec::Exponential {
            half_life: Duration::from_secs(days * SECS_PER_DAY),
        };
        // Whatever `add()` hands back is intentionally dropped; problems with
        // the definition are reported by `build()` below.
        // NOTE(review): confirm `add()` does not return an error worth checking.
        let _ = schema
            .signal(signal_name, EntityKind::Item, decay)
            .windows(&[Window::AllTime])
            .velocity(false)
            .add();
    }
    schema.build().expect("invariant schema must be valid")
}
// ── Helpers ─────────────────────────────────────────────────────────────────
/// Default item-to-creator mapping: `creator_id = (item_id % 5) + 1`.
///
/// Every item ID lands on one of five creators, numbered 1 through 5.
const fn default_creator(id: u64) -> u64 {
    const CREATOR_COUNT: u64 = 5;
    1 + id % CREATOR_COUNT
}
/// Produces the metadata map written for item `id`.
///
/// The map always holds exactly four entries:
/// - `title`: `"Item <id>"`
/// - `category`: `"jazz"`
/// - `format`: `"audio"`
/// - `creator_id`: the decimal form of `creator_fn(id)`
fn item_metadata<F>(id: u64, creator_fn: &F) -> HashMap<String, String>
where
    F: Fn(u64) -> u64,
{
    // Resolve the creator first so the callback runs before any formatting.
    let creator = creator_fn(id).to_string();

    HashMap::from([
        ("title".to_owned(), format!("Item {id}")),
        ("category".to_owned(), "jazz".to_owned()),
        ("format".to_owned(), "audio".to_owned()),
        ("creator_id".to_owned(), creator),
    ])
}
/// Seeds the database with one item per entry in `item_ids`.
///
/// For each ID this writes the metadata produced by `item_metadata` (which
/// includes the `creator_id` chosen by `creator_fn`) and then records a single
/// `view` signal of weight `1.0` at `now`.
///
/// # Panics
///
/// Panics if any metadata write or signal write fails.
fn write_items<F>(db: &TidalDb, item_ids: &[u64], now: Timestamp, creator_fn: &F)
where
    F: Fn(u64) -> u64,
{
    item_ids.iter().copied().for_each(|item_id| {
        let metadata = item_metadata(item_id, creator_fn);
        db.write_item_with_metadata(EntityId::new(item_id), &metadata)
            .unwrap();

        // The view signal gives the item some ranking weight.
        db.signal("view", EntityId::new(item_id), 1.0, now).unwrap();
    });
}
/// Re-writes item metadata after a reopen so RETRIEVE can find the items again.
///
/// Per this file's design notes, the in-memory indexes (universe bitmap and
/// the category/format/range bitmaps) are not persisted: after a reopen the
/// item data is still in fjall storage, but those indexes start out empty.
/// Writing each item's metadata again refills them.
///
/// No signals are written here. The design notes state that signal state comes
/// back from checkpoint + WAL replay, so re-signalling is unnecessary.
///
/// # Panics
///
/// Panics if any metadata write fails.
fn repopulate_items<F>(db: &TidalDb, item_ids: &[u64], creator_fn: &F)
where
    F: Fn(u64) -> u64,
{
    item_ids
        .iter()
        .map(|&item_id| (item_id, item_metadata(item_id, creator_fn)))
        .for_each(|(item_id, metadata)| {
            db.write_item_with_metadata(EntityId::new(item_id), &metadata)
                .unwrap();
        });
}
/// Runs `RETRIEVE ... FOR USER` for `user_id` and returns the IDs that came back.
///
/// The query uses the `"new"` profile, which the original author describes as
/// sorting by entity ID descending with no signal requirements, and is capped
/// at `limit` results. Result order is discarded: only the set of item IDs is
/// returned.
///
/// # Panics
///
/// Panics if the query cannot be built or the retrieval fails.
fn retrieve_for_user(db: &TidalDb, user_id: u64, limit: usize) -> HashSet<u64> {
    let request = Retrieve::builder()
        .profile("new")
        .for_user(user_id)
        .limit(limit)
        .build()
        .unwrap();
    let response = db.retrieve(&request).unwrap();

    let mut returned_ids = HashSet::new();
    for hit in response.items.iter() {
        returned_ids.insert(hit.entity_id.as_u64());
    }
    returned_ids
}
/// Opens (or reopens) a persistent database rooted at `dir`, configured with
/// the schema from `invariant_schema`.
///
/// # Panics
///
/// Panics with "persistent open should succeed" if the open fails.
fn open_persistent(dir: &std::path::Path) -> TidalDb {
    let opened = TidalDb::builder()
        .with_data_dir(dir)
        .with_schema(invariant_schema())
        .open();
    opened.expect("persistent open should succeed")
}
// ── Test 1: Hidden items never returned after restart ───────────────────────
/// Items a user hid before shutdown must stay out of that user's RETRIEVE
/// results once the database has been closed and reopened.
///
/// The same exclusion is checked before the close as a sanity baseline.
#[test]
fn hidden_items_never_returned_after_restart() {
    // Items the user hides; the catalogue is items 1..=30.
    const HIDDEN: [u64; 4] = [3, 7, 15, 22];

    let data_dir = tempdir().unwrap();
    let now = Timestamp::now();
    let user = 1001u64;
    let catalog: Vec<u64> = (1..=30).collect();

    // ── Before the restart: seed, hide, sanity-check, shut down ──
    {
        let db = open_persistent(data_dir.path());
        write_items(&db, &catalog, now, &default_creator);

        // Register the user (empty metadata) so user-context filtering is active.
        db.write_user(EntityId::new(user), &HashMap::new()).unwrap();

        for hid in HIDDEN {
            db.write_relationship(
                EntityId::new(user),
                RelationshipType::Hide,
                EntityId::new(hid),
                1.0,
                now,
            )
            .unwrap();
        }

        let visible = retrieve_for_user(&db, user, 100);
        for hid in HIDDEN {
            assert!(
                !visible.contains(&hid),
                "pre-crash: hidden item {hid} should not appear in results"
            );
        }
        assert!(!visible.is_empty(), "pre-crash: should return some items");

        db.close().unwrap();
    }

    // ── After the restart: reopen, refill indexes, re-check the invariant ──
    {
        let db = open_persistent(data_dir.path());

        // Only the in-memory item indexes need refilling here. Per the design
        // notes at the top of this file, hide/block state is rebuilt from the
        // durable relationship edges during open.
        repopulate_items(&db, &catalog, &default_creator);

        let visible = retrieve_for_user(&db, user, 100);
        for hid in HIDDEN {
            assert!(
                !visible.contains(&hid),
                "post-restart: hidden item {hid} must not appear in results"
            );
        }
        assert!(
            !visible.is_empty(),
            "post-restart: should return some items"
        );

        // Same invariant from the other direction: everything returned must be
        // a known catalogue item that is not hidden.
        for id in visible.iter().copied() {
            assert!(
                catalog.contains(&id),
                "post-restart: unexpected item id {id}"
            );
            assert!(
                !HIDDEN.contains(&id),
                "post-restart: hidden item {id} leaked into results"
            );
        }

        db.close().unwrap();
    }
}
// ── Test 2: Blocked creator content never returned after restart ─────────────
/// Content from a creator the user blocked before shutdown must stay out of
/// that user's RETRIEVE results once the database has been closed and reopened.
///
/// The same exclusion is checked before the close as a sanity baseline.
#[test]
fn blocked_creator_content_never_returned_after_restart() {
    let data_dir = tempdir().unwrap();
    let now = Timestamp::now();
    let user = 1001u64;
    let blocked_creator = 3u64;
    let catalog: Vec<u64> = (1..=30).collect();

    // `default_creator` is `(id % 5) + 1`, so creator 3 owns every item with
    // `id % 5 == 2`: items 2, 7, 12, 17, 22 and 27.
    let creator3_items: HashSet<u64> = catalog
        .iter()
        .copied()
        .filter(|&id| default_creator(id) == blocked_creator)
        .collect();

    // ── Before the restart: seed, block, sanity-check, shut down ──
    {
        let db = open_persistent(data_dir.path());
        write_items(&db, &catalog, now, &default_creator);
        db.write_user(EntityId::new(user), &HashMap::new()).unwrap();

        db.write_relationship(
            EntityId::new(user),
            RelationshipType::Blocks,
            EntityId::new(blocked_creator),
            1.0,
            now,
        )
        .unwrap();

        let visible = retrieve_for_user(&db, user, 100);
        for cid in creator3_items.iter().copied() {
            assert!(
                !visible.contains(&cid),
                "pre-crash: item {cid} from blocked creator {blocked_creator} should not appear"
            );
        }
        assert!(
            !visible.is_empty(),
            "pre-crash: should return non-blocked items"
        );

        db.close().unwrap();
    }

    // ── After the restart: reopen, refill indexes, re-check the invariant ──
    {
        let db = open_persistent(data_dir.path());
        repopulate_items(&db, &catalog, &default_creator);

        let visible = retrieve_for_user(&db, user, 100);
        for cid in creator3_items.iter().copied() {
            assert!(
                !visible.contains(&cid),
                "post-restart: item {cid} from blocked creator {blocked_creator} must not appear"
            );
        }
        assert!(
            !visible.is_empty(),
            "post-restart: should return non-blocked items"
        );

        // Same invariant from the other direction: no returned item may map to
        // the blocked creator.
        for id in visible.iter().copied() {
            assert_ne!(
                default_creator(id),
                blocked_creator,
                "post-restart: item {id} from blocked creator {blocked_creator} leaked"
            );
        }

        db.close().unwrap();
    }
}
// ── Test 3: Combined hide and block after restart ───────────────────────────
/// Hidden items and blocked-creator content must both stay excluded from the
/// user's RETRIEVE results after a close + reopen, when the two kinds of
/// exclusion are active at the same time.
#[test]
fn combined_hide_and_block_after_restart() {
    // Items hidden individually; the catalogue is items 1..=30.
    const HIDDEN: [u64; 3] = [5, 10, 25];

    let data_dir = tempdir().unwrap();
    let now = Timestamp::now();
    let user = 1001u64;
    let blocked_creator = 2u64;
    let catalog: Vec<u64> = (1..=30).collect();

    // `default_creator` is `(id % 5) + 1`, so creator 2 owns every item with
    // `id % 5 == 1`: items 1, 6, 11, 16, 21 and 26.
    let creator2_items: HashSet<u64> = catalog
        .iter()
        .copied()
        .filter(|&id| default_creator(id) == blocked_creator)
        .collect();
    // Everything that must never be returned: hidden ∪ blocked-creator items.
    let all_excluded: HashSet<u64> = HIDDEN
        .iter()
        .copied()
        .chain(creator2_items.iter().copied())
        .collect();

    // ── Before the restart: seed, hide, block, sanity-check, shut down ──
    {
        let db = open_persistent(data_dir.path());
        write_items(&db, &catalog, now, &default_creator);
        db.write_user(EntityId::new(user), &HashMap::new()).unwrap();

        for hid in HIDDEN {
            db.write_relationship(
                EntityId::new(user),
                RelationshipType::Hide,
                EntityId::new(hid),
                1.0,
                now,
            )
            .unwrap();
        }
        db.write_relationship(
            EntityId::new(user),
            RelationshipType::Blocks,
            EntityId::new(blocked_creator),
            1.0,
            now,
        )
        .unwrap();

        let visible = retrieve_for_user(&db, user, 100);
        for ex in all_excluded.iter().copied() {
            assert!(
                !visible.contains(&ex),
                "pre-crash: excluded item {ex} should not appear"
            );
        }
        assert!(!visible.is_empty(), "pre-crash: should return some items");

        db.close().unwrap();
    }

    // ── After the restart: reopen, refill indexes, re-check the invariant ──
    {
        let db = open_persistent(data_dir.path());
        repopulate_items(&db, &catalog, &default_creator);

        let visible = retrieve_for_user(&db, user, 100);
        for ex in all_excluded.iter().copied() {
            assert!(
                !visible.contains(&ex),
                "post-restart: excluded item {ex} must not appear in results"
            );
        }
        assert!(
            !visible.is_empty(),
            "post-restart: should return some items"
        );

        // Same invariant from the other direction: each returned item must
        // belong to a non-blocked creator and must not be hidden.
        for id in visible.iter().copied() {
            assert_ne!(
                default_creator(id),
                blocked_creator,
                "post-restart: item {id} from blocked creator leaked"
            );
            assert!(
                !HIDDEN.contains(&id),
                "post-restart: hidden item {id} leaked"
            );
        }

        db.close().unwrap();
    }
}
// ── Test 4: Property test -- random hides/blocks survive restart ─────────────
proptest! {
    // 100 cases is the spec minimum from task-08. The parameter space is small;
    // the original estimate is ~1s per case (~100s in total), which is
    // acceptable for CI.
    #![proptest_config(ProptestConfig { cases: 100, max_shrink_iters: 50, ..Default::default() })]
    #[test]
    fn no_phantom_items_after_restart(
        hide_count in 0usize..8,
        blocked_creator_idx in 0u64..5,
    ) {
        let data_dir = tempdir().unwrap();
        let now = Timestamp::now();
        let user = 1001u64;
        let total_items = 30u64;
        let catalog: Vec<u64> = (1..=total_items).collect();

        // The hidden set is deterministic: the first `hide_count` item IDs,
        // i.e. 1..=hide_count (empty when `hide_count` is 0).
        let hidden_ids: HashSet<u64> = catalog.iter().copied().take(hide_count).collect();

        // Index 0..5 maps onto creators 1..=5.
        let blocked_creator = blocked_creator_idx + 1;
        let blocked_items: HashSet<u64> = catalog
            .iter()
            .copied()
            .filter(|&id| default_creator(id) == blocked_creator)
            .collect();

        // ── Before the restart: seed, hide, block, shut down ──
        {
            let db = open_persistent(data_dir.path());
            write_items(&db, &catalog, now, &default_creator);
            db.write_user(EntityId::new(user), &HashMap::new()).unwrap();

            for hid in hidden_ids.iter().copied() {
                db.write_relationship(
                    EntityId::new(user),
                    RelationshipType::Hide,
                    EntityId::new(hid),
                    1.0,
                    now,
                )
                .unwrap();
            }
            db.write_relationship(
                EntityId::new(user),
                RelationshipType::Blocks,
                EntityId::new(blocked_creator),
                1.0,
                now,
            )
            .unwrap();

            db.close().unwrap();
        }

        // ── After the restart: reopen, refill indexes, check the invariant ──
        {
            let db = open_persistent(data_dir.path());
            repopulate_items(&db, &catalog, &default_creator);

            let results = retrieve_for_user(&db, user, 100);

            // Nothing the user hid may come back.
            for hid in hidden_ids.iter().copied() {
                prop_assert!(
                    !results.contains(&hid),
                    "hidden item {hid} appeared after restart"
                );
            }

            // Nothing owned by the blocked creator may come back.
            for bid in blocked_items.iter().copied() {
                prop_assert!(
                    !results.contains(&bid),
                    "item {bid} from blocked creator {blocked_creator} appeared after restart"
                );
            }

            // Whenever at least one item is left un-excluded, results must be
            // non-empty. With the current ranges at most 7 items are hidden
            // and 6 belong to the blocked creator, so at most 13 of the 30
            // items are excluded and this guard is always taken; it is kept so
            // the check stays valid if the ranges are widened.
            let excluded_count = hidden_ids.union(&blocked_items).count();
            if excluded_count < catalog.len() {
                prop_assert!(
                    !results.is_empty(),
                    "expected non-empty results when {} items are excluded out of {}",
                    excluded_count,
                    total_items
                );
            }

            db.close().unwrap();
        }
    }
}
// ── Test 5: Hard negatives from session survive restart ──────────────────────
/// Hides written through `RelationshipType::Hide` must survive a close +
/// reopen, both in RETRIEVE results and in the rebuilt per-user hidden state.
///
/// The hides are issued directly with `write_relationship`; the original
/// author notes that this is the durable path sessions use when they hide
/// items. This test uses 20 items and a three-creator assignment
/// (`(id % 3) + 1`) rather than the file-wide default.
#[test]
fn hard_negatives_from_session_survive_restart() {
    const HIDDEN_VIA_HIDE: [u64; 3] = [4, 8, 16];

    let data_dir = tempdir().unwrap();
    let now = Timestamp::now();
    let user = 1001u64;
    let catalog: Vec<u64> = (1..=20).collect();
    let creator_of = |item: u64| 1 + item % 3;

    // ── Before the restart: seed, hide, sanity-check, shut down ──
    {
        let db = open_persistent(data_dir.path());
        write_items(&db, &catalog, now, &creator_of);
        db.write_user(EntityId::new(user), &HashMap::new()).unwrap();

        for hid in HIDDEN_VIA_HIDE {
            db.write_relationship(
                EntityId::new(user),
                RelationshipType::Hide,
                EntityId::new(hid),
                1.0,
                now,
            )
            .unwrap();
        }

        let visible = retrieve_for_user(&db, user, 100);
        for hid in HIDDEN_VIA_HIDE {
            assert!(
                !visible.contains(&hid),
                "pre-close: hidden item {hid} should not appear"
            );
        }
        assert!(!visible.is_empty(), "pre-close: should return some items");

        db.close().unwrap();
    }

    // ── After the restart: reopen, refill indexes, re-check the invariant ──
    {
        let db = open_persistent(data_dir.path());
        repopulate_items(&db, &catalog, &creator_of);

        let visible = retrieve_for_user(&db, user, 100);
        for hid in HIDDEN_VIA_HIDE {
            assert!(
                !visible.contains(&hid),
                "post-restart: hidden item {hid} must not appear in results"
            );
        }
        assert!(
            !visible.is_empty(),
            "post-restart: should return some items"
        );

        // Look past query results at the rebuilt user state itself: every
        // hidden ID must be present in the user's hidden-items bitmap.
        let hidden_bitmap = db.user_state().hidden_items(user);
        for hid in HIDDEN_VIA_HIDE {
            // The bitmap is queried with `u32`; the fixture IDs (4, 8, 16) fit.
            #[allow(clippy::cast_possible_truncation)]
            let hid_u32 = hid as u32;
            assert!(
                hidden_bitmap.contains(hid_u32),
                "post-restart: user_state hidden_items should contain {hid}"
            );
        }

        db.close().unwrap();
    }
}