601 lines
20 KiB
Rust
601 lines
20 KiB
Rust
//! Milestone 7 Task 08: Hard Negative Crash Invariant Tests.
//!
//! Proves that after any close + reopen cycle, RETRIEVE never returns hidden
//! items or blocked-creator content. The invariant under test:
//!
//! If user U has hidden item X or blocked creator C, then after close + reopen,
//! `RETRIEVE ... FOR USER @U` must NEVER return item X or items from creator C.
//!
//! Design note: In-memory indexes (universe bitmap, category/format/range) are
//! NOT persisted and must be rebuilt on reopen. After reopen, we call
//! `repopulate_items` to re-write item metadata into the in-memory indexes.
//! The hide/block relationship state IS persisted (fjall users keyspace) and
//! is rebuilt by `rebuild_entity_state` on open. This test verifies that the
//! rebuilt hide/block state correctly excludes items from RETRIEVE results.
//!
//! Tests:
//! 1. Hidden items never returned after restart.
//! 2. Blocked creator content never returned after restart.
//! 3. Combined hide + block after restart.
//! 4. Property test: random hides/blocks survive restart.
//! 5. Hard negatives from direct hide survive restart.
|
|
|
|
#![allow(
|
|
clippy::unwrap_used,
|
|
clippy::cast_precision_loss,
|
|
clippy::too_many_lines
|
|
)]
|
|
|
|
use std::collections::{HashMap, HashSet};
|
|
use std::time::Duration;
|
|
|
|
use proptest::prelude::*;
|
|
use tempfile::tempdir;
|
|
|
|
use tidaldb::TidalDb;
|
|
use tidaldb::entities::RelationshipType;
|
|
use tidaldb::query::retrieve::Retrieve;
|
|
use tidaldb::schema::{DecaySpec, EntityId, EntityKind, SchemaBuilder, Timestamp, Window};
|
|
|
|
// ── Schema ──────────────────────────────────────────────────────────────────
|
|
|
|
fn invariant_schema() -> tidaldb::schema::Schema {
|
|
let mut builder = SchemaBuilder::new();
|
|
for &(name, half_life_days) in &[
|
|
("view", 7),
|
|
("like", 14),
|
|
("skip", 1),
|
|
("hide", 1),
|
|
("block", 1),
|
|
] {
|
|
let _ = builder
|
|
.signal(
|
|
name,
|
|
EntityKind::Item,
|
|
DecaySpec::Exponential {
|
|
half_life: Duration::from_secs(half_life_days * 24 * 3600),
|
|
},
|
|
)
|
|
.windows(&[Window::AllTime])
|
|
.velocity(false)
|
|
.add();
|
|
}
|
|
builder.build().expect("invariant schema must be valid")
|
|
}
|
|
|
|
// ── Helpers ─────────────────────────────────────────────────────────────────
|
|
|
|
/// Default creator assignment: maps an item id onto one of the five creators
/// `1..=5` using `creator_id = (item_id % 5) + 1`.
const fn default_creator(id: u64) -> u64 {
    // `id % 5` is at most 4, so the addition cannot overflow.
    1 + id % 5
}
|
|
|
|
/// Assemble the metadata map for item `id`.
///
/// The map always has exactly four entries: `title` (`"Item {id}"`),
/// `category` (`"jazz"`), `format` (`"audio"`) and `creator_id`, the decimal
/// rendering of `creator_fn(id)`.
fn item_metadata<F>(id: u64, creator_fn: &F) -> HashMap<String, String>
where
    F: Fn(u64) -> u64,
{
    let owner = creator_fn(id).to_string();

    [
        ("title", format!("Item {id}")),
        ("category", String::from("jazz")),
        ("format", String::from("audio")),
        ("creator_id", owner),
    ]
    .into_iter()
    .map(|(key, value)| (key.to_owned(), value))
    .collect()
}
|
|
|
|
/// Write items with `creator_id` metadata and a view signal so they appear in
|
|
/// RETRIEVE results. Items are assigned to creators via `creator_fn(item_id)`.
|
|
fn write_items<F>(db: &TidalDb, item_ids: &[u64], now: Timestamp, creator_fn: &F)
|
|
where
|
|
F: Fn(u64) -> u64,
|
|
{
|
|
for &id in item_ids {
|
|
let meta = item_metadata(id, creator_fn);
|
|
db.write_item_with_metadata(EntityId::new(id), &meta)
|
|
.unwrap();
|
|
// Signal so the item has ranking weight.
|
|
db.signal("view", EntityId::new(id), 1.0, now).unwrap();
|
|
}
|
|
}
|
|
|
|
/// Repopulate in-memory indexes after reopen.
|
|
///
|
|
/// In-memory indexes (universe bitmap, category/format/range bitmaps) are NOT
|
|
/// persisted. After reopen, the item data exists in fjall storage but the
|
|
/// in-memory indexes are empty. This function re-writes item metadata to
|
|
/// repopulate the indexes, making items discoverable by RETRIEVE.
|
|
///
|
|
/// Signal state IS restored from checkpoint + WAL replay, so we do not need
|
|
/// to re-signal items.
|
|
fn repopulate_items<F>(db: &TidalDb, item_ids: &[u64], creator_fn: &F)
|
|
where
|
|
F: Fn(u64) -> u64,
|
|
{
|
|
for &id in item_ids {
|
|
let meta = item_metadata(id, creator_fn);
|
|
db.write_item_with_metadata(EntityId::new(id), &meta)
|
|
.unwrap();
|
|
}
|
|
}
|
|
|
|
/// RETRIEVE with FOR USER, using the "new" profile (sorts by entity ID desc,
|
|
/// no signal requirements). Returns the set of item IDs in results.
|
|
fn retrieve_for_user(db: &TidalDb, user_id: u64, limit: usize) -> HashSet<u64> {
|
|
let query = Retrieve::builder()
|
|
.profile("new")
|
|
.for_user(user_id)
|
|
.limit(limit)
|
|
.build()
|
|
.unwrap();
|
|
let results = db.retrieve(&query).unwrap();
|
|
results.items.iter().map(|r| r.entity_id.as_u64()).collect()
|
|
}
|
|
|
|
/// Open a persistent DB at the given directory with the invariant schema.
|
|
fn open_persistent(dir: &std::path::Path) -> TidalDb {
|
|
TidalDb::builder()
|
|
.with_data_dir(dir)
|
|
.with_schema(invariant_schema())
|
|
.open()
|
|
.expect("persistent open should succeed")
|
|
}
|
|
|
|
// ── Test 1: Hidden items never returned after restart ───────────────────────
|
|
|
|
/// Items hidden by a user must stay out of that user's RETRIEVE results both
/// before the database is closed and after it is reopened.
#[test]
fn hidden_items_never_returned_after_restart() {
    const USER: u64 = 1001;

    let workdir = tempdir().unwrap();
    let ts = Timestamp::now();
    let hidden_ids = HashSet::from([3u64, 7, 15, 22]);
    let item_ids: Vec<u64> = (1..=30).collect();

    // Phase 1: populate + hide + verify + close.
    {
        let db = open_persistent(workdir.path());
        write_items(&db, &item_ids, ts, &default_creator);

        // The user record must exist so user-context filtering is active.
        let no_meta = HashMap::new();
        db.write_user(EntityId::new(USER), &no_meta).unwrap();

        // Hide the chosen items for this user.
        for target in hidden_ids.iter().copied() {
            db.write_relationship(
                EntityId::new(USER),
                RelationshipType::Hide,
                EntityId::new(target),
                1.0,
                ts,
            )
            .unwrap();
        }

        // Pre-crash verification: hidden items must not appear.
        let before = retrieve_for_user(&db, USER, 100);
        for hid in hidden_ids.iter().copied() {
            assert!(
                !before.contains(&hid),
                "pre-crash: hidden item {hid} should not appear in results"
            );
        }
        assert!(!before.is_empty(), "pre-crash: should return some items");

        db.close().unwrap();
    }

    // Phase 2: reopen + repopulate indexes + verify invariant holds.
    {
        let db = open_persistent(workdir.path());

        // Items only become discoverable by RETRIEVE once the in-memory
        // indexes (universe bitmap etc.) are repopulated. Hide/block state was
        // already rebuilt from durable relationship edges by
        // rebuild_entity_state.
        repopulate_items(&db, &item_ids, &default_creator);

        let after = retrieve_for_user(&db, USER, 100);
        for hid in hidden_ids.iter().copied() {
            assert!(
                !after.contains(&hid),
                "post-restart: hidden item {hid} must not appear in results"
            );
        }
        assert!(!after.is_empty(), "post-restart: should return some items");

        // Every returned id must be one we wrote and must not be hidden.
        for id in after.iter().copied() {
            assert!(
                (1..=30).contains(&id),
                "post-restart: unexpected item id {id}"
            );
            assert!(
                !hidden_ids.contains(&id),
                "post-restart: hidden item {id} leaked into results"
            );
        }

        db.close().unwrap();
    }
}
|
|
|
|
// ── Test 2: Blocked creator content never returned after restart ─────────────
|
|
|
|
/// Items owned by a creator the user has blocked must stay out of that user's
/// RETRIEVE results both before close and after reopen.
#[test]
fn blocked_creator_content_never_returned_after_restart() {
    let workdir = tempdir().unwrap();
    let ts = Timestamp::now();
    let reader = 1001u64;
    let blocked_creator = 3u64;
    let item_ids: Vec<u64> = (1..=30).collect();

    // creator_id = (item_id % 5) + 1, so creator 3 owns the items with
    // item_id % 5 == 2: items 2, 7, 12, 17, 22, 27.
    let creator3_items: HashSet<u64> = item_ids
        .iter()
        .copied()
        .filter(|&id| default_creator(id) == blocked_creator)
        .collect();

    // Phase 1: populate + block creator 3 + verify + close.
    {
        let db = open_persistent(workdir.path());
        write_items(&db, &item_ids, ts, &default_creator);

        let no_meta = HashMap::new();
        db.write_user(EntityId::new(reader), &no_meta).unwrap();

        // Block creator 3.
        db.write_relationship(
            EntityId::new(reader),
            RelationshipType::Blocks,
            EntityId::new(blocked_creator),
            1.0,
            ts,
        )
        .unwrap();

        // Pre-crash verification.
        let before = retrieve_for_user(&db, reader, 100);
        for cid in creator3_items.iter().copied() {
            assert!(
                !before.contains(&cid),
                "pre-crash: item {cid} from blocked creator {blocked_creator} should not appear"
            );
        }
        assert!(
            !before.is_empty(),
            "pre-crash: should return non-blocked items"
        );

        db.close().unwrap();
    }

    // Phase 2: reopen + repopulate + verify.
    {
        let db = open_persistent(workdir.path());
        repopulate_items(&db, &item_ids, &default_creator);

        let after = retrieve_for_user(&db, reader, 100);
        for cid in creator3_items.iter().copied() {
            assert!(
                !after.contains(&cid),
                "post-restart: item {cid} from blocked creator {blocked_creator} must not appear"
            );
        }
        assert!(
            !after.is_empty(),
            "post-restart: should return non-blocked items"
        );

        // Every returned item must come from a non-blocked creator.
        for id in after.iter().copied() {
            assert_ne!(
                default_creator(id),
                blocked_creator,
                "post-restart: item {id} from blocked creator {blocked_creator} leaked"
            );
        }

        db.close().unwrap();
    }
}
|
|
|
|
// ── Test 3: Combined hide and block after restart ───────────────────────────
|
|
|
|
/// With both explicit hides and a blocked creator in place, neither kind of
/// excluded item may appear in the user's RETRIEVE results, before close or
/// after reopen.
#[test]
fn combined_hide_and_block_after_restart() {
    let workdir = tempdir().unwrap();
    let ts = Timestamp::now();
    let reader = 1001u64;
    let hidden_ids = HashSet::from([5u64, 10, 25]);
    let blocked_creator = 2u64;
    let item_ids: Vec<u64> = (1..=30).collect();

    // Creator 2 owns the items where (id % 5) + 1 == 2, i.e. id % 5 == 1:
    // items 1, 6, 11, 16, 21, 26.
    let creator2_items: HashSet<u64> = item_ids
        .iter()
        .copied()
        .filter(|&id| default_creator(id) == blocked_creator)
        .collect();
    // Everything that must never be returned: hidden items plus creator 2's items.
    let all_excluded: HashSet<u64> = hidden_ids.union(&creator2_items).copied().collect();

    // Phase 1: populate + hide items + block creator + verify + close.
    {
        let db = open_persistent(workdir.path());
        write_items(&db, &item_ids, ts, &default_creator);

        let no_meta = HashMap::new();
        db.write_user(EntityId::new(reader), &no_meta).unwrap();

        // Hide specific items.
        for target in hidden_ids.iter().copied() {
            db.write_relationship(
                EntityId::new(reader),
                RelationshipType::Hide,
                EntityId::new(target),
                1.0,
                ts,
            )
            .unwrap();
        }

        // Block creator 2.
        db.write_relationship(
            EntityId::new(reader),
            RelationshipType::Blocks,
            EntityId::new(blocked_creator),
            1.0,
            ts,
        )
        .unwrap();

        // Pre-crash verification.
        let before = retrieve_for_user(&db, reader, 100);
        for ex in all_excluded.iter().copied() {
            assert!(
                !before.contains(&ex),
                "pre-crash: excluded item {ex} should not appear"
            );
        }
        assert!(!before.is_empty(), "pre-crash: should return some items");

        db.close().unwrap();
    }

    // Phase 2: reopen + repopulate + verify.
    {
        let db = open_persistent(workdir.path());
        repopulate_items(&db, &item_ids, &default_creator);

        let after = retrieve_for_user(&db, reader, 100);
        for ex in all_excluded.iter().copied() {
            assert!(
                !after.contains(&ex),
                "post-restart: excluded item {ex} must not appear in results"
            );
        }
        assert!(!after.is_empty(), "post-restart: should return some items");

        // Every returned item must be from a non-blocked creator and not hidden.
        for id in after.iter().copied() {
            assert_ne!(
                default_creator(id),
                blocked_creator,
                "post-restart: item {id} from blocked creator leaked"
            );
            assert!(
                !hidden_ids.contains(&id),
                "post-restart: hidden item {id} leaked"
            );
        }

        db.close().unwrap();
    }
}
|
|
|
|
// ── Test 4: Property test -- random hides/blocks survive restart ─────────────
|
|
|
|
proptest! {
    // 100 cases = spec minimum from task-08; parameter space is small so each
    // case completes in ~1s, giving ~100s total -- acceptable for CI.
    #![proptest_config(ProptestConfig { cases: 100, max_shrink_iters: 50, ..Default::default() })]

    // Property: for any number of hidden items (0..=7) and any single blocked
    // creator (1..=5), a close + reopen cycle never lets a hidden item or an
    // item from the blocked creator back into the user's RETRIEVE results.
    #[test]
    fn no_phantom_items_after_restart(
        hide_count in 0usize..8,
        blocked_creator_idx in 0u64..5,
    ) {
        let dir = tempdir().unwrap();
        let now = Timestamp::now();
        let user_id = 1001u64;
        let total_items = 30u64;
        let item_ids: Vec<u64> = (1..=total_items).collect();

        // Deterministically hide the first `hide_count` items (ids 1..=hide_count).
        let hidden_ids: HashSet<u64> = (1..=total_items).take(hide_count).collect();

        // Creator to block: one of creators 1-5.
        let blocked_creator = blocked_creator_idx + 1;

        // Phase 1: populate, hide, block, close.
        {
            let db = open_persistent(dir.path());
            write_items(&db, &item_ids, now, &default_creator);

            let user_meta = HashMap::new();
            db.write_user(EntityId::new(user_id), &user_meta).unwrap();

            for &hid in &hidden_ids {
                db.write_relationship(
                    EntityId::new(user_id),
                    RelationshipType::Hide,
                    EntityId::new(hid),
                    1.0,
                    now,
                )
                .unwrap();
            }

            db.write_relationship(
                EntityId::new(user_id),
                RelationshipType::Blocks,
                EntityId::new(blocked_creator),
                1.0,
                now,
            )
            .unwrap();

            db.close().unwrap();
        }

        // Phase 2: reopen + repopulate + verify.
        {
            let db = open_persistent(dir.path());
            repopulate_items(&db, &item_ids, &default_creator);

            let results = retrieve_for_user(&db, user_id, 100);

            // No hidden items must appear.
            for &hid in &hidden_ids {
                prop_assert!(
                    !results.contains(&hid),
                    "hidden item {hid} appeared after restart"
                );
            }

            // No items from blocked creator must appear.
            let blocked_items: HashSet<u64> = (1..=total_items)
                .filter(|&id| default_creator(id) == blocked_creator)
                .collect();
            for &bid in &blocked_items {
                prop_assert!(
                    !results.contains(&bid),
                    "item {bid} from blocked creator {blocked_creator} appeared after restart"
                );
            }

            // Results should be non-empty whenever at least one item is neither
            // hidden nor owned by the blocked creator. With the current
            // strategies at most 7 items are hidden and the blocked creator
            // owns 6 of the 30 items, so at most 13 items (fewer once overlap
            // is accounted for) are excluded and the guard below always holds.
            // It is kept so the assertion stays correct if the ranges are
            // widened later.
            let total_excluded = hidden_ids.union(&blocked_items).count();
            if total_excluded < item_ids.len() {
                prop_assert!(
                    !results.is_empty(),
                    "expected non-empty results when {} items are excluded out of {}",
                    total_excluded,
                    total_items
                );
            }

            db.close().unwrap();
        }
    }
}
|
|
|
|
// ── Test 5: Hard negatives from session survive restart ──────────────────────
|
|
|
|
/// Hides written through `RelationshipType::Hide` must survive close + reopen:
/// the hidden items stay out of RETRIEVE results and are present in the
/// rebuilt `user_state().hidden_items(..)` bitmap.
///
/// Uses 20 items and a three-creator assignment (`(id % 3) + 1`) instead of
/// `default_creator`.
#[test]
fn hard_negatives_from_session_survive_restart() {
    let dir = tempdir().unwrap();
    let now = Timestamp::now();
    let user_id = 1001u64;
    let hidden_via_hide = [4u64, 8, 16];
    let item_ids: Vec<u64> = (1..=20).collect();
    let creator_fn = |id: u64| (id % 3) + 1;

    // Phase 1: populate, hide items via write_relationship, close.
    {
        let db = open_persistent(dir.path());

        write_items(&db, &item_ids, now, &creator_fn);

        let user_meta = HashMap::new();
        db.write_user(EntityId::new(user_id), &user_meta).unwrap();

        // Hide items via the RelationshipType::Hide path (this is the durable
        // path used by sessions when they hide items).
        for &hid in &hidden_via_hide {
            db.write_relationship(
                EntityId::new(user_id),
                RelationshipType::Hide,
                EntityId::new(hid),
                1.0,
                now,
            )
            .unwrap();
        }

        // Verify pre-close.
        let pre_results = retrieve_for_user(&db, user_id, 100);
        for &hid in &hidden_via_hide {
            assert!(
                !pre_results.contains(&hid),
                "pre-close: hidden item {hid} should not appear"
            );
        }
        assert!(
            !pre_results.is_empty(),
            "pre-close: should return some items"
        );

        db.close().unwrap();
    }

    // Phase 2: reopen + repopulate + verify hidden items stay hidden.
    {
        let db = open_persistent(dir.path());
        repopulate_items(&db, &item_ids, &creator_fn);

        let post_results = retrieve_for_user(&db, user_id, 100);
        for &hid in &hidden_via_hide {
            assert!(
                !post_results.contains(&hid),
                "post-restart: hidden item {hid} must not appear in results"
            );
        }
        assert!(
            !post_results.is_empty(),
            "post-restart: should return some items"
        );

        // Verify the actual user state was rebuilt correctly.
        let hidden_bitmap = db.user_state().hidden_items(user_id);
        for &hid in &hidden_via_hide {
            // Checked narrowing: an id that does not fit in u32 fails loudly
            // here instead of silently wrapping to a different bitmap slot.
            let hid_u32 = u32::try_from(hid).expect("hidden item id must fit in u32");
            assert!(
                hidden_bitmap.contains(hid_u32),
                "post-restart: user_state hidden_items should contain {hid}"
            );
        }

        db.close().unwrap();
    }
}
|