1261 lines
41 KiB
Rust
1261 lines
41 KiB
Rust
//! Milestone 7 Task 07: Crash Fencing for M6 State.
//!
//! Property tests and integration tests verifying that all M6 state surfaces
//! survive a clean close + reopen cycle. Each test writes state through the
//! public `TidalDb` API, cleanly shuts down (triggering checkpoint), reopens
//! from the same data directory, and asserts all state is recoverable.
//!
//! Surfaces under test:
//! 1. `CohortSignalLedger` checkpoint/restore roundtrip.
//! 2. `CollectionIndex` persistence across restart.
//! 3. `CoEngagement` index checkpoint/restore across restart.
//! 4. Active session signal recovery via WAL replay.
//! 5. Mixed workload (items + signals + collections + cohorts) restart.
|
|
|
|
#![allow(
|
|
clippy::unwrap_used,
|
|
clippy::cast_precision_loss,
|
|
clippy::too_many_lines
|
|
)]
|
|
|
|
use std::collections::HashMap;
|
|
use std::time::Duration;
|
|
|
|
use tidaldb::TidalDb;
|
|
use tidaldb::cohort::{CohortDef, Predicate};
|
|
use tidaldb::entities::Visibility;
|
|
use tidaldb::schema::{
|
|
AgentPolicy, DecaySpec, EntityId, EntityKind, SchemaBuilder, Timestamp, Window,
|
|
};
|
|
|
|
// ── Schema helpers ──────────────────────────────────────────────────────────
|
|
|
|
/// Standard M6 schema with view, like, share signals and all windows enabled.
|
|
fn m6_schema() -> tidaldb::schema::Schema {
|
|
let mut builder = SchemaBuilder::new();
|
|
for &(name, half_life_days) in &[
|
|
("view", 7),
|
|
("like", 14),
|
|
("share", 7),
|
|
("skip", 1),
|
|
("completion", 14),
|
|
] {
|
|
let _ = builder
|
|
.signal(
|
|
name,
|
|
EntityKind::Item,
|
|
DecaySpec::Exponential {
|
|
half_life: Duration::from_secs(half_life_days * 24 * 3600),
|
|
},
|
|
)
|
|
.windows(&[
|
|
Window::OneHour,
|
|
Window::TwentyFourHours,
|
|
Window::SevenDays,
|
|
Window::AllTime,
|
|
])
|
|
.velocity(true)
|
|
.add();
|
|
}
|
|
builder.build().expect("m6 schema must be valid")
|
|
}
|
|
|
|
/// Schema with session policy for session recovery tests.
|
|
fn m6_session_schema() -> tidaldb::schema::Schema {
|
|
let mut builder = SchemaBuilder::new();
|
|
for &(name, half_life_days) in &[
|
|
("view", 7),
|
|
("like", 14),
|
|
("share", 7),
|
|
("skip", 1),
|
|
("completion", 14),
|
|
] {
|
|
let _ = builder
|
|
.signal(
|
|
name,
|
|
EntityKind::Item,
|
|
DecaySpec::Exponential {
|
|
half_life: Duration::from_secs(half_life_days * 24 * 3600),
|
|
},
|
|
)
|
|
.windows(&[
|
|
Window::OneHour,
|
|
Window::TwentyFourHours,
|
|
Window::SevenDays,
|
|
Window::AllTime,
|
|
])
|
|
.velocity(true)
|
|
.add();
|
|
}
|
|
builder.session_policy(
|
|
"default",
|
|
AgentPolicy {
|
|
allowed_signals: vec!["view".to_string(), "like".to_string(), "share".to_string()],
|
|
denied_signals: vec![],
|
|
max_session_duration: Duration::from_secs(3600),
|
|
max_signals_per_session: 1000,
|
|
},
|
|
);
|
|
builder.build().expect("m6 session schema must be valid")
|
|
}
|
|
|
|
fn item_metadata(category: &str, creator_id: u64) -> HashMap<String, String> {
|
|
let mut meta = HashMap::new();
|
|
meta.insert("category".to_string(), category.to_string());
|
|
meta.insert("format".to_string(), "video".to_string());
|
|
meta.insert("creator_id".to_string(), creator_id.to_string());
|
|
meta.insert(
|
|
"created_at".to_string(),
|
|
Timestamp::now().as_nanos().to_string(),
|
|
);
|
|
meta
|
|
}
|
|
|
|
/// Builds the metadata map used for test users: a `locale` entry and a
/// `primary_category` entry, both copied from the arguments.
fn user_metadata(locale: &str, primary_category: &str) -> HashMap<String, String> {
    HashMap::from([
        ("locale".to_string(), locale.to_string()),
        ("primary_category".to_string(), primary_category.to_string()),
    ])
}
|
|
|
|
// ── Test 1: Cohort signal ledger checkpoint/restore roundtrip ───────────────
//
// Two cohorts are defined, two users and five items are written, and view
// signals carrying user context are recorded. After a clean close and reopen
// on the same directory, the cohort definitions, the per-cohort all-time view
// counts, and a decay score must all still be readable.

#[test]
fn cohort_ledger_checkpoint_restore_roundtrip() {
    let dir = tempfile::tempdir().unwrap();
    let schema = m6_schema();

    // Both phases open the database the same way, on the same directory.
    let open_db = || {
        TidalDb::builder()
            .with_data_dir(dir.path())
            .with_schema(schema.clone())
            .open()
            .unwrap()
    };
    // Shorthand for an equality predicate on one metadata field.
    let field_eq = |field: &'static str, value: &'static str| Predicate::Eq {
        field: field.into(),
        value: value.into(),
    };

    // Phase 1: define cohorts, write signals, then close.
    {
        let db = open_db();

        db.define_cohort(CohortDef {
            name: "tech_en".to_string(),
            predicate: Predicate::And(vec![
                field_eq("locale", "en"),
                field_eq("primary_category", "tech"),
            ]),
        })
        .unwrap();
        db.define_cohort(CohortDef {
            name: "sports".to_string(),
            predicate: field_eq("primary_category", "sports"),
        })
        .unwrap();

        // User 1001 matches `tech_en`; user 1002 matches `sports`.
        for (user, category) in [(1001u64, "tech"), (1002, "sports")] {
            db.write_user(EntityId::new(user), &user_metadata("en", category))
                .unwrap();
        }

        for item in 1..=5u64 {
            db.write_item_with_metadata(EntityId::new(item), &item_metadata("tech", 100))
                .unwrap();
        }

        let ts = Timestamp::now();

        // User 1001 views items 1, 2, 3; then user 1002 views items 2 and 4.
        for (user, viewed) in [(1001, vec![1u64, 2, 3]), (1002, vec![2, 4])] {
            for item in viewed {
                db.signal_with_context("view", EntityId::new(item), 1.0, ts, Some(user), Some(100))
                    .unwrap();
            }
        }

        // Sanity-check the ledger before closing.
        let ledger = db.cohort_ledger();
        for (cohort, item, msg) in [
            ("tech_en", 1u64, "pre-close: tech_en item 1 should have 1 view"),
            ("sports", 4, "pre-close: sports item 4 should have 1 view"),
        ] {
            let count = ledger
                .read_windowed_count(cohort, EntityId::new(item), "view", Window::AllTime)
                .unwrap();
            assert_eq!(count, 1, "{msg}");
        }

        db.close().unwrap();
    }

    // Phase 2: reopen and check that the cohort state is still there.
    {
        let db = open_db();

        // Re-defining an existing cohort name is expected to fail, which shows
        // the definitions were restored.
        for (cohort, msg) in [
            ("tech_en", "tech_en cohort definition should survive restart"),
            ("sports", "sports cohort definition should survive restart"),
        ] {
            let redefinition = db.define_cohort(CohortDef {
                name: cohort.to_string(),
                predicate: field_eq("x", "y"),
            });
            assert!(redefinition.is_err(), "{msg}");
        }

        let ledger = db.cohort_ledger();
        let views_of = |cohort: &str, item: u64| {
            ledger
                .read_windowed_count(cohort, EntityId::new(item), "view", Window::AllTime)
                .unwrap()
        };

        // tech_en: one view each on items 1, 2, 3.
        for i in 1..=3u64 {
            let count = views_of("tech_en", i);
            assert_eq!(
                count, 1,
                "post-restart: tech_en item {i} should have 1 view, got {count}"
            );
        }

        // tech_en: nothing on item 4 (only the sports user viewed it).
        assert_eq!(
            views_of("tech_en", 4),
            0,
            "post-restart: tech_en item 4 should have 0 views"
        );

        // sports: one view each on items 2 and 4.
        for i in [2u64, 4] {
            let count = views_of("sports", i);
            assert_eq!(
                count, 1,
                "post-restart: sports item {i} should have 1 view, got {count}"
            );
        }

        // sports: nothing on item 1.
        assert_eq!(
            views_of("sports", 1),
            0,
            "post-restart: sports item 1 should have 0 views"
        );

        // A decay score must exist for tech_en / item 1 and be positive.
        let tech_score = ledger
            .read_decay_score("tech_en", EntityId::new(1), "view", 0)
            .unwrap();
        match tech_score {
            None => panic!("post-restart: tech_en item 1 decay score should exist"),
            Some(score) => assert!(
                score > 0.0,
                "post-restart: tech_en item 1 decay score should be positive"
            ),
        }

        db.close().unwrap();
    }
}
|
|
|
|
// ── Test 2: Collection index persistence across restart ─────────────────────
//
// Creates two collections for one owner, fills them with items, closes
// cleanly, reopens, and checks names, item counts, visibility, and per-item
// membership in the collection index.

#[test]
fn collection_persistence_across_restart() {
    let dir = tempfile::tempdir().unwrap();
    let schema = m6_schema();
    let owner = EntityId::new(42);

    // Both phases open the database the same way, on the same directory.
    let open_db = || {
        TidalDb::builder()
            .with_data_dir(dir.path())
            .with_schema(schema.clone())
            .open()
            .unwrap()
    };

    // Phase 1: create the collections and add items.
    {
        let db = open_db();

        let favorites = db
            .create_collection(owner, "favorites", Visibility::Private)
            .unwrap();
        let watchlist = db
            .create_collection(owner, "watchlist", Visibility::Public)
            .unwrap();

        for item in [10u64, 20, 30] {
            db.add_to_collection(favorites, EntityId::new(item)).unwrap();
        }
        for item in [100u64, 200] {
            db.add_to_collection(watchlist, EntityId::new(item)).unwrap();
        }

        // Both collections are listed before closing.
        let listed = db.list_collections(owner).unwrap();
        assert_eq!(listed.len(), 2);

        db.close().unwrap();
    }

    // Phase 2: reopen and verify the collections survived.
    {
        let db = open_db();

        let collections = db.list_collections(owner).unwrap();
        assert_eq!(
            collections.len(),
            2,
            "both collections should survive restart"
        );

        // Look the two collections up by name.
        let fav = collections
            .iter()
            .find(|entry| entry.name == "favorites")
            .unwrap();
        let watch = collections
            .iter()
            .find(|entry| entry.name == "watchlist")
            .unwrap();

        assert_eq!(fav.item_count, 3, "favorites should have 3 items");
        assert_eq!(fav.visibility, Visibility::Private);

        assert_eq!(watch.item_count, 2, "watchlist should have 2 items");
        assert_eq!(watch.visibility, Visibility::Public);

        // Membership as reported by the collection index.
        let index = db.collection_index();
        for item in [10, 20, 30] {
            assert!(
                index.contains(fav.id, item),
                "favorites should contain item {item}"
            );
        }
        assert!(
            !index.contains(fav.id, 100),
            "favorites should NOT contain item 100"
        );
        for item in [100, 200] {
            assert!(
                index.contains(watch.id, item),
                "watchlist should contain item {item}"
            );
        }

        db.close().unwrap();
    }
}
|
|
|
|
// ── Test 3: Co-engagement index checkpoint/restore ──────────────────────────
//
// Inserts four co-engagement edges directly, closes cleanly, reopens, and
// checks that each edge weight is unchanged and that an edge never inserted
// in the reverse direction still scores zero.

#[test]
fn co_engagement_checkpoint_restore_roundtrip() {
    let dir = tempfile::tempdir().unwrap();
    let schema = m6_schema();

    // Both phases open the database the same way, on the same directory.
    let open_db = || {
        TidalDb::builder()
            .with_data_dir(dir.path())
            .with_schema(schema.clone())
            .open()
            .unwrap()
    };

    // Phase 1: insert the edges, then close.
    {
        let db = open_db();

        // Write the items the edges refer to.
        for item in [10u64, 20, 30, 40] {
            db.write_item_with_metadata(EntityId::new(item), &item_metadata("jazz", 100))
                .unwrap();
        }

        let co_eng = db.co_engagement();
        for (from, to, weight) in [(10, 20, 5.0), (10, 30, 2.5), (20, 30, 1.0), (30, 40, 7.5)] {
            co_eng.insert_edge(from, to, weight);
        }

        assert_eq!(co_eng.edge_count(), 4);

        db.close().unwrap();
    }

    // Phase 2: reopen and verify the edges survived.
    {
        let db = open_db();
        let co_eng = db.co_engagement();

        let restored_edges = co_eng.edge_count();
        assert!(
            restored_edges >= 4,
            "co-engagement edges should survive restart; got {restored_edges}"
        );

        // Each inserted edge must come back with its exact weight.
        for (from, to, expected) in [
            (10u64, 20u64, 5.0f32),
            (10, 30, 2.5),
            (20, 30, 1.0),
            (30, 40, 7.5),
        ] {
            let score = co_eng.score(EntityId::new(from), EntityId::new(to));
            assert!(
                (score - expected).abs() < f32::EPSILON,
                "edge ({from}, {to}) should be {expected:?} after restore; got {score}"
            );
        }

        // (20, 10) was never inserted, so it must still score zero.
        let reverse = co_eng.score(EntityId::new(20), EntityId::new(10));
        assert!(
            reverse.abs() < f32::EPSILON,
            "reverse edge (20, 10) should be 0.0; got {reverse}"
        );

        db.close().unwrap();
    }
}
|
|
|
|
// ── Test 4: Active session signal recovery via WAL ──────────────────────────
//
// Starts a session, records three session signals, then closes the database
// while the session is still open (`close_session` is never called). After
// reopening, the session must be listed as active again with the same id,
// user, signal count, signaled entities, and annotations. The intent is that
// this state is rebuilt from WAL replay.

#[test]
fn active_session_signal_recovery() {
    let dir = tempfile::tempdir().unwrap();
    let schema = m6_session_schema();
    let user_id = 99u64;

    // Both phases open the database the same way, on the same directory.
    let open_db = || {
        TidalDb::builder()
            .with_data_dir(dir.path())
            .with_schema(schema.clone())
            .open()
            .unwrap()
    };

    // Phase 1: start a session, write signals, close the db with the session
    // still open. The block yields the raw session id for phase 2.
    let session_id_raw: u64 = {
        let db = open_db();

        // Items for the signals to target.
        for item in 1..=3u64 {
            db.write_item_with_metadata(EntityId::new(item), &item_metadata("jazz", 100))
                .unwrap();
        }

        let handle = db
            .start_session(user_id, "test-agent", "default", HashMap::new())
            .unwrap();
        let raw_id = handle.id.as_u64();

        // Two views and one annotated like, all with the same timestamp.
        let ts = Timestamp::now();
        for (signal, item, annotation) in [
            ("view", 1u64, None),
            ("view", 2, None),
            ("like", 1, Some("great content".to_string())),
        ] {
            db.session_signal(&handle, signal, EntityId::new(item), 1.0, ts, annotation)
                .unwrap();
        }

        // The session is active and has counted all three signals.
        let active = db.active_sessions();
        assert_eq!(active.len(), 1, "one active session before close");
        assert_eq!(active[0].signals_written, 3);

        // Close the DB without closing the session.
        db.close().unwrap();

        raw_id
    };

    // Phase 2: reopen and verify the session was restored.
    {
        let db = open_db();

        let active = db.active_sessions();
        assert_eq!(
            active.len(),
            1,
            "session should be restored as active after restart"
        );

        let restored = &active[0];
        assert_eq!(
            restored.id.as_u64(),
            session_id_raw,
            "restored session should have the same ID"
        );
        assert_eq!(restored.user_id, user_id, "restored session user_id");
        assert_eq!(
            restored.signals_written, 3,
            "restored session should have 3 signals replayed"
        );

        // The snapshot must list exactly the two signaled items.
        let snapshot = db
            .session_snapshot(tidaldb::SessionId::from_raw(session_id_raw))
            .unwrap();
        assert_eq!(
            snapshot.signaled_entities.len(),
            2,
            "two distinct entities were signaled (items 1 and 2)"
        );
        for entity in [1, 2] {
            assert!(
                snapshot.signaled_entities.contains(&entity),
                "entity {entity} should be in signaled set"
            );
        }

        // The annotation on the like signal must have come back too.
        assert!(
            !snapshot.annotations.is_empty(),
            "annotation 'great content' should be replayed"
        );

        db.close().unwrap();
    }
}
|
|
|
|
// ── Test 5: Mixed workload crash recovery ───────────────────────────────────
//
// Writes a cohort, a user, items, a collection, global signals, signals with
// user context, co-engagement edges, and a saved search against one open
// database. Closes cleanly, reopens, and checks every one of those surfaces.

#[test]
fn mixed_workload_crash_recovery() {
    let dir = tempfile::tempdir().unwrap();
    let schema = m6_schema();
    let owner = EntityId::new(1);

    // Both phases open the database the same way, on the same directory.
    let open_db = || {
        TidalDb::builder()
            .with_data_dir(dir.path())
            .with_schema(schema.clone())
            .open()
            .unwrap()
    };
    // Shorthand for an equality predicate on one metadata field.
    let field_eq = |field: &'static str, value: &'static str| Predicate::Eq {
        field: field.into(),
        value: value.into(),
    };

    // Phase 1: write the mixed workload.
    {
        let db = open_db();

        db.define_cohort(CohortDef {
            name: "tech_en".to_string(),
            predicate: Predicate::And(vec![
                field_eq("locale", "en"),
                field_eq("primary_category", "tech"),
            ]),
        })
        .unwrap();

        // User 1001 matches the `tech_en` predicate.
        db.write_user(EntityId::new(1001), &user_metadata("en", "tech"))
            .unwrap();

        // Ten items.
        for item in 1..=10u64 {
            db.write_item_with_metadata(EntityId::new(item), &item_metadata("tech", 100))
                .unwrap();
        }

        // One collection holding items 1, 5 and 10.
        let best_of = db
            .create_collection(owner, "best-of", Visibility::Public)
            .unwrap();
        for item in [1u64, 5, 10] {
            db.add_to_collection(best_of, EntityId::new(item)).unwrap();
        }

        // Global signals: one view per item, then four more on item 1.
        let ts = Timestamp::now();
        for item in 1..=10u64 {
            db.signal("view", EntityId::new(item), 1.0, ts).unwrap();
        }
        for _ in 0..4 {
            db.signal("view", EntityId::new(1), 1.0, ts).unwrap();
        }

        // Signals with user context, for cohort attribution.
        for item in 1..=3u64 {
            db.signal_with_context("view", EntityId::new(item), 1.0, ts, Some(1001), Some(100))
                .unwrap();
        }

        // Two co-engagement edges.
        let co_eng = db.co_engagement();
        for (from, to, weight) in [(1, 2, 3.0), (1, 5, 1.5)] {
            co_eng.insert_edge(from, to, weight);
        }

        // One saved search.
        db.save_search(EntityId::new(1001), "my-query", "jazz music", None)
            .unwrap();

        db.close().unwrap();
    }

    // Phase 2: reopen and verify every surface.
    {
        let db = open_db();

        // 1. Item metadata. Per the original note, item_count() reads an
        //    in-memory bitmap that is not rebuilt on restart, so durability is
        //    checked through get_item_metadata() instead.
        let item_1 = db.get_item_metadata(EntityId::new(1)).unwrap();
        assert!(item_1.is_some(), "item 1 metadata should survive restart");
        let item_1 = item_1.unwrap();
        assert_eq!(item_1.get("category").map(String::as_str), Some("tech"));

        // 2. Global signal state. Item 1 got 5 plain views (1 + 4 extra) plus
        //    one view with user context; only the lower bound of 5 is asserted.
        let count_1 = db
            .read_windowed_count(EntityId::new(1), "view", Window::AllTime)
            .unwrap();
        assert!(
            count_1 >= 5,
            "item 1 should have at least 5 views after restart; got {count_1}"
        );

        match db.read_decay_score(EntityId::new(1), "view", 0).unwrap() {
            None => panic!("item 1 decay score should exist after restart"),
            Some(score) => assert!(score > 0.0, "item 1 decay score should be positive"),
        }

        // 3. Collections.
        let collections = db.list_collections(owner).unwrap();
        assert_eq!(collections.len(), 1, "collection should survive restart");
        let best_of = &collections[0];
        assert_eq!(best_of.name, "best-of");
        assert_eq!(best_of.item_count, 3);

        let index = db.collection_index();
        for member in [1, 5, 10] {
            assert!(index.contains(best_of.id, member));
        }
        assert!(!index.contains(best_of.id, 2));

        // 4. Cohort definitions: re-defining the same name is expected to fail.
        let redefinition = db.define_cohort(CohortDef {
            name: "tech_en".to_string(),
            predicate: field_eq("x", "y"),
        });
        assert!(
            redefinition.is_err(),
            "tech_en cohort should survive restart"
        );

        // 5. Cohort signal state.
        let cohort_ledger = db.cohort_ledger();
        for i in 1..=3u64 {
            let count = cohort_ledger
                .read_windowed_count("tech_en", EntityId::new(i), "view", Window::AllTime)
                .unwrap();
            assert_eq!(
                count, 1,
                "cohort tech_en item {i} should have 1 view after restart; got {count}"
            );
        }

        // 6. Co-engagement edges.
        let co_eng = db.co_engagement();
        let restored_edges = co_eng.edge_count();
        assert!(
            restored_edges >= 2,
            "co-engagement edges should survive restart; got {restored_edges}"
        );
        for (from, to, expected) in [(1u64, 2u64, 3.0f32), (1, 5, 1.5)] {
            let score = co_eng.score(EntityId::new(from), EntityId::new(to));
            assert!(
                (score - expected).abs() < f32::EPSILON,
                "edge ({from}, {to}) should be {expected:?}; got {score}"
            );
        }

        // 7. Saved search.
        let searches = db.list_saved_searches(EntityId::new(1001)).unwrap();
        assert_eq!(searches.len(), 1, "saved search should survive restart");
        assert_eq!(searches[0].name, "my-query");
        assert_eq!(searches[0].query_text, "jazz music");

        db.close().unwrap();
    }
}
|
|
|
|
// ── Test 6: Collection add/remove survives restart ──────────────────────────
//
// Adds four items to a collection and removes two of them before closing.
// After reopening, the kept items must be present in the collection index and
// the removed ones absent.

#[test]
fn collection_add_remove_survives_restart() {
    let dir = tempfile::tempdir().unwrap();
    let schema = m6_schema();
    let owner = EntityId::new(7);

    // Both phases open the database the same way, on the same directory.
    let open_db = || {
        TidalDb::builder()
            .with_data_dir(dir.path())
            .with_schema(schema.clone())
            .open()
            .unwrap()
    };

    // Phase 1: create the collection, add items 1-4, remove items 2 and 4.
    {
        let db = open_db();

        let playlist = db
            .create_collection(owner, "playlist", Visibility::Shared)
            .unwrap();

        for item in 1..=4u64 {
            db.add_to_collection(playlist, EntityId::new(item)).unwrap();
        }
        for item in [2u64, 4] {
            db.remove_from_collection(playlist, EntityId::new(item))
                .unwrap();
        }

        // Only items 1 and 3 remain before closing.
        let listed = db.list_collections(owner).unwrap();
        assert_eq!(listed[0].item_count, 2);

        db.close().unwrap();
    }

    // Phase 2: reopen and verify the removals were persisted.
    {
        let db = open_db();

        let collections = db.list_collections(owner).unwrap();
        assert_eq!(collections.len(), 1);
        let playlist = &collections[0];
        assert_eq!(playlist.name, "playlist");
        assert_eq!(
            playlist.item_count, 2,
            "should have 2 items after restart (items 1 and 3)"
        );
        assert_eq!(playlist.visibility, Visibility::Shared);

        let index = db.collection_index();
        for (item, kept) in [(1, true), (2, false), (3, true), (4, false)] {
            let present = index.contains(playlist.id, item);
            if kept {
                assert!(present, "item {item} should be in collection");
            } else {
                assert!(
                    !present,
                    "item {item} should NOT be in collection (removed)"
                );
            }
        }

        db.close().unwrap();
    }
}
|
|
|
|
// ── Test 7: Cohort ledger with multiple cohorts and signal types ────────────
//
// One user matches two cohorts and sends a `view` and a `like` on the same
// item. Every cohort x signal-type count must be 1 before the close and again
// after the reopen.

#[test]
fn cohort_multi_signal_type_checkpoint_restore() {
    let dir = tempfile::tempdir().unwrap();
    let schema = m6_schema();

    // Both phases open the database the same way, on the same directory.
    let open_db = || {
        TidalDb::builder()
            .with_data_dir(dir.path())
            .with_schema(schema.clone())
            .open()
            .unwrap()
    };

    // Phase 1: record two signal types attributed to two cohorts.
    {
        let db = open_db();

        for (cohort, field, value) in [
            ("english", "locale", "en"),
            ("tech", "primary_category", "tech"),
        ] {
            db.define_cohort(CohortDef {
                name: cohort.to_string(),
                predicate: Predicate::Eq {
                    field: field.into(),
                    value: value.into(),
                },
            })
            .unwrap();
        }

        // User 3001 satisfies both predicates.
        db.write_user(EntityId::new(3001), &user_metadata("en", "tech"))
            .unwrap();

        db.write_item_with_metadata(EntityId::new(1), &item_metadata("tech", 100))
            .unwrap();

        let ts = Timestamp::now();

        // A view, then a like, both from user 3001 on item 1.
        for signal in ["view", "like"] {
            db.signal_with_context(signal, EntityId::new(1), 1.0, ts, Some(3001), Some(100))
                .unwrap();
        }

        let ledger = db.cohort_ledger();
        for cohort in ["english", "tech"] {
            for signal in ["view", "like"] {
                assert_eq!(
                    ledger
                        .read_windowed_count(cohort, EntityId::new(1), signal, Window::AllTime)
                        .unwrap(),
                    1
                );
            }
        }

        db.close().unwrap();
    }

    // Phase 2: reopen and verify all cohort x signal-type combinations.
    {
        let db = open_db();
        let ledger = db.cohort_ledger();

        for cohort in ["english", "tech"] {
            for signal in ["view", "like"] {
                let count = ledger
                    .read_windowed_count(cohort, EntityId::new(1), signal, Window::AllTime)
                    .unwrap();
                assert_eq!(
                    count, 1,
                    "{cohort} cohort {signal} count should be 1 after restart"
                );
            }
        }

        db.close().unwrap();
    }
}
|
|
|
|
// ── Test 8: Co-engagement via signal_with_context survives restart ───────────
//
// One user likes items 10, 20, 30 in that order through
// `signal_with_context`. The test expects edges (20,10), (30,10) and (30,20)
// to score above zero before the close and after the reopen, and the reverse
// pair (10,20) to score zero after the reopen.

#[test]
fn co_engagement_via_signal_survives_restart() {
    let dir = tempfile::tempdir().unwrap();
    let schema = m6_schema();

    // Both phases open the database the same way, on the same directory.
    let open_db = || {
        TidalDb::builder()
            .with_data_dir(dir.path())
            .with_schema(schema.clone())
            .open()
            .unwrap()
    };
    // (later-liked item, earlier-liked item) pairs expected to have an edge.
    let expected_edges = [(20u64, 10u64), (30, 10), (30, 20)];

    // Phase 1: generate co-engagement through like signals.
    {
        let db = open_db();

        for item in [10u64, 20, 30] {
            db.write_item_with_metadata(EntityId::new(item), &item_metadata("jazz", 100))
                .unwrap();
        }

        let ts = Timestamp::now();

        // User 1 likes items 10, 20, 30 in sequence.
        for item in [10u64, 20, 30] {
            db.signal_with_context("like", EntityId::new(item), 1.0, ts, Some(1), Some(100))
                .unwrap();
        }

        // The edges must already exist before closing.
        let co_eng = db.co_engagement();
        for (later, earlier) in expected_edges {
            assert!(
                co_eng.score(EntityId::new(later), EntityId::new(earlier)) > 0.0,
                "pre-close: ({later}, {earlier}) edge should exist"
            );
        }

        db.close().unwrap();
    }

    // Phase 2: reopen and verify the edges survived.
    {
        let db = open_db();
        let co_eng = db.co_engagement();

        for (later, earlier) in expected_edges {
            let score = co_eng.score(EntityId::new(later), EntityId::new(earlier));
            assert!(
                score > 0.0,
                "post-restart: ({later}, {earlier}) edge should survive; got {score}"
            );
        }

        // The reverse direction (10, 20) must still score zero.
        let reverse = co_eng.score(EntityId::new(10), EntityId::new(20));
        assert!(
            reverse.abs() < f32::EPSILON,
            "post-restart: reverse (10, 20) should be 0; got {reverse}"
        );

        db.close().unwrap();
    }
}
|
|
|
|
// ── Test 9: Saved search persistence roundtrip ──────────────────────────────
//
// Saves three searches for one user and deletes one of them before closing.
// After reopening, exactly the two remaining searches must be listed, and the
// query text of one of them is checked.

#[test]
fn saved_search_persistence_roundtrip() {
    let dir = tempfile::tempdir().unwrap();
    let schema = m6_schema();
    let user = EntityId::new(777);

    // Both phases open the database the same way, on the same directory.
    let open_db = || {
        TidalDb::builder()
            .with_data_dir(dir.path())
            .with_schema(schema.clone())
            .open()
            .unwrap()
    };

    // Phase 1: save three searches, then delete one.
    {
        let db = open_db();

        for (name, query) in [
            ("jazz-favorites", "jazz music favorites"),
            ("rust-databases", "rust embedded database"),
            ("ml-papers", "machine learning papers"),
        ] {
            db.save_search(user, name, query, None).unwrap();
        }

        db.delete_saved_search(user, "ml-papers").unwrap();

        let remaining = db.list_saved_searches(user).unwrap();
        assert_eq!(remaining.len(), 2, "should have 2 searches before close");

        db.close().unwrap();
    }

    // Phase 2: reopen and verify.
    {
        let db = open_db();

        let searches = db.list_saved_searches(user).unwrap();
        assert_eq!(
            searches.len(),
            2,
            "should have 2 saved searches after restart"
        );

        let names: Vec<&str> = searches.iter().map(|saved| saved.name.as_str()).collect();
        for kept in ["jazz-favorites", "rust-databases"] {
            assert!(names.contains(&kept), "{kept} should survive restart");
        }
        assert!(
            !names.contains(&"ml-papers"),
            "deleted ml-papers should NOT survive restart"
        );

        // The query text must round-trip as well.
        let jazz = searches
            .iter()
            .find(|saved| saved.name == "jazz-favorites")
            .unwrap();
        assert_eq!(jazz.query_text, "jazz music favorites");

        db.close().unwrap();
    }
}
|
|
|
|
// ── Test 10: Two-phase mixed collection + signal restart ────────────────────
//
// Two write phases followed by a verification phase. Phase 1 writes items and
// signals, then closes. Phase 2 reopens, adds more items, signals, and a
// collection, then closes. Phase 3 reopens and verifies that all state from
// both write phases is present. Tests incremental persistence across multiple
// sessions.
|
|
|
|
/// State written in separate sessions must accumulate across reopens.
///
/// Phase 1 writes items 1-5 and records one `view` signal for each. Phase 2
/// reopens, checks that the phase 1 metadata is readable, writes items 6-10
/// with one `view` signal each, and creates a private collection holding
/// items 3 and 7. Phase 3 reopens and checks metadata and all-time `view`
/// counts for all ten items, plus the collection's name, item count, and
/// membership in the collection index.
#[test]
fn incremental_persistence_across_multiple_opens() {
    let dir = tempfile::tempdir().unwrap();
    let schema = m6_schema();

    // Every phase opens the same data directory with the same schema.
    let open_db = || {
        TidalDb::builder()
            .with_data_dir(dir.path())
            .with_schema(schema.clone())
            .open()
            .unwrap()
    };

    // Phase 1: Write items 1-5 and signals.
    {
        let db = open_db();

        for id in 1..=5u64 {
            let item = EntityId::new(id);
            db.write_item_with_metadata(item, &item_metadata("jazz", 100))
                .unwrap();
        }

        // All phase 1 signals share one timestamp taken after the writes.
        let now = Timestamp::now();
        for id in 1..=5u64 {
            let item = EntityId::new(id);
            db.signal("view", item, 1.0, now).unwrap();
        }

        db.close().unwrap();
    }

    // Phase 2: Reopen, add items 6-10, create collection, add more signals.
    {
        let db = open_db();

        // Verify phase 1 state via durable metadata (not in-memory universe).
        for id in 1..=5u64 {
            assert!(
                db.get_item_metadata(EntityId::new(id)).unwrap().is_some(),
                "phase 1 item {id} metadata should survive"
            );
        }

        for id in 6..=10u64 {
            let item = EntityId::new(id);
            db.write_item_with_metadata(item, &item_metadata("blues", 200))
                .unwrap();
        }

        // The collection mixes an item from phase 1 (3) with one from phase 2 (7).
        let owner = EntityId::new(1);
        let cid = db
            .create_collection(owner, "phase2-collection", Visibility::Private)
            .unwrap();
        for &member in &[3u64, 7] {
            db.add_to_collection(cid, EntityId::new(member)).unwrap();
        }

        let now = Timestamp::now();
        for id in 6..=10u64 {
            let item = EntityId::new(id);
            db.signal("view", item, 1.0, now).unwrap();
        }

        db.close().unwrap();
    }

    // Phase 3: Verify all state from both phases.
    {
        let db = open_db();

        // All 10 items durable in fjall (metadata readable).
        for id in 1..=10u64 {
            assert!(
                db.get_item_metadata(EntityId::new(id)).unwrap().is_some(),
                "item {id} metadata should survive two phases"
            );
        }

        // Signal state from both phases.
        for id in 1..=10u64 {
            let count = db
                .read_windowed_count(EntityId::new(id), "view", Window::AllTime)
                .unwrap();
            assert!(
                count >= 1,
                "item {id} should have at least 1 view after two phases; got {count}"
            );
        }

        // Collection from phase 2 survives.
        let owner = EntityId::new(1);
        let collections = db.list_collections(owner).unwrap();
        assert_eq!(collections.len(), 1);
        let restored = &collections[0];
        assert_eq!(restored.name, "phase2-collection");
        assert_eq!(restored.item_count, 2);

        let index = db.collection_index();
        assert!(index.contains(restored.id, 3));
        assert!(index.contains(restored.id, 7));

        db.close().unwrap();
    }
}
|