tidaldb/tidal/tests/m6_social.rs
2026-02-23 22:41:16 -07:00

575 lines
19 KiB
Rust

//! Milestone 6 Phase 2 Integration Tests: Social Graph Extension + Collaborative Filtering.
//!
//! Exercises the complete M6P2 scenario end-to-end:
//!
//! 1. Reverse relationship index (creator -> follower user IDs).
//! 2. `FilterExpr::SocialGraph` constraining candidates at depth=1 and depth=2.
//! 3. Co-engagement index: pairwise item co-occurrence tracking and scoring.
//! 4. `related` profile boosted by co-engagement edges.
//! 5. Social-graph-scoped trending using `UserSignalIndex`.
//! 6. Co-engagement LRU eviction at capacity.
//! 7. Co-engagement checkpoint/restore across reopen.
#![allow(clippy::unwrap_used, clippy::cast_precision_loss)]
use std::collections::HashMap;
use std::time::Duration;
use tidaldb::TidalDb;
use tidaldb::entities::RelationshipType;
use tidaldb::query::retrieve::Retrieve;
use tidaldb::schema::{DecaySpec, EntityId, EntityKind, SchemaBuilder, Timestamp, Window};
use tidaldb::storage::indexes::filter::FilterExpr;
// ── Schema ──────────────────────────────────────────────────────────────────
/// Builds the schema shared by every test in this file.
///
/// Registers seven item-level signals, each with an exponential decay whose
/// half-life is given in days, the same four aggregation windows, and
/// velocity tracking enabled.
///
/// # Panics
///
/// Panics if the schema builder rejects the resulting schema.
fn m6_social_schema() -> tidaldb::schema::Schema {
    const SECS_PER_DAY: u64 = 24 * 3600;
    // (signal name, half-life in days)
    const SIGNAL_HALF_LIVES: &[(&str, u64)] = &[
        ("view", 7),
        ("like", 14),
        ("share", 7),
        ("skip", 1),
        ("completion", 14),
        ("dislike", 1),
        ("hide", 1),
    ];

    let mut builder = SchemaBuilder::new();
    for &(signal_name, days) in SIGNAL_HALF_LIVES {
        let decay = DecaySpec::Exponential {
            half_life: Duration::from_secs(days * SECS_PER_DAY),
        };
        // The value returned by `add()` is intentionally discarded, as before.
        let _ = builder
            .signal(signal_name, EntityKind::Item, decay)
            .windows(&[
                Window::OneHour,
                Window::TwentyFourHours,
                Window::SevenDays,
                Window::AllTime,
            ])
            .velocity(true)
            .add();
    }

    builder.build().expect("m6_social schema must be valid")
}
// ── Helpers ─────────────────────────────────────────────────────────────────
/// Produces the metadata map used when writing a test item.
///
/// The map always contains four string entries: `category` (as given),
/// `format` (fixed to `"video"`), `creator_id` (decimal rendering of
/// `creator_id`), and `created_at` (the current timestamp in nanoseconds,
/// rendered as a decimal string).
fn item_metadata(category: &str, creator_id: u64) -> HashMap<String, String> {
    let created_at_ns = Timestamp::now().as_nanos().to_string();
    HashMap::from([
        ("category".to_string(), category.to_string()),
        ("format".to_string(), "video".to_string()),
        ("creator_id".to_string(), creator_id.to_string()),
        ("created_at".to_string(), created_at_ns),
    ])
}
/// Opens a database configured via the builder's `ephemeral()` option and the
/// shared M6 social schema.
///
/// # Panics
///
/// Panics with `"db open"` if the database cannot be opened.
fn open_ephemeral_db() -> TidalDb {
    let schema = m6_social_schema();
    let configured = TidalDb::builder().ephemeral().with_schema(schema);
    configured.open().expect("db open")
}
// ── Test 1: Reverse relationship index ──────────────────────────────────────
/// Writing `Follows` relationships must populate the creator -> followers
/// reverse lookup, and deleting one must remove exactly that follower.
#[test]
fn reverse_index_follows_creator() {
    let db = open_ephemeral_db();
    let ts = Timestamp::now();

    // Three users all follow creator 100.
    let initial_followers = [1u64, 2, 3];
    for &user_id in &initial_followers {
        db.write_relationship(
            EntityId::new(user_id),
            RelationshipType::Follows,
            EntityId::new(100),
            1.0,
            ts,
        )
        .unwrap();
    }

    // The reverse lookup for creator 100 must list exactly those users.
    let followers = db.user_state().follower_ids(100);
    assert_eq!(followers.len(), 3, "creator 100 should have 3 followers");
    for uid in initial_followers.iter() {
        assert!(
            followers.contains(uid),
            "follower {uid} should be in reverse index"
        );
    }

    // User 2 unfollows; only users 1 and 3 may remain afterwards.
    db.delete_relationship(
        EntityId::new(2),
        RelationshipType::Follows,
        EntityId::new(100),
    )
    .unwrap();

    let followers = db.user_state().follower_ids(100);
    assert_eq!(
        followers.len(),
        2,
        "creator 100 should have 2 followers after unfollow"
    );
    assert!(!followers.contains(&2), "user 2 should be removed");
    assert!(followers.contains(&1));
    assert!(followers.contains(&3));
}
// ── Test 2: Social graph depth=1 filter ─────────────────────────────────────
/// A depth=1 social-graph filter for user 1 must keep items from the creators
/// that user follows (100 and 200) and drop items from everyone else (300).
#[test]
fn social_graph_depth1_filter_constrains_to_followed_creators() {
    let db = open_ephemeral_db();
    let ts = Timestamp::now();

    // (item id, category, creator id) -- creator 300 is never followed.
    let catalog: [(u64, &str, u64); 5] = [
        (10, "jazz", 100),
        (11, "jazz", 100),
        (20, "blues", 200),
        (21, "blues", 200),
        (30, "rock", 300),
    ];
    for &(item_id, category, creator_id) in &catalog {
        db.write_item_with_metadata(
            EntityId::new(item_id),
            &item_metadata(category, creator_id),
        )
        .unwrap();
    }

    // User 1 follows creators 100 and 200.
    for creator_id in [100u64, 200] {
        db.write_relationship(
            EntityId::new(1),
            RelationshipType::Follows,
            EntityId::new(creator_id),
            1.0,
            ts,
        )
        .unwrap();
    }

    // Retrieve with SocialGraph(user_id=1, depth=1).
    let query = Retrieve::builder()
        .profile("new")
        .for_user(1)
        .filter(FilterExpr::social_graph(1, 1))
        .limit(20)
        .build()
        .unwrap();
    let results = db.retrieve(&query).unwrap();
    let ids: Vec<u64> = results.items.iter().map(|r| r.entity_id.as_u64()).collect();

    // Every item of a followed creator must be present ...
    let expected_present = [
        (10u64, "item 10 from followed creator 100"),
        (11, "item 11 from followed creator 100"),
        (20, "item 20 from followed creator 200"),
        (21, "item 21 from followed creator 200"),
    ];
    for (item_id, why) in expected_present {
        assert!(ids.contains(&item_id), "{why}");
    }

    // ... and the unfollowed creator's item must be filtered out.
    assert!(
        !ids.contains(&30),
        "item 30 from unfollowed creator 300 should be excluded"
    );
}
// ── Test 3: Social graph depth=2 expands to follower community ──────────────
/// A depth=2 social-graph filter for user 1 is expected to include, in
/// addition to items of directly followed creators, items that a co-follower
/// (another follower of the same creator) has engaged with.
///
/// Item 50 is the negative control: its creator (300) is followed by nobody
/// and no user has signalled on it, so it must stay excluded at depth=2.
#[test]
fn social_graph_depth2_expands_to_follower_community() {
    let db = open_ephemeral_db();
    let ts = Timestamp::now();

    // Creator 100 has item 10.
    db.write_item_with_metadata(EntityId::new(10), &item_metadata("jazz", 100))
        .unwrap();
    // Creator 200 has item 20 (creator 200 is not followed by user 1).
    db.write_item_with_metadata(EntityId::new(20), &item_metadata("blues", 200))
        .unwrap();
    // Item 50 belongs to creator 300, who is followed by no one (negative control).
    db.write_item_with_metadata(EntityId::new(50), &item_metadata("rock", 300))
        .unwrap();

    // Users 1 and 2 both follow creator 100, which makes user 2 a co-follower
    // of user 1.
    for follower in [1u64, 2] {
        db.write_relationship(
            EntityId::new(follower),
            RelationshipType::Follows,
            EntityId::new(100),
            1.0,
            ts,
        )
        .unwrap();
    }

    // User 2 has viewed item 20 (from creator 200). The test expects this
    // engagement to make item 20 reachable for user 1 at depth=2.
    db.signal_with_context("view", EntityId::new(20), 1.0, ts, Some(2), Some(200))
        .unwrap();

    // Query with SocialGraph(user_id=1, depth=2).
    let query = Retrieve::builder()
        .profile("new")
        .for_user(1)
        .filter(FilterExpr::social_graph(1, 2))
        .limit(20)
        .build()
        .unwrap();
    let results = db.retrieve(&query).unwrap();
    let ids: Vec<u64> = results.items.iter().map(|r| r.entity_id.as_u64()).collect();

    // depth-1: item 10 (from creator 100)
    assert!(ids.contains(&10), "item 10 from followed creator 100");
    // depth-2: item 20 (seen by user 2, co-follower of creator 100)
    assert!(
        ids.contains(&20),
        "item 20 should appear at depth=2 (seen by co-follower)"
    );
    // Negative control: without this check an over-permissive depth=2 filter
    // (one that lets every item through) would still pass the test.
    assert!(
        !ids.contains(&50),
        "item 50 (creator 300, outside the social graph) should be excluded at depth=2"
    );
}
// ── Test 4: Co-engagement recording and scoring ─────────────────────────────
/// Positive engagement by one user on several items must create directed
/// co-engagement edges from each later item to each earlier one, and a second
/// user repeating a pair must raise that edge's score above 1.0.
#[test]
fn co_engagement_recording_and_scoring() {
    let db = open_ephemeral_db();
    let ts = Timestamp::now();
    let item_ids = [10u64, 20, 30];

    // All three items belong to creator 100.
    for item_id in item_ids {
        db.write_item_with_metadata(EntityId::new(item_id), &item_metadata("jazz", 100))
            .unwrap();
    }

    // User 1 likes 10, then 20, then 30 ("like" is a positive engagement signal).
    for item_id in item_ids {
        db.signal_with_context("like", EntityId::new(item_id), 1.0, ts, Some(1), Some(100))
            .unwrap();
    }

    // Expected edges, keyed (later item, earlier item): (20, 10), (30, 10), (30, 20).
    let co_eng = db.co_engagement();
    let expected_edges = [
        (20u64, 10u64, "co-engagement (20, 10) should be positive"),
        (30, 10, "co-engagement (30, 10) should be positive"),
        (30, 20, "co-engagement (30, 20) should be positive"),
    ];
    for (later, earlier, why) in expected_edges {
        assert!(
            co_eng.score(EntityId::new(later), EntityId::new(earlier)) > 0.0,
            "{why}"
        );
    }

    // The opposite direction was never recorded, so it must score zero.
    assert_eq!(
        co_eng.score(EntityId::new(10), EntityId::new(20)),
        0.0,
        "co-engagement is asymmetric: (10, 20) should be 0"
    );

    // User 2 repeats the 10 -> 20 sequence, which should add to edge (20, 10).
    for item_id in [10u64, 20] {
        db.signal_with_context("like", EntityId::new(item_id), 1.0, ts, Some(2), Some(100))
            .unwrap();
    }
    assert!(
        co_eng.score(EntityId::new(20), EntityId::new(10)) > 1.0,
        "co-engagement (20, 10) should increment from two users"
    );
}
// ── Test 5: Related profile boosts co-engaged items ─────────────────────────
/// With equal base signals on every item, the `related` profile seeded on
/// item 1 should rank an item that has a strong co-engagement edge (item 2)
/// ahead of one that has none (item 5).
///
/// NOTE(review): the ranking check only runs when at least three items come
/// back and both item 2 and item 5 are present; otherwise the test passes
/// without checking the order. Confirm whether that leniency is intended.
#[test]
fn related_profile_boosts_co_engaged_items() {
    let db = open_ephemeral_db();
    let ts = Timestamp::now();

    // Five items, each with an identical view signal so base scores match.
    for item_id in 1..=5u64 {
        let item = EntityId::new(item_id);
        db.write_item_with_metadata(item, &item_metadata("jazz", 100))
            .unwrap();
        db.signal("view", EntityId::new(item_id), 10.0, ts).unwrap();
    }

    // Edges from seed item 1: strong to 2 and 3, weak to 4, none to 5.
    let co_eng = db.co_engagement();
    co_eng.insert_edge(1, 2, 5.0);
    co_eng.insert_edge(1, 3, 3.0);
    co_eng.insert_edge(1, 4, 0.5);

    // The seed is excluded explicitly (RETRIEVE does not auto-exclude similar_to).
    let query = Retrieve::builder()
        .profile("related")
        .similar_to(EntityId::new(1))
        .exclude(vec![EntityId::new(1)])
        .limit(5)
        .build()
        .unwrap();
    let results = db.retrieve(&query).unwrap();
    let ids: Vec<u64> = results.items.iter().map(|r| r.entity_id.as_u64()).collect();

    assert!(
        !ids.contains(&1),
        "seed item 1 should not appear in results"
    );

    // Compare the rank of item 2 (edge weight 5.0) with item 5 (no edge).
    let rank_of = |wanted: u64| ids.iter().position(|&id| id == wanted);
    match (rank_of(2), rank_of(5)) {
        (Some(p2), Some(p5)) if results.items.len() >= 3 => {
            assert!(
                p2 < p5,
                "item 2 (co-engaged) should rank ahead of item 5 (no co-engagement): pos_2={p2}, pos_5={p5}"
            );
        }
        _ => {}
    }
}
// ── Test 6: Social-graph-scoped trending differs from global ────────────────
/// Trending scoped to user 1's social graph should favour item 1 (viewed by
/// co-follower user 2) over item 2 (viewed only by user 99, who follows
/// nobody in this test), even though item 2 has more views overall.
///
/// NOTE(review): the global result list is only used for an emptiness check;
/// the "item 2 ranks first globally" expectation is never asserted. The
/// social ranking assertion also only fires when both items are returned.
#[test]
fn social_trending_differs_from_global_trending() {
    const NANOS_PER_SEC: u64 = 1_000_000_000;

    let db = open_ephemeral_db();
    let base_ts_ns = 1_708_000_000_000_000_000u64;
    let ts = Timestamp::from_nanos(base_ts_ns);

    // Three items, all attributed to creator 100.
    for item_id in 1..=3u64 {
        db.write_item_with_metadata(EntityId::new(item_id), &item_metadata("jazz", 100))
            .unwrap();
    }

    // Register creator 100 itself.
    let creator_meta = HashMap::from([("name".to_string(), "creator100".to_string())]);
    db.write_creator(EntityId::new(100), &creator_meta).unwrap();

    // Users 1 and 2 both follow creator 100 (user 2 is user 1's co-follower).
    for follower in [1u64, 2] {
        db.write_relationship(
            EntityId::new(follower),
            RelationshipType::Follows,
            EntityId::new(100),
            1.0,
            ts,
        )
        .unwrap();
    }

    // User 2 (inside the social graph): 100 views of item 1, one per second.
    for tick in 0..100u64 {
        let at = Timestamp::from_nanos(base_ts_ns + tick * NANOS_PER_SEC);
        db.signal_with_context("view", EntityId::new(1), 1.0, at, Some(2), Some(100))
            .unwrap();
    }
    // User 99 (outside the social graph): 200 views of item 2, one per second.
    for tick in 0..200u64 {
        let at = Timestamp::from_nanos(base_ts_ns + tick * NANOS_PER_SEC);
        db.signal_with_context("view", EntityId::new(2), 1.0, at, Some(99), Some(100))
            .unwrap();
    }

    // Unscoped trending query.
    let global_query = Retrieve::builder()
        .profile("trending")
        .limit(10)
        .build()
        .unwrap();
    let global_ids: Vec<u64> = db
        .retrieve(&global_query)
        .unwrap()
        .items
        .iter()
        .map(|r| r.entity_id.as_u64())
        .collect();

    // Trending for user 1 restricted by the depth=1 social-graph filter.
    let social_query = Retrieve::builder()
        .profile("trending")
        .for_user(1)
        .filter(FilterExpr::social_graph(1, 1))
        .limit(10)
        .build()
        .unwrap();
    let social_ids: Vec<u64> = db
        .retrieve(&social_query)
        .unwrap()
        .items
        .iter()
        .map(|r| r.entity_id.as_u64())
        .collect();

    // When both queries return something and both items show up in the
    // social-scoped list, item 1 must be ranked above item 2 there.
    let rank_in_social = |wanted: u64| social_ids.iter().position(|&id| id == wanted);
    let both_returned_results = !social_ids.is_empty() && !global_ids.is_empty();
    if both_returned_results {
        if let (Some(pos_1), Some(p2)) = (rank_in_social(1), rank_in_social(2)) {
            assert!(
                pos_1 < p2,
                "social trending: item 1 (100 views from co-follower) should rank above item 2 (0 views from co-follower): pos_1={pos_1}, pos_2={p2}"
            );
        }
    }
}
// ── Test 7: Co-engagement LRU eviction at capacity ──────────────────────────
/// Exercises a standalone `CoEngagementIndex` created with capacity 10 and
/// checks that, after many positive engagements, the edge count stays under a
/// loose upper bound while the index remains non-empty.
///
/// NOTE(review): only five distinct items are used, so the number of distinct
/// item pairs is small; if edges are keyed by item pair, `count < 200` may
/// hold even without eviction. Confirm against `CoEngagementIndex` and
/// consider tightening the bound.
#[test]
fn co_engagement_lru_eviction_at_capacity() {
    let index = tidaldb::entities::CoEngagementIndex::with_capacity(10);

    // 20 users each positively engage with items 1..=5, user by user.
    let engagements =
        (1..=20u64).flat_map(|user_id| (1..=5u64).map(move |item_id| (user_id, item_id)));
    for (user_id, item_id) in engagements {
        index.record_positive(user_id, EntityId::new(item_id));
    }

    // The edge count must stay below the loose bound ...
    let count = index.edge_count();
    assert!(
        count < 200,
        "co-engagement eviction should bound edge count; got {count}"
    );

    // ... and the index must still hold at least one edge.
    assert!(index.edge_count() > 0, "index should not be empty");
}
// ── Test 8: Co-engagement checkpoint/restore ────────────────────────────────
/// Co-engagement edges inserted before `close()` must be present, with the
/// same weights, after the database is reopened on the same data directory.
#[test]
fn co_engagement_checkpoint_restore() {
    let dir = tempfile::tempdir().unwrap();
    let schema = m6_social_schema();

    // Both phases open the database the same way, on the same directory.
    let open_db = || {
        TidalDb::builder()
            .with_data_dir(dir.path())
            .with_schema(schema.clone())
            .open()
            .unwrap()
    };

    // Phase 1: populate the index, then close (expected to checkpoint).
    {
        let db = open_db();

        for item_id in [10u64, 20, 30] {
            db.write_item_with_metadata(EntityId::new(item_id), &item_metadata("jazz", 100))
                .unwrap();
        }

        // Edges are inserted directly to stand in for earlier engagement.
        let co_eng = db.co_engagement();
        co_eng.insert_edge(10, 20, 5.0);
        co_eng.insert_edge(10, 30, 2.5);
        co_eng.insert_edge(20, 30, 1.0);
        assert_eq!(co_eng.edge_count(), 3);

        db.close().unwrap();
    }

    // Phase 2: reopen and check that every edge came back unchanged.
    {
        let db = open_db();
        let co_eng = db.co_engagement();

        assert!(
            co_eng.edge_count() >= 3,
            "co-engagement edges should survive restart; got {}",
            co_eng.edge_count()
        );

        let weight = |from: u64, to: u64| co_eng.score(EntityId::new(from), EntityId::new(to));

        let score_10_20 = weight(10, 20);
        assert!(
            (score_10_20 - 5.0).abs() < f32::EPSILON,
            "edge (10, 20) weight should be 5.0 after restore; got {score_10_20}"
        );

        let score_10_30 = weight(10, 30);
        assert!(
            (score_10_30 - 2.5).abs() < f32::EPSILON,
            "edge (10, 30) weight should be 2.5 after restore; got {score_10_30}"
        );

        let score_20_30 = weight(20, 30);
        assert!(
            (score_20_30 - 1.0).abs() < f32::EPSILON,
            "edge (20, 30) weight should be 1.0 after restore; got {score_20_30}"
        );

        db.close().unwrap();
    }
}