//! Milestone 6 Phase 2 Integration Tests: Social Graph Extension + Collaborative Filtering. //! //! Exercises the complete M6P2 scenario end-to-end: //! //! 1. Reverse relationship index (creator -> follower user IDs). //! 2. `FilterExpr::SocialGraph` constraining candidates at depth=1 and depth=2. //! 3. Co-engagement index: pairwise item co-occurrence tracking and scoring. //! 4. `related` profile boosted by co-engagement edges. //! 5. Social-graph-scoped trending using `UserSignalIndex`. //! 6. Co-engagement LRU eviction at capacity. //! 7. Co-engagement checkpoint/restore across reopen. #![allow(clippy::unwrap_used, clippy::cast_precision_loss)] use std::collections::HashMap; use std::time::Duration; use tidaldb::TidalDb; use tidaldb::entities::RelationshipType; use tidaldb::query::retrieve::Retrieve; use tidaldb::schema::{DecaySpec, EntityId, EntityKind, SchemaBuilder, Timestamp, Window}; use tidaldb::storage::indexes::filter::FilterExpr; // ── Schema ────────────────────────────────────────────────────────────────── fn m6_social_schema() -> tidaldb::schema::Schema { let mut builder = SchemaBuilder::new(); for &(name, half_life_days) in &[ ("view", 7), ("like", 14), ("share", 7), ("skip", 1), ("completion", 14), ("dislike", 1), ("hide", 1), ] { let _ = builder .signal( name, EntityKind::Item, DecaySpec::Exponential { half_life: Duration::from_secs(half_life_days * 24 * 3600), }, ) .windows(&[ Window::OneHour, Window::TwentyFourHours, Window::SevenDays, Window::AllTime, ]) .velocity(true) .add(); } builder.build().expect("m6_social schema must be valid") } // ── Helpers ───────────────────────────────────────────────────────────────── fn item_metadata(category: &str, creator_id: u64) -> HashMap { let mut meta = HashMap::new(); meta.insert("category".to_string(), category.to_string()); meta.insert("format".to_string(), "video".to_string()); meta.insert("creator_id".to_string(), creator_id.to_string()); meta.insert( "created_at".to_string(), Timestamp::now().as_nanos().to_string(), ); meta } fn open_ephemeral_db() -> TidalDb { TidalDb::builder() .ephemeral() .with_schema(m6_social_schema()) .open() .expect("db open") } // ── Test 1: Reverse relationship index ────────────────────────────────────── #[test] fn reverse_index_follows_creator() { let db = open_ephemeral_db(); let ts = Timestamp::now(); // Users 1, 2, 3 follow creator 100. for user_id in 1..=3u64 { db.write_relationship( EntityId::new(user_id), RelationshipType::Follows, EntityId::new(100), 1.0, ts, ) .unwrap(); } // Verify reverse index: creator 100 should have followers {1, 2, 3}. let followers = db.user_state().follower_ids(100); assert_eq!(followers.len(), 3, "creator 100 should have 3 followers"); for uid in &[1u64, 2, 3] { assert!( followers.contains(uid), "follower {uid} should be in reverse index" ); } // Unfollow: user 2 unfollows creator 100. db.delete_relationship( EntityId::new(2), RelationshipType::Follows, EntityId::new(100), ) .unwrap(); let followers = db.user_state().follower_ids(100); assert_eq!( followers.len(), 2, "creator 100 should have 2 followers after unfollow" ); assert!(!followers.contains(&2), "user 2 should be removed"); assert!(followers.contains(&1)); assert!(followers.contains(&3)); } // ── Test 2: Social graph depth=1 filter ───────────────────────────────────── #[test] fn social_graph_depth1_filter_constrains_to_followed_creators() { let db = open_ephemeral_db(); let ts = Timestamp::now(); // Creator 100 has items 10, 11. for id in [10u64, 11] { db.write_item_with_metadata(EntityId::new(id), &item_metadata("jazz", 100)) .unwrap(); } // Creator 200 has items 20, 21. for id in [20u64, 21] { db.write_item_with_metadata(EntityId::new(id), &item_metadata("blues", 200)) .unwrap(); } // Creator 300 has item 30 (not followed by user 1). db.write_item_with_metadata(EntityId::new(30), &item_metadata("rock", 300)) .unwrap(); // User 1 follows creator 100 and 200. db.write_relationship( EntityId::new(1), RelationshipType::Follows, EntityId::new(100), 1.0, ts, ) .unwrap(); db.write_relationship( EntityId::new(1), RelationshipType::Follows, EntityId::new(200), 1.0, ts, ) .unwrap(); // Query with SocialGraph(user_id=1, depth=1) filter. let query = Retrieve::builder() .profile("new") .for_user(1) .filter(FilterExpr::social_graph(1, 1)) .limit(20) .build() .unwrap(); let results = db.retrieve(&query).unwrap(); let ids: Vec = results.items.iter().map(|r| r.entity_id.as_u64()).collect(); // Should include items from creator 100 and 200, but NOT creator 300. assert!(ids.contains(&10), "item 10 from followed creator 100"); assert!(ids.contains(&11), "item 11 from followed creator 100"); assert!(ids.contains(&20), "item 20 from followed creator 200"); assert!(ids.contains(&21), "item 21 from followed creator 200"); assert!( !ids.contains(&30), "item 30 from unfollowed creator 300 should be excluded" ); } // ── Test 3: Social graph depth=2 expands to follower community ────────────── #[test] fn social_graph_depth2_expands_to_follower_community() { let db = open_ephemeral_db(); let ts = Timestamp::now(); // Creator 100 has item 10. db.write_item_with_metadata(EntityId::new(10), &item_metadata("jazz", 100)) .unwrap(); // Creator 200 has item 20 (not followed by user 1). db.write_item_with_metadata(EntityId::new(20), &item_metadata("blues", 200)) .unwrap(); // Item 50 exists but belongs to no followed creator. db.write_item_with_metadata(EntityId::new(50), &item_metadata("rock", 300)) .unwrap(); // User 1 follows creator 100. db.write_relationship( EntityId::new(1), RelationshipType::Follows, EntityId::new(100), 1.0, ts, ) .unwrap(); // User 2 also follows creator 100. db.write_relationship( EntityId::new(2), RelationshipType::Follows, EntityId::new(100), 1.0, ts, ) .unwrap(); // User 2 has seen item 20 (from creator 200, not followed by user 1). // This simulates depth-2 expansion: user 2 is a co-follower of creator 100, // so items seen by user 2 become visible at depth=2. db.signal_with_context("view", EntityId::new(20), 1.0, ts, Some(2), Some(200)) .unwrap(); // Query with SocialGraph(user_id=1, depth=2). let query = Retrieve::builder() .profile("new") .for_user(1) .filter(FilterExpr::social_graph(1, 2)) .limit(20) .build() .unwrap(); let results = db.retrieve(&query).unwrap(); let ids: Vec = results.items.iter().map(|r| r.entity_id.as_u64()).collect(); // depth-1: item 10 (from creator 100) assert!(ids.contains(&10), "item 10 from followed creator 100"); // depth-2: item 20 (seen by user 2, co-follower of creator 100) assert!( ids.contains(&20), "item 20 should appear at depth=2 (seen by co-follower)" ); } // ── Test 4: Co-engagement recording and scoring ───────────────────────────── #[test] fn co_engagement_recording_and_scoring() { let db = open_ephemeral_db(); let ts = Timestamp::now(); // Write items. for id in [10u64, 20, 30] { db.write_item_with_metadata(EntityId::new(id), &item_metadata("jazz", 100)) .unwrap(); } // User 1 engages positively with items 10, 20, 30 in sequence. // "like" is a positive engagement signal. db.signal_with_context("like", EntityId::new(10), 1.0, ts, Some(1), Some(100)) .unwrap(); db.signal_with_context("like", EntityId::new(20), 1.0, ts, Some(1), Some(100)) .unwrap(); db.signal_with_context("like", EntityId::new(30), 1.0, ts, Some(1), Some(100)) .unwrap(); // Co-engagement should record (20, 10), (30, 10), (30, 20) edges. let co_eng = db.co_engagement(); assert!( co_eng.score(EntityId::new(20), EntityId::new(10)) > 0.0, "co-engagement (20, 10) should be positive" ); assert!( co_eng.score(EntityId::new(30), EntityId::new(10)) > 0.0, "co-engagement (30, 10) should be positive" ); assert!( co_eng.score(EntityId::new(30), EntityId::new(20)) > 0.0, "co-engagement (30, 20) should be positive" ); // Asymmetric: reverse direction should be 0. assert_eq!( co_eng.score(EntityId::new(10), EntityId::new(20)), 0.0, "co-engagement is asymmetric: (10, 20) should be 0" ); // User 2 also likes items 10 and 20 -> (20, 10) weight should increment. db.signal_with_context("like", EntityId::new(10), 1.0, ts, Some(2), Some(100)) .unwrap(); db.signal_with_context("like", EntityId::new(20), 1.0, ts, Some(2), Some(100)) .unwrap(); assert!( co_eng.score(EntityId::new(20), EntityId::new(10)) > 1.0, "co-engagement (20, 10) should increment from two users" ); } // ── Test 5: Related profile boosts co-engaged items ───────────────────────── #[test] fn related_profile_boosts_co_engaged_items() { let db = open_ephemeral_db(); let ts = Timestamp::now(); // Write 5 items. for id in 1..=5u64 { db.write_item_with_metadata(EntityId::new(id), &item_metadata("jazz", 100)) .unwrap(); // Give all items the same view signal so base scores are equal. db.signal("view", EntityId::new(id), 10.0, ts).unwrap(); } // Build co-engagement edges: seed=1 -> items 2 and 3 (strong), item 4 (weak), item 5 (none). let co_eng = db.co_engagement(); co_eng.insert_edge(1, 2, 5.0); co_eng.insert_edge(1, 3, 3.0); co_eng.insert_edge(1, 4, 0.5); // Item 5 has no co-engagement with item 1. // RETRIEVE with `related` profile and similar_to=1. // Exclude the seed item explicitly (RETRIEVE does not auto-exclude similar_to). let query = Retrieve::builder() .profile("related") .similar_to(EntityId::new(1)) .exclude(vec![EntityId::new(1)]) .limit(5) .build() .unwrap(); let results = db.retrieve(&query).unwrap(); let ids: Vec = results.items.iter().map(|r| r.entity_id.as_u64()).collect(); // Item 1 should be excluded via the exclude list. assert!( !ids.contains(&1), "seed item 1 should not appear in results" ); // Items 2 and 3 should rank higher than items 4 and 5 due to co-engagement boost. // Find rank positions. if results.items.len() >= 3 { let pos_2 = ids.iter().position(|&id| id == 2); let pos_5 = ids.iter().position(|&id| id == 5); // Item 2 (co-eng weight 5.0) should rank ahead of item 5 (no co-eng). if let (Some(p2), Some(p5)) = (pos_2, pos_5) { assert!( p2 < p5, "item 2 (co-engaged) should rank ahead of item 5 (no co-engagement): pos_2={p2}, pos_5={p5}" ); } } } // ── Test 6: Social-graph-scoped trending differs from global ──────────────── #[test] fn social_trending_differs_from_global_trending() { let db = open_ephemeral_db(); let base_ts_ns = 1_708_000_000_000_000_000u64; let ts = Timestamp::from_nanos(base_ts_ns); // Write items. for id in 1..=3u64 { db.write_item_with_metadata(EntityId::new(id), &item_metadata("jazz", 100)) .unwrap(); } // Creator 100 exists. let mut creator_meta = HashMap::new(); creator_meta.insert("name".to_string(), "creator100".to_string()); db.write_creator(EntityId::new(100), &creator_meta).unwrap(); // User 1 follows creator 100. db.write_relationship( EntityId::new(1), RelationshipType::Follows, EntityId::new(100), 1.0, ts, ) .unwrap(); // User 2 follows creator 100 (co-follower). db.write_relationship( EntityId::new(2), RelationshipType::Follows, EntityId::new(100), 1.0, ts, ) .unwrap(); // User 2 (in social graph) views item 1 heavily. for i in 0..100u64 { let t = Timestamp::from_nanos(base_ts_ns + i * 1_000_000_000); db.signal_with_context("view", EntityId::new(1), 1.0, t, Some(2), Some(100)) .unwrap(); } // User 99 (NOT in social graph) views item 2 heavily. for i in 0..200u64 { let t = Timestamp::from_nanos(base_ts_ns + i * 1_000_000_000); db.signal_with_context("view", EntityId::new(2), 1.0, t, Some(99), Some(100)) .unwrap(); } // Global trending: item 2 should rank higher (200 views vs 100). let global_query = Retrieve::builder() .profile("trending") .limit(10) .build() .unwrap(); let global_results = db.retrieve(&global_query).unwrap(); let global_ids: Vec = global_results .items .iter() .map(|r| r.entity_id.as_u64()) .collect(); // Social-scoped trending for user 1: only signals from user 2 (co-follower) // should be counted. Item 1 has 100 views from user 2; item 2 has 0 from user 2. let social_query = Retrieve::builder() .profile("trending") .for_user(1) .filter(FilterExpr::social_graph(1, 1)) .limit(10) .build() .unwrap(); let social_results = db.retrieve(&social_query).unwrap(); let social_ids: Vec = social_results .items .iter() .map(|r| r.entity_id.as_u64()) .collect(); // The key assertion: social-scoped results should have different ranking // than global results. In social scope, item 1 should rank higher than item 2 // because user 2 (co-follower) viewed item 1 heavily, while item 2's views // came from user 99 (outside the social graph). if !social_ids.is_empty() && !global_ids.is_empty() { // In the social-scoped query, item 1 should appear and ideally rank first. if let Some(pos_1) = social_ids.iter().position(|&id| id == 1) { let pos_2 = social_ids.iter().position(|&id| id == 2); // If both appear, item 1 should rank above item 2 in social scope. if let Some(p2) = pos_2 { assert!( pos_1 < p2, "social trending: item 1 (100 views from co-follower) should rank above item 2 (0 views from co-follower): pos_1={pos_1}, pos_2={p2}" ); } } } } // ── Test 7: Co-engagement LRU eviction at capacity ────────────────────────── #[test] fn co_engagement_lru_eviction_at_capacity() { // Directly test the CoEngagementIndex with a small capacity. let index = tidaldb::entities::CoEngagementIndex::with_capacity(10); // Build many co-engagement edges: 20 users each liking items 1..5. // Each record_positive call adds up to N-1 edges but evicts at most 1. for user_id in 1..=20u64 { for item_id in 1..=5u64 { index.record_positive(user_id, EntityId::new(item_id)); } } // The edge count should be bounded -- eviction should have removed // some edges rather than allowing unbounded growth. let count = index.edge_count(); // Theoretical max without eviction: many hundreds of edges. // With capacity=10 and eviction, the count should be significantly bounded. assert!( count < 200, "co-engagement eviction should bound edge count; got {count}" ); // Verify that the index is still functional: scoring works. // Recent edges should still be queryable. assert!(index.edge_count() > 0, "index should not be empty"); } // ── Test 8: Co-engagement checkpoint/restore ──────────────────────────────── #[test] fn co_engagement_checkpoint_restore() { let dir = tempfile::tempdir().unwrap(); let schema = m6_social_schema(); // Phase 1: open, record co-engagement, shutdown (triggers checkpoint). { let db = TidalDb::builder() .with_data_dir(dir.path()) .with_schema(schema.clone()) .open() .unwrap(); // Write items. for id in [10u64, 20, 30] { db.write_item_with_metadata(EntityId::new(id), &item_metadata("jazz", 100)) .unwrap(); } // Build co-engagement edges via direct insert (simulates prior engagement). let co_eng = db.co_engagement(); co_eng.insert_edge(10, 20, 5.0); co_eng.insert_edge(10, 30, 2.5); co_eng.insert_edge(20, 30, 1.0); assert_eq!(co_eng.edge_count(), 3); // Shutdown triggers checkpoint. db.close().unwrap(); } // Phase 2: reopen and verify co-engagement edges survived. { let db = TidalDb::builder() .with_data_dir(dir.path()) .with_schema(schema) .open() .unwrap(); let co_eng = db.co_engagement(); // Edges should be restored from checkpoint. assert!( co_eng.edge_count() >= 3, "co-engagement edges should survive restart; got {}", co_eng.edge_count() ); // Verify specific edge weights. let score_10_20 = co_eng.score(EntityId::new(10), EntityId::new(20)); assert!( (score_10_20 - 5.0).abs() < f32::EPSILON, "edge (10, 20) weight should be 5.0 after restore; got {score_10_20}" ); let score_10_30 = co_eng.score(EntityId::new(10), EntityId::new(30)); assert!( (score_10_30 - 2.5).abs() < f32::EPSILON, "edge (10, 30) weight should be 2.5 after restore; got {score_10_30}" ); let score_20_30 = co_eng.score(EntityId::new(20), EntityId::new(30)); assert!( (score_20_30 - 1.0).abs() < f32::EPSILON, "edge (20, 30) weight should be 1.0 after restore; got {score_20_30}" ); db.close().unwrap(); } }