575 lines
19 KiB
Rust
575 lines
19 KiB
Rust
//! Milestone 6 Phase 2 Integration Tests: Social Graph Extension + Collaborative Filtering.
//!
//! Exercises the complete M6P2 scenario end-to-end:
//!
//! 1. Reverse relationship index (creator -> follower user IDs).
//! 2. `FilterExpr::SocialGraph` constraining candidates at depth=1 and depth=2.
//! 3. Co-engagement index: pairwise item co-occurrence tracking and scoring.
//! 4. `related` profile boosted by co-engagement edges.
//! 5. Social-graph-scoped trending using `UserSignalIndex`.
//! 6. Co-engagement LRU eviction at capacity.
//! 7. Co-engagement checkpoint/restore across reopen.
|
|
|
|
#![allow(clippy::unwrap_used, clippy::cast_precision_loss)]
|
|
|
|
use std::collections::HashMap;
|
|
use std::time::Duration;
|
|
|
|
use tidaldb::TidalDb;
|
|
use tidaldb::entities::RelationshipType;
|
|
use tidaldb::query::retrieve::Retrieve;
|
|
use tidaldb::schema::{DecaySpec, EntityId, EntityKind, SchemaBuilder, Timestamp, Window};
|
|
use tidaldb::storage::indexes::filter::FilterExpr;
|
|
|
|
// ── Schema ──────────────────────────────────────────────────────────────────
|
|
|
|
fn m6_social_schema() -> tidaldb::schema::Schema {
|
|
let mut builder = SchemaBuilder::new();
|
|
|
|
for &(name, half_life_days) in &[
|
|
("view", 7),
|
|
("like", 14),
|
|
("share", 7),
|
|
("skip", 1),
|
|
("completion", 14),
|
|
("dislike", 1),
|
|
("hide", 1),
|
|
] {
|
|
let _ = builder
|
|
.signal(
|
|
name,
|
|
EntityKind::Item,
|
|
DecaySpec::Exponential {
|
|
half_life: Duration::from_secs(half_life_days * 24 * 3600),
|
|
},
|
|
)
|
|
.windows(&[
|
|
Window::OneHour,
|
|
Window::TwentyFourHours,
|
|
Window::SevenDays,
|
|
Window::AllTime,
|
|
])
|
|
.velocity(true)
|
|
.add();
|
|
}
|
|
|
|
builder.build().expect("m6_social schema must be valid")
|
|
}
|
|
|
|
// ── Helpers ─────────────────────────────────────────────────────────────────
|
|
|
|
fn item_metadata(category: &str, creator_id: u64) -> HashMap<String, String> {
|
|
let mut meta = HashMap::new();
|
|
meta.insert("category".to_string(), category.to_string());
|
|
meta.insert("format".to_string(), "video".to_string());
|
|
meta.insert("creator_id".to_string(), creator_id.to_string());
|
|
meta.insert(
|
|
"created_at".to_string(),
|
|
Timestamp::now().as_nanos().to_string(),
|
|
);
|
|
meta
|
|
}
|
|
|
|
fn open_ephemeral_db() -> TidalDb {
|
|
TidalDb::builder()
|
|
.ephemeral()
|
|
.with_schema(m6_social_schema())
|
|
.open()
|
|
.expect("db open")
|
|
}
|
|
|
|
// ── Test 1: Reverse relationship index ──────────────────────────────────────
|
|
|
|
/// The reverse relationship index lists a creator's followers and drops a
/// follower once the corresponding `Follows` relationship is deleted.
#[test]
fn reverse_index_follows_creator() {
    let db = open_ephemeral_db();
    let now = Timestamp::now();

    // Three users (1, 2, 3) all follow creator 100.
    for follower in 1..=3u64 {
        db.write_relationship(
            EntityId::new(follower),
            RelationshipType::Follows,
            EntityId::new(100),
            1.0,
            now,
        )
        .unwrap();
    }

    // Reverse lookup: creator 100 -> {1, 2, 3}.
    let before = db.user_state().follower_ids(100);
    assert_eq!(before.len(), 3, "creator 100 should have 3 followers");
    for uid in 1..=3u64 {
        assert!(
            before.contains(&uid),
            "follower {uid} should be in reverse index"
        );
    }

    // User 2 unfollows creator 100.
    db.delete_relationship(
        EntityId::new(2),
        RelationshipType::Follows,
        EntityId::new(100),
    )
    .unwrap();

    // Only users 1 and 3 remain in the reverse index.
    let after = db.user_state().follower_ids(100);
    assert_eq!(
        after.len(),
        2,
        "creator 100 should have 2 followers after unfollow"
    );
    assert!(!after.contains(&2), "user 2 should be removed");
    assert!(after.contains(&1));
    assert!(after.contains(&3));
}
|
|
|
|
// ── Test 2: Social graph depth=1 filter ─────────────────────────────────────
|
|
|
|
/// With a depth-1 social-graph filter, a user's results contain items from the
/// creators they follow and omit items from a creator they do not follow.
#[test]
fn social_graph_depth1_filter_constrains_to_followed_creators() {
    let db = open_ephemeral_db();
    let now = Timestamp::now();

    // (item id, category, creator id). Creator 300 is never followed by user 1.
    let catalog = [
        (10u64, "jazz", 100u64),
        (11, "jazz", 100),
        (20, "blues", 200),
        (21, "blues", 200),
        (30, "rock", 300),
    ];
    for (item, category, creator) in catalog {
        db.write_item_with_metadata(EntityId::new(item), &item_metadata(category, creator))
            .unwrap();
    }

    // User 1 follows creators 100 and 200.
    for creator in [100u64, 200] {
        db.write_relationship(
            EntityId::new(1),
            RelationshipType::Follows,
            EntityId::new(creator),
            1.0,
            now,
        )
        .unwrap();
    }

    // Retrieve for user 1, restricted by SocialGraph(user_id=1, depth=1).
    let query = Retrieve::builder()
        .profile("new")
        .for_user(1)
        .filter(FilterExpr::social_graph(1, 1))
        .limit(20)
        .build()
        .unwrap();
    let results = db.retrieve(&query).unwrap();

    let ids: Vec<u64> = results
        .items
        .iter()
        .map(|hit| hit.entity_id.as_u64())
        .collect();
    let returned = |item: u64| ids.contains(&item);

    // Items of followed creators are present; creator 300's item is not.
    assert!(returned(10), "item 10 from followed creator 100");
    assert!(returned(11), "item 11 from followed creator 100");
    assert!(returned(20), "item 20 from followed creator 200");
    assert!(returned(21), "item 21 from followed creator 200");
    assert!(
        !returned(30),
        "item 30 from unfollowed creator 300 should be excluded"
    );
}
|
|
|
|
// ── Test 3: Social graph depth=2 expands to follower community ──────────────
|
|
|
|
/// With a depth-2 social-graph filter, results for user 1 include the followed
/// creator's item and an item viewed by another follower of the same creator.
#[test]
fn social_graph_depth2_expands_to_follower_community() {
    let db = open_ephemeral_db();
    let now = Timestamp::now();

    // (item id, category, creator id). User 1 follows only creator 100, so
    // items 20 and 50 come from creators outside user 1's follow list.
    let catalog = [
        (10u64, "jazz", 100u64),
        (20, "blues", 200),
        (50, "rock", 300),
    ];
    for (item, category, creator) in catalog {
        db.write_item_with_metadata(EntityId::new(item), &item_metadata(category, creator))
            .unwrap();
    }

    // Users 1 and 2 both follow creator 100, which makes them co-followers.
    for follower in [1u64, 2] {
        db.write_relationship(
            EntityId::new(follower),
            RelationshipType::Follows,
            EntityId::new(100),
            1.0,
            now,
        )
        .unwrap();
    }

    // User 2 views item 20 (creator 200). Because user 2 is a co-follower of
    // creator 100, that item is expected to become reachable at depth=2.
    db.signal_with_context("view", EntityId::new(20), 1.0, now, Some(2), Some(200))
        .unwrap();

    // Retrieve for user 1, restricted by SocialGraph(user_id=1, depth=2).
    let query = Retrieve::builder()
        .profile("new")
        .for_user(1)
        .filter(FilterExpr::social_graph(1, 2))
        .limit(20)
        .build()
        .unwrap();
    let results = db.retrieve(&query).unwrap();

    let ids: Vec<u64> = results
        .items
        .iter()
        .map(|hit| hit.entity_id.as_u64())
        .collect();
    let returned = |item: u64| ids.contains(&item);

    // Depth 1: the followed creator's own item.
    assert!(returned(10), "item 10 from followed creator 100");
    // Depth 2: the item viewed by the co-follower.
    assert!(
        returned(20),
        "item 20 should appear at depth=2 (seen by co-follower)"
    );
}
|
|
|
|
// ── Test 4: Co-engagement recording and scoring ─────────────────────────────
|
|
|
|
/// Sequential "like" signals from one user create directed co-engagement
/// edges, and a second user repeating a pair raises that edge's score.
#[test]
fn co_engagement_recording_and_scoring() {
    let db = open_ephemeral_db();
    let now = Timestamp::now();

    // Three items, all by creator 100.
    for item in [10u64, 20, 30] {
        db.write_item_with_metadata(EntityId::new(item), &item_metadata("jazz", 100))
            .unwrap();
    }

    // User 1 likes 10, then 20, then 30 ("like" is a positive engagement signal).
    for item in [10u64, 20, 30] {
        db.signal_with_context("like", EntityId::new(item), 1.0, now, Some(1), Some(100))
            .unwrap();
    }

    let co_eng = db.co_engagement();
    let score = |from: u64, to: u64| co_eng.score(EntityId::new(from), EntityId::new(to));

    // Expected edges: (20, 10), (30, 10), (30, 20).
    assert!(
        score(20, 10) > 0.0,
        "co-engagement (20, 10) should be positive"
    );
    assert!(
        score(30, 10) > 0.0,
        "co-engagement (30, 10) should be positive"
    );
    assert!(
        score(30, 20) > 0.0,
        "co-engagement (30, 20) should be positive"
    );

    // The edge is directed: the reverse pair has no weight.
    assert_eq!(
        score(10, 20),
        0.0,
        "co-engagement is asymmetric: (10, 20) should be 0"
    );

    // User 2 likes 10 then 20 as well, which should add to edge (20, 10).
    for item in [10u64, 20] {
        db.signal_with_context("like", EntityId::new(item), 1.0, now, Some(2), Some(100))
            .unwrap();
    }

    assert!(
        score(20, 10) > 1.0,
        "co-engagement (20, 10) should increment from two users"
    );
}
|
|
|
|
// ── Test 5: Related profile boosts co-engaged items ─────────────────────────
|
|
|
|
/// The `related` profile should rank an item that has a co-engagement edge
/// from the seed ahead of an item that has none.
///
/// The ranking comparison only runs when at least three results come back and
/// both item 2 and item 5 are among them; otherwise only the seed-exclusion
/// assertion is exercised.
#[test]
fn related_profile_boosts_co_engaged_items() {
    let db = open_ephemeral_db();
    let now = Timestamp::now();

    // Five items with identical view signals so that base scores are equal.
    for item in 1..=5u64 {
        db.write_item_with_metadata(EntityId::new(item), &item_metadata("jazz", 100))
            .unwrap();
        db.signal("view", EntityId::new(item), 10.0, now).unwrap();
    }

    // Edges from seed item 1: items 2 and 3 are strong, item 4 is weak, and
    // item 5 deliberately gets no edge at all.
    let co_eng = db.co_engagement();
    for (target, weight) in [(2, 5.0), (3, 3.0), (4, 0.5)] {
        co_eng.insert_edge(1, target, weight);
    }

    // RETRIEVE does not auto-exclude the `similar_to` seed, so exclude it explicitly.
    let query = Retrieve::builder()
        .profile("related")
        .similar_to(EntityId::new(1))
        .exclude(vec![EntityId::new(1)])
        .limit(5)
        .build()
        .unwrap();
    let results = db.retrieve(&query).unwrap();

    let ids: Vec<u64> = results
        .items
        .iter()
        .map(|hit| hit.entity_id.as_u64())
        .collect();

    // The excluded seed must not come back.
    assert!(
        !ids.contains(&1),
        "seed item 1 should not appear in results"
    );

    // Item 2 (edge weight 5.0) should outrank item 5 (no edge) when both are returned.
    if results.items.len() >= 3 {
        let rank_of = |wanted: u64| ids.iter().position(|&id| id == wanted);

        if let (Some(p2), Some(p5)) = (rank_of(2), rank_of(5)) {
            assert!(
                p2 < p5,
                "item 2 (co-engaged) should rank ahead of item 5 (no co-engagement): pos_2={p2}, pos_5={p5}"
            );
        }
    }
}
|
|
|
|
// ── Test 6: Social-graph-scoped trending differs from global ────────────────
|
|
|
|
/// Trending scoped to user 1's social graph should favour the item viewed by a
/// co-follower over the item viewed only by an unrelated user.
///
/// The ranking comparison only runs when both result lists are non-empty and
/// the social-scoped list contains both item 1 and item 2. The global ranking
/// itself is retrieved but not asserted on.
#[test]
fn social_trending_differs_from_global_trending() {
    let db = open_ephemeral_db();
    let base_ts_ns = 1_708_000_000_000_000_000u64;
    let follow_ts = Timestamp::from_nanos(base_ts_ns);

    // Three items, all attributed to creator 100.
    for item in 1..=3u64 {
        db.write_item_with_metadata(EntityId::new(item), &item_metadata("jazz", 100))
            .unwrap();
    }

    // Register creator 100.
    let creator_meta = HashMap::from([("name".to_string(), "creator100".to_string())]);
    db.write_creator(EntityId::new(100), &creator_meta).unwrap();

    // Users 1 and 2 both follow creator 100 (user 2 is user 1's co-follower).
    for follower in [1u64, 2] {
        db.write_relationship(
            EntityId::new(follower),
            RelationshipType::Follows,
            EntityId::new(100),
            1.0,
            follow_ts,
        )
        .unwrap();
    }

    // (item, viewing user, number of views), one view per second from the base time.
    // User 2 is inside the social graph; user 99 is not.
    for (item, viewer, views) in [(1u64, 2, 100u64), (2, 99, 200)] {
        for second in 0..views {
            let at = Timestamp::from_nanos(base_ts_ns + second * 1_000_000_000);
            db.signal_with_context("view", EntityId::new(item), 1.0, at, Some(viewer), Some(100))
                .unwrap();
        }
    }

    // Global trending: item 2 has more views overall (200 vs 100).
    let global_query = Retrieve::builder()
        .profile("trending")
        .limit(10)
        .build()
        .unwrap();
    let global_results = db.retrieve(&global_query).unwrap();
    let global_ids: Vec<u64> = global_results
        .items
        .iter()
        .map(|hit| hit.entity_id.as_u64())
        .collect();

    // Social-scoped trending for user 1: item 1 has 100 views from co-follower
    // user 2, while item 2 has none from user 2.
    let social_query = Retrieve::builder()
        .profile("trending")
        .for_user(1)
        .filter(FilterExpr::social_graph(1, 1))
        .limit(10)
        .build()
        .unwrap();
    let social_results = db.retrieve(&social_query).unwrap();
    let social_ids: Vec<u64> = social_results
        .items
        .iter()
        .map(|hit| hit.entity_id.as_u64())
        .collect();

    // Key expectation: within the social scope item 1 outranks item 2, because
    // item 2's views all came from outside the social graph.
    let both_non_empty = !(social_ids.is_empty() || global_ids.is_empty());
    let social_rank = |wanted: u64| social_ids.iter().position(|&id| id == wanted);

    if both_non_empty {
        if let (Some(pos_1), Some(p2)) = (social_rank(1), social_rank(2)) {
            assert!(
                pos_1 < p2,
                "social trending: item 1 (100 views from co-follower) should rank above item 2 (0 views from co-follower): pos_1={pos_1}, pos_2={p2}"
            );
        }
    }
}
|
|
|
|
// ── Test 7: Co-engagement LRU eviction at capacity ──────────────────────────
|
|
|
|
/// A `CoEngagementIndex` created with a small capacity keeps its edge count
/// bounded under heavy recording and does not end up empty.
#[test]
fn co_engagement_lru_eviction_at_capacity() {
    // Exercise the index directly, without a database, at capacity 10.
    let index = tidaldb::entities::CoEngagementIndex::with_capacity(10);

    // 20 users each like items 1..=5, in user-major order.
    // Each record_positive call adds up to N-1 edges but evicts at most 1.
    let likes = (1..=20u64).flat_map(|user_id| (1..=5u64).map(move |item_id| (user_id, item_id)));
    for (user_id, item_id) in likes {
        index.record_positive(user_id, EntityId::new(item_id));
    }

    // Without eviction the edge count would reach many hundreds; with
    // capacity=10 it is only required to stay under a loose bound of 200.
    let count = index.edge_count();
    assert!(
        count < 200,
        "co-engagement eviction should bound edge count; got {count}"
    );

    // Eviction must not have emptied the index.
    assert!(index.edge_count() > 0, "index should not be empty");
}
|
|
|
|
// ── Test 8: Co-engagement checkpoint/restore ────────────────────────────────
|
|
|
|
/// Co-engagement edges written before `close()` are present, with the same
/// weights, after the database is reopened from the same data directory.
#[test]
fn co_engagement_checkpoint_restore() {
    let dir = tempfile::tempdir().unwrap();
    let schema = m6_social_schema();

    // Opens a persistent database rooted at the temporary directory.
    let open_at_dir = |schema: tidaldb::schema::Schema| {
        TidalDb::builder()
            .with_data_dir(dir.path())
            .with_schema(schema)
            .open()
            .unwrap()
    };
    // Float comparison with the same tolerance for every restored weight.
    let approx_eq = |got: f32, want: f32| (got - want).abs() < f32::EPSILON;

    // Phase 1: open, record co-engagement, shutdown (triggers checkpoint).
    {
        let db = open_at_dir(schema.clone());

        for item in [10u64, 20, 30] {
            db.write_item_with_metadata(EntityId::new(item), &item_metadata("jazz", 100))
                .unwrap();
        }

        // Insert edges directly to stand in for prior engagement.
        let co_eng = db.co_engagement();
        co_eng.insert_edge(10, 20, 5.0);
        co_eng.insert_edge(10, 30, 2.5);
        co_eng.insert_edge(20, 30, 1.0);

        assert_eq!(co_eng.edge_count(), 3);

        // Shutdown triggers checkpoint.
        db.close().unwrap();
    }

    // Phase 2: reopen and verify the edges survived.
    {
        let db = open_at_dir(schema);
        let co_eng = db.co_engagement();

        assert!(
            co_eng.edge_count() >= 3,
            "co-engagement edges should survive restart; got {}",
            co_eng.edge_count()
        );

        // Each restored weight must match what was inserted.
        let score_10_20 = co_eng.score(EntityId::new(10), EntityId::new(20));
        assert!(
            approx_eq(score_10_20, 5.0),
            "edge (10, 20) weight should be 5.0 after restore; got {score_10_20}"
        );

        let score_10_30 = co_eng.score(EntityId::new(10), EntityId::new(30));
        assert!(
            approx_eq(score_10_30, 2.5),
            "edge (10, 30) weight should be 2.5 after restore; got {score_10_30}"
        );

        let score_20_30 = co_eng.score(EntityId::new(20), EntityId::new(30));
        assert!(
            approx_eq(score_20_30, 1.0),
            "edge (20, 30) weight should be 1.0 after restore; got {score_20_30}"
        );

        db.close().unwrap();
    }
}
|