tidaldb/tidal/tests/m6p3_sorts.rs
2026-02-23 22:41:16 -07:00

773 lines
22 KiB
Rust

//! Milestone 6 Phase 3 Integration Tests: Full Sort Mode Coverage + Live Content + Engagement Filters.
//!
//! Exercises the complete M6P3 feature set end-to-end through `TidalDb`:
//!
//! 1. Alphabetical sort (asc/desc) with missing-title edge case.
//! 2. Duration sort (shortest/longest) with missing-duration edge case.
//! 3. `MostCommented` and `MostShared` signal-based sorts.
//! 4. `LiveViewerCount` sort via `viewer_count` signal.
//! 5. `DateSaved` sort with `FOR USER` context (and error without it).
//! 6. `MinSignal` / `MaxSignal` engagement threshold filters.
//! 7. `NearLocation` geographic post-filter.
//! 8. `with_viewer_count_signal()` convenience method in `SchemaBuilder`.
#![allow(clippy::unwrap_used, clippy::cast_precision_loss)]
use std::collections::HashMap;
use std::time::Duration;
use tidaldb::TidalDb;
use tidaldb::query::retrieve::Retrieve;
use tidaldb::schema::{DecaySpec, EntityId, EntityKind, SchemaBuilder, Timestamp, Window};
use tidaldb::storage::indexes::filter::FilterExpr;
// ── Schema ──────────────────────────────────────────────────────────────────
fn m6p3_schema() -> tidaldb::schema::Schema {
let mut builder = SchemaBuilder::new();
for &(name, half_life_days) in &[
("view", 7),
("like", 14),
("share", 7),
("comment", 7),
("skip", 1),
("hide", 1),
] {
let _ = builder
.signal(
name,
EntityKind::Item,
DecaySpec::Exponential {
half_life: Duration::from_secs(half_life_days * 24 * 3600),
},
)
.windows(&[
Window::OneHour,
Window::TwentyFourHours,
Window::SevenDays,
Window::AllTime,
])
.velocity(true)
.add();
}
// Register viewer_count via convenience method.
builder.with_viewer_count_signal();
builder.build().expect("m6p3 schema must be valid")
}
fn m6p3_schema_without_viewer_count() -> tidaldb::schema::Schema {
let mut builder = SchemaBuilder::new();
for &(name, half_life_days) in &[
("view", 7),
("like", 14),
("share", 7),
("comment", 7),
("skip", 1),
("hide", 1),
] {
let _ = builder
.signal(
name,
EntityKind::Item,
DecaySpec::Exponential {
half_life: Duration::from_secs(half_life_days * 24 * 3600),
},
)
.windows(&[
Window::OneHour,
Window::TwentyFourHours,
Window::SevenDays,
Window::AllTime,
])
.velocity(true)
.add();
}
builder.build().expect("schema must be valid")
}
// ── Helpers ─────────────────────────────────────────────────────────────────
/// Builds the baseline metadata map for an item.
///
/// The returned map holds exactly three string entries: `title`, `category`,
/// and `creator_id` (the numeric id rendered in decimal).
fn item_meta(title: &str, category: &str, creator_id: u32) -> HashMap<String, String> {
    HashMap::from([
        ("title".to_string(), title.to_string()),
        ("category".to_string(), category.to_string()),
        ("creator_id".to_string(), creator_id.to_string()),
    ])
}
/// Baseline item metadata (see `item_meta`) plus a `duration` entry holding
/// `duration_secs` rendered in decimal.
fn item_meta_with_duration(
    title: &str,
    category: &str,
    creator_id: u32,
    duration_secs: u32,
) -> HashMap<String, String> {
    let duration_entry = ("duration".to_string(), duration_secs.to_string());
    item_meta(title, category, creator_id)
        .into_iter()
        .chain(std::iter::once(duration_entry))
        .collect()
}
/// Baseline item metadata (see `item_meta`) plus `latitude` and `longitude`
/// entries holding the stringified `lat` / `lng` values.
fn item_meta_with_location(
    title: &str,
    category: &str,
    creator_id: u32,
    lat: f64,
    lng: f64,
) -> HashMap<String, String> {
    let mut meta = item_meta(title, category, creator_id);
    meta.extend([
        ("latitude".to_string(), lat.to_string()),
        ("longitude".to_string(), lng.to_string()),
    ]);
    meta
}
// ── Tests ───────────────────────────────────────────────────────────────────
/// The `alphabetical_asc` profile must return items ordered by title, A to Z.
#[test]
fn alphabetical_asc_sorts_a_before_z() {
    let db = TidalDb::builder()
        .ephemeral()
        .with_schema(m6p3_schema_without_viewer_count())
        .open()
        .unwrap();

    // Titles are inserted deliberately out of alphabetical order.
    let catalog = [
        (1, "Zebra Stripes", "music"),
        (2, "Apple Pie", "food"),
        (3, "Mango Tango", "food"),
    ];
    for &(id, title, category) in &catalog {
        db.write_item_with_metadata(EntityId::new(id), &item_meta(title, category, 100))
            .unwrap();
    }

    // Give all items a view signal so they appear as candidates.
    let now = Timestamp::now();
    for &(id, _, _) in &catalog {
        db.signal("view", EntityId::new(id), 1.0, now).unwrap();
    }

    let request = Retrieve::builder()
        .profile("alphabetical_asc")
        .limit(10)
        .build()
        .unwrap();
    let response = db.retrieve(&request).unwrap();

    assert!(
        response.items.len() >= 3,
        "should return all 3 items, got {}",
        response.items.len()
    );

    // Expected ranking: Apple (2), Mango (3), Zebra (1).
    let expected_order = [2, 3, 1];
    for (hit, &id) in response.items.iter().zip(&expected_order) {
        assert_eq!(hit.entity_id, EntityId::new(id));
    }

    db.close().unwrap();
}
/// The `alphabetical_desc` profile must return items ordered by title, Z to A.
#[test]
fn alphabetical_desc_sorts_z_before_a() {
    let db = TidalDb::builder()
        .ephemeral()
        .with_schema(m6p3_schema_without_viewer_count())
        .open()
        .unwrap();

    let catalog = [
        (1, "Zebra Stripes", "music"),
        (2, "Apple Pie", "food"),
        (3, "Mango Tango", "food"),
    ];
    for &(id, title, category) in &catalog {
        db.write_item_with_metadata(EntityId::new(id), &item_meta(title, category, 100))
            .unwrap();
    }

    // One view per item, all stamped with the same instant.
    let now = Timestamp::now();
    for &(id, _, _) in &catalog {
        db.signal("view", EntityId::new(id), 1.0, now).unwrap();
    }

    let request = Retrieve::builder()
        .profile("alphabetical_desc")
        .limit(10)
        .build()
        .unwrap();
    let response = db.retrieve(&request).unwrap();
    assert!(response.items.len() >= 3);

    // Expected ranking: Zebra (1), Mango (3), Apple (2).
    let expected_order = [1, 3, 2];
    for (hit, &id) in response.items.iter().zip(&expected_order) {
        assert_eq!(hit.entity_id, EntityId::new(id));
    }

    db.close().unwrap();
}
/// An item whose metadata has no `title` key must rank after every titled item
/// under `alphabetical_asc`.
#[test]
fn missing_title_sorted_last_alphabetical() {
    let db = TidalDb::builder()
        .ephemeral()
        .with_schema(m6p3_schema_without_viewer_count())
        .open()
        .unwrap();

    // Metadata for item 2: category and creator only, no title.
    let untitled: HashMap<String, String> = [("category", "music"), ("creator_id", "100")]
        .iter()
        .map(|&(key, value)| (key.to_string(), value.to_string()))
        .collect();

    // Write order is 1 ("Beta"), 2 (untitled), 3 ("Alpha").
    db.write_item_with_metadata(EntityId::new(1), &item_meta("Beta", "music", 100))
        .unwrap();
    db.write_item_with_metadata(EntityId::new(2), &untitled)
        .unwrap();
    db.write_item_with_metadata(EntityId::new(3), &item_meta("Alpha", "music", 100))
        .unwrap();

    let now = Timestamp::now();
    for id in 1..=3 {
        db.signal("view", EntityId::new(id), 1.0, now).unwrap();
    }

    let request = Retrieve::builder()
        .profile("alphabetical_asc")
        .limit(10)
        .build()
        .unwrap();
    let response = db.retrieve(&request).unwrap();
    assert!(response.items.len() >= 3);

    // Expected ranking: Alpha (3), Beta (1), then the untitled item (2).
    let expected_order = [3, 1, 2];
    for (hit, &id) in response.items.iter().zip(&expected_order) {
        assert_eq!(hit.entity_id, EntityId::new(id));
    }

    db.close().unwrap();
}
/// The `shortest` profile must rank items by ascending `duration`.
#[test]
fn shortest_sort_shortest_first() {
    let db = TidalDb::builder()
        .ephemeral()
        .with_schema(m6p3_schema_without_viewer_count())
        .open()
        .unwrap();

    // (entity id, title, duration in seconds)
    let clips = [(1, "Long", 3600), (2, "Short", 60), (3, "Medium", 600)];
    for &(id, title, secs) in &clips {
        let meta = item_meta_with_duration(title, "music", 100, secs);
        db.write_item_with_metadata(EntityId::new(id), &meta)
            .unwrap();
    }

    let now = Timestamp::now();
    for &(id, _, _) in &clips {
        db.signal("view", EntityId::new(id), 1.0, now).unwrap();
    }

    let request = Retrieve::builder()
        .profile("shortest")
        .limit(10)
        .build()
        .unwrap();
    let response = db.retrieve(&request).unwrap();
    assert!(response.items.len() >= 3);

    // Shortest first: 60s (2), 600s (3), 3600s (1).
    let expected_order = [2, 3, 1];
    for (hit, &id) in response.items.iter().zip(&expected_order) {
        assert_eq!(hit.entity_id, EntityId::new(id));
    }

    db.close().unwrap();
}
/// The `longest` profile must rank items by descending `duration`.
#[test]
fn longest_sort_longest_first() {
    let db = TidalDb::builder()
        .ephemeral()
        .with_schema(m6p3_schema_without_viewer_count())
        .open()
        .unwrap();

    // (entity id, title, duration in seconds)
    let clips = [(1, "Long", 3600), (2, "Short", 60), (3, "Medium", 600)];
    for &(id, title, secs) in &clips {
        let meta = item_meta_with_duration(title, "music", 100, secs);
        db.write_item_with_metadata(EntityId::new(id), &meta)
            .unwrap();
    }

    let now = Timestamp::now();
    for &(id, _, _) in &clips {
        db.signal("view", EntityId::new(id), 1.0, now).unwrap();
    }

    let request = Retrieve::builder()
        .profile("longest")
        .limit(10)
        .build()
        .unwrap();
    let response = db.retrieve(&request).unwrap();
    assert!(response.items.len() >= 3);

    // Longest first: 3600s (1), 600s (3), 60s (2).
    let expected_order = [1, 3, 2];
    for (hit, &id) in response.items.iter().zip(&expected_order) {
        assert_eq!(hit.entity_id, EntityId::new(id));
    }

    db.close().unwrap();
}
/// An item without a `duration` entry must come last under the `shortest`
/// profile; only the final position is checked.
#[test]
fn missing_duration_sorted_last() {
    let db = TidalDb::builder()
        .ephemeral()
        .with_schema(m6p3_schema_without_viewer_count())
        .open()
        .unwrap();

    let short_clip = item_meta_with_duration("Short", "music", 100, 60);
    let no_duration = item_meta("NoDuration", "music", 100);
    let long_clip = item_meta_with_duration("Long", "music", 100, 3600);

    // Item 2 is the one lacking a duration.
    db.write_item_with_metadata(EntityId::new(1), &short_clip)
        .unwrap();
    db.write_item_with_metadata(EntityId::new(2), &no_duration)
        .unwrap();
    db.write_item_with_metadata(EntityId::new(3), &long_clip)
        .unwrap();

    let now = Timestamp::now();
    for id in 1..=3 {
        db.signal("view", EntityId::new(id), 1.0, now).unwrap();
    }

    let request = Retrieve::builder()
        .profile("shortest")
        .limit(10)
        .build()
        .unwrap();
    let response = db.retrieve(&request).unwrap();
    assert!(response.items.len() >= 3);

    // The duration-less item occupies the third slot.
    let third = &response.items[2];
    assert_eq!(third.entity_id, EntityId::new(2));

    db.close().unwrap();
}
/// The `most_commented` profile must put the item with the most `comment`
/// signals at the top.
#[test]
fn most_commented_sort() {
    let db = TidalDb::builder()
        .ephemeral()
        .with_schema(m6p3_schema())
        .open()
        .unwrap();

    for id in 1u64..=3 {
        let meta = item_meta("Item", "music", 100);
        db.write_item_with_metadata(EntityId::new(id), &meta)
            .unwrap();
    }

    let now = Timestamp::now();
    // Record a view per item so they appear as candidates.
    for id in 1u64..=3 {
        db.signal("view", EntityId::new(id), 1.0, now).unwrap();
    }

    // (entity id, number of comment signals): 2, 5 and 1 respectively.
    let comment_counts = [(1, 2), (2, 5), (3, 1)];
    for &(id, count) in &comment_counts {
        for _ in 0..count {
            db.signal("comment", EntityId::new(id), 1.0, now).unwrap();
        }
    }

    let request = Retrieve::builder()
        .profile("most_commented")
        .limit(10)
        .build()
        .unwrap();
    let response = db.retrieve(&request).unwrap();
    assert!(response.items.len() >= 3);

    // Entity 2 (5 comments) should rank first.
    let top = &response.items[0];
    assert_eq!(top.entity_id, EntityId::new(2));

    db.close().unwrap();
}
/// The `most_shared` profile must put the item with the most `share` signals
/// at the top.
#[test]
fn most_shared_sort() {
    let db = TidalDb::builder()
        .ephemeral()
        .with_schema(m6p3_schema())
        .open()
        .unwrap();

    for id in 1u64..=3 {
        let meta = item_meta("Item", "music", 100);
        db.write_item_with_metadata(EntityId::new(id), &meta)
            .unwrap();
    }

    let now = Timestamp::now();
    for id in 1u64..=3 {
        db.signal("view", EntityId::new(id), 1.0, now).unwrap();
    }

    // (entity id, number of share signals): 1, 3 and 7 respectively.
    let share_counts = [(1, 1), (2, 3), (3, 7)];
    for &(id, count) in &share_counts {
        for _ in 0..count {
            db.signal("share", EntityId::new(id), 1.0, now).unwrap();
        }
    }

    let request = Retrieve::builder()
        .profile("most_shared")
        .limit(10)
        .build()
        .unwrap();
    let response = db.retrieve(&request).unwrap();
    assert!(response.items.len() >= 3);

    // Entity 3 (7 shares) should rank first.
    let top = &response.items[0];
    assert_eq!(top.entity_id, EntityId::new(3));

    db.close().unwrap();
}
/// The `live` profile must put the item with the most `viewer_count` signals
/// at the top.
#[test]
fn live_viewer_count_sort() {
    let db = TidalDb::builder()
        .ephemeral()
        .with_schema(m6p3_schema())
        .open()
        .unwrap();

    for id in 1u64..=3 {
        let meta = item_meta("Item", "music", 100);
        db.write_item_with_metadata(EntityId::new(id), &meta)
            .unwrap();
    }

    let now = Timestamp::now();
    // Give each item a view so they appear as candidates.
    for id in 1u64..=3 {
        db.signal("view", EntityId::new(id), 1.0, now).unwrap();
    }

    // (entity id, number of viewer_count signals): 5, 20 and 1 respectively.
    let viewer_counts = [(1, 5), (2, 20), (3, 1)];
    for &(id, count) in &viewer_counts {
        for _ in 0..count {
            db.signal("viewer_count", EntityId::new(id), 1.0, now)
                .unwrap();
        }
    }

    let request = Retrieve::builder()
        .profile("live")
        .limit(10)
        .build()
        .unwrap();
    let response = db.retrieve(&request).unwrap();
    assert!(response.items.len() >= 3);

    // Entity 2 (20 viewers) should rank first.
    let top = &response.items[0];
    assert_eq!(top.entity_id, EntityId::new(2));

    db.close().unwrap();
}
/// With a user context, the `date_saved` profile must rank that user's saves
/// from the most recent save timestamp to the oldest.
#[test]
fn date_saved_sort_latest_first() {
    let db = TidalDb::builder()
        .ephemeral()
        .with_schema(m6p3_schema_without_viewer_count())
        .open()
        .unwrap();

    for id in 1u64..=3 {
        let meta = item_meta("Item", "music", 100);
        db.write_item_with_metadata(EntityId::new(id), &meta)
            .unwrap();
    }

    let now = Timestamp::now();
    for id in 1u64..=3 {
        db.signal("view", EntityId::new(id), 1.0, now).unwrap();
    }

    // User 42 saves each item at a different timestamp; item 2 is the latest.
    let saves = db.user_state();
    for &(item, saved_at) in &[(1, 1000), (2, 3000), (3, 2000)] {
        saves.add_save_timestamped(42, item, saved_at);
    }

    let request = Retrieve::builder()
        .profile("date_saved")
        .for_user(42)
        .limit(10)
        .build()
        .unwrap();
    let response = db.retrieve(&request).unwrap();
    assert!(response.items.len() >= 3);

    // Newest save first: item 2 (3000), item 3 (2000), item 1 (1000).
    let expected_order = [2, 3, 1];
    for (hit, &id) in response.items.iter().zip(&expected_order) {
        assert_eq!(hit.entity_id, EntityId::new(id));
    }

    db.close().unwrap();
}
/// Retrieving with the `date_saved` profile but no `for_user` context must
/// return an error.
#[test]
fn date_saved_requires_for_user() {
    let db = TidalDb::builder()
        .ephemeral()
        .with_schema(m6p3_schema_without_viewer_count())
        .open()
        .unwrap();

    let only_item = EntityId::new(1);
    db.write_item_with_metadata(only_item, &item_meta("Item", "music", 100))
        .unwrap();
    db.signal("view", EntityId::new(1), 1.0, Timestamp::now())
        .unwrap();

    // The query itself builds fine; it is the retrieval that should fail
    // because no user was supplied.
    let request = Retrieve::builder()
        .profile("date_saved")
        .limit(10)
        .build()
        .unwrap();
    let outcome = db.retrieve(&request);
    assert!(
        outcome.is_err(),
        "DateSaved sort without FOR USER should fail"
    );

    db.close().unwrap();
}
/// `FilterExpr::min_signal("view", 5.0)` must drop items with fewer than five
/// views and keep items with five or more.
#[test]
fn min_signal_filter() {
    let db = TidalDb::builder()
        .ephemeral()
        .with_schema(m6p3_schema_without_viewer_count())
        .open()
        .unwrap();

    for id in 1u64..=3 {
        let meta = item_meta("Item", "music", 100);
        db.write_item_with_metadata(EntityId::new(id), &meta)
            .unwrap();
    }

    // (entity id, number of view signals): 10, 2 and 5 respectively.
    let now = Timestamp::now();
    let view_counts = [(1, 10), (2, 2), (3, 5)];
    for &(id, count) in &view_counts {
        for _ in 0..count {
            db.signal("view", EntityId::new(id), 1.0, now).unwrap();
        }
    }

    // Threshold of 5 views: entity 2 falls below it.
    let at_least_five_views = FilterExpr::min_signal("view", 5.0);
    let request = Retrieve::builder()
        .profile("new")
        .filter(at_least_five_views)
        .limit(10)
        .build()
        .unwrap();
    let response = db.retrieve(&request).unwrap();

    let result_ids: Vec<u64> = response
        .items
        .iter()
        .map(|hit| hit.entity_id.as_u64())
        .collect();
    assert!(
        !result_ids.contains(&2),
        "entity 2 (2 views) should be excluded by MinSignal(5.0), got: {result_ids:?}"
    );
    assert!(
        result_ids.contains(&1),
        "entity 1 (10 views) should pass MinSignal(5.0)"
    );
    assert!(
        result_ids.contains(&3),
        "entity 3 (5 views) should pass MinSignal(5.0)"
    );

    db.close().unwrap();
}
/// `FilterExpr::max_signal("hide", 5.0)` must drop items with more than five
/// hides and keep the rest.
#[test]
fn max_signal_filter() {
    let db = TidalDb::builder()
        .ephemeral()
        .with_schema(m6p3_schema_without_viewer_count())
        .open()
        .unwrap();

    for id in 1u64..=3 {
        let meta = item_meta("Item", "music", 100);
        db.write_item_with_metadata(EntityId::new(id), &meta)
            .unwrap();
    }

    // (entity id, number of hide signals): 1, 10 and 3 respectively.
    let now = Timestamp::now();
    let hide_counts = [(1, 1), (2, 10), (3, 3)];
    for &(id, count) in &hide_counts {
        for _ in 0..count {
            db.signal("hide", EntityId::new(id), 1.0, now).unwrap();
        }
    }
    // Also give them views so they appear as candidates.
    for id in 1u64..=3 {
        db.signal("view", EntityId::new(id), 1.0, now).unwrap();
    }

    // Ceiling of 5 hides: entity 2 exceeds it.
    let at_most_five_hides = FilterExpr::max_signal("hide", 5.0);
    let request = Retrieve::builder()
        .profile("new")
        .filter(at_most_five_hides)
        .limit(10)
        .build()
        .unwrap();
    let response = db.retrieve(&request).unwrap();

    let result_ids: Vec<u64> = response
        .items
        .iter()
        .map(|hit| hit.entity_id.as_u64())
        .collect();
    assert!(
        !result_ids.contains(&2),
        "entity 2 (10 hides) should be excluded by MaxSignal(5.0), got: {result_ids:?}"
    );
    assert!(
        result_ids.contains(&1),
        "entity 1 (1 hide) should pass MaxSignal(5.0)"
    );
    assert!(
        result_ids.contains(&3),
        "entity 3 (3 hides) should pass MaxSignal(5.0)"
    );

    db.close().unwrap();
}
/// `FilterExpr::near_location` with a 10 km radius around the NYC centre point
/// must keep the two nearby items and drop the one in Los Angeles.
#[test]
fn near_location_filter() {
    let db = TidalDb::builder()
        .ephemeral()
        .with_schema(m6p3_schema_without_viewer_count())
        .open()
        .unwrap();

    // (entity id, title, latitude, longitude)
    let spots = [
        // The query centre itself.
        (1, "NYC Spot", 40.7128, -74.0060),
        // Roughly 6.5 km from the centre by these coordinates, inside the radius.
        (2, "Brooklyn Spot", 40.6782, -73.9442),
        // Roughly 3940 km away, far outside the radius.
        (3, "LA Spot", 34.0522, -118.2437),
    ];
    for &(id, title, lat, lng) in &spots {
        let meta = item_meta_with_location(title, "food", 100, lat, lng);
        db.write_item_with_metadata(EntityId::new(id), &meta)
            .unwrap();
    }

    let now = Timestamp::now();
    for id in 1u64..=3 {
        db.signal("view", EntityId::new(id), 1.0, now).unwrap();
    }

    // Within 10 km of the NYC centre: keeps NYC and Brooklyn, drops LA.
    let within_10_km = FilterExpr::near_location(40.7128, -74.0060, 10.0);
    let request = Retrieve::builder()
        .profile("new")
        .filter(within_10_km)
        .limit(10)
        .build()
        .unwrap();
    let response = db.retrieve(&request).unwrap();

    let result_ids: Vec<u64> = response
        .items
        .iter()
        .map(|hit| hit.entity_id.as_u64())
        .collect();
    assert!(
        result_ids.contains(&1),
        "NYC Spot should be within 10 km of NYC center"
    );
    assert!(
        result_ids.contains(&2),
        "Brooklyn Spot should be within 10 km of NYC center"
    );
    assert!(
        !result_ids.contains(&3),
        "LA Spot should NOT be within 10 km of NYC center, got: {result_ids:?}"
    );

    db.close().unwrap();
}
/// `SchemaBuilder::with_viewer_count_signal()` must register a signal named
/// `viewer_count` that targets items and includes the all-time window.
#[test]
fn with_viewer_count_signal_convenience() {
    let mut builder = SchemaBuilder::new();
    // Register one ordinary signal first, then add viewer_count through the
    // convenience method under test.
    let _ = builder
        .signal(
            "view",
            EntityKind::Item,
            DecaySpec::Exponential {
                half_life: Duration::from_secs(3600),
            },
        )
        .windows(&[Window::OneHour])
        .velocity(false)
        .add();
    builder.with_viewer_count_signal();
    let schema = builder.build().unwrap();

    // A single `expect` replaces the former `is_some()` assertion followed by
    // `unwrap()`; the failure message is unchanged.
    let vc = schema
        .signal("viewer_count")
        .expect("with_viewer_count_signal should register 'viewer_count'");
    assert_eq!(vc.target(), EntityKind::Item);
    assert!(vc.windows().contains(&Window::AllTime));
}