Milestone 8 (phases 1-4):
- Shard-aware WAL segment naming, BatchHeader v2, ShardRouter
- Transport trait, InProcessTransport, WalShipper, FollowerDb
- HLC, PNCounter, LWWRegister, CrdtSignalState, ReconciliationEngine
- Session replication bridge with SeqNo/HWM, idempotency store

Forage application:
- Multi-source discovery engine with MAB exploration
- Embedding-based label system, server handlers, UI refresh

Other:
- QUICKSTART.md, README.md, milestone-8 planning docs
- Hard negative union semantics, RLHF export enhancements
- Recovery benchmark and visibility test expansions
- Split 8 oversized source files per CODING_GUIDELINES §9

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
876 lines
30 KiB
Rust
876 lines
30 KiB
Rust
//! Forage engine smoke tests.
//!
//! These tests verify the full engine lifecycle:
//! seed corpus load → signal write → feed generation → discovered item injection.
//! They run against an ephemeral (in-memory) engine so they leave no disk state.
|
||
use forage_engine::{ForageEngine, ForageItemInput, SignalKind};
|
||
|
||
// ── P2: Builder pattern tests ─────────────────────────────────────────────────
|
||
|
||
/// An engine opened through `ForageEngine::builder().ephemeral().open()` behaves like
/// one from `ForageEngine::ephemeral()`: after seeding it serves a full feed of 7 items.
#[test]
fn builder_ephemeral_equivalent() {
    let engine = ForageEngine::builder().ephemeral().open().expect("builder ephemeral");
    engine.seed_default_corpus().expect("seed");

    // Only the size of the feed matters for this check.
    let served = engine.feed(1, 7).expect("feed").len();
    assert_eq!(served, 7);
}
|
||
|
||
/// Configuring `with_embedder` with an unreachable URL must not break the engine.
///
/// Seeding and `add_item` are both required to succeed (the engine is expected to fall
/// back to a neutral vector), and the added item must still show up in a feed.
#[test]
fn builder_with_embedder_fallback_on_unavailable_sidecar() {
    // Nothing is listening on this port, so the sidecar never answers.
    let engine = ForageEngine::builder()
        .ephemeral()
        .with_embedder("http://127.0.0.1:19999")
        .open()
        .expect("builder with embedder");
    engine.seed_default_corpus().expect("seed with fallback");

    let input = ForageItemInput {
        url: String::from("https://example.com/p2-fallback"),
        title: String::from("P2 Fallback Article"),
        source: String::from("example.com"),
        category: String::from("technology"),
        reading_time_min: 4,
        description: String::from("Tests neutral vector fallback when embedder is down."),
        tags: Vec::new(),
        entities: Vec::new(),
        content_type: String::new(),
        summary: String::new(),
    };
    let item_id = engine
        .add_item(input)
        .expect("add_item must succeed even when embedder is unreachable");

    // The item is expected to reach the feed through discovered-item injection.
    let feed = engine.feed(99, 7).expect("feed");
    let surfaced = feed.iter().any(|entry| entry.id == item_id);
    assert!(
        surfaced,
        "discovered item should appear in feed even with fallback embedding"
    );
}
|
||
|
||
/// A cold feed over the seeded corpus returns exactly the requested 7 items and
/// spans at least 3 distinct categories.
#[test]
fn seed_corpus_and_cold_feed() {
    use std::collections::HashSet;

    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    let feed = engine.feed(1, 7).expect("feed");
    assert_eq!(feed.len(), 7, "feed should return exactly 7 items");

    // Gather the distinct category names present in the feed.
    let mut categories = HashSet::new();
    for item in &feed {
        categories.insert(item.category.as_str());
    }
    assert!(
        categories.len() >= 3,
        "cold feed should span ≥3 categories, got: {categories:?}"
    );
}
|
||
|
||
/// The feed for user 3 (described as convergent on tech+jazz in the seed data) must
/// contain at least one item that the cold user 1 feed does not.
#[test]
fn warm_user_differs_from_cold_user() {
    use std::collections::HashSet;

    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    let cold_items = engine.feed(1, 7).expect("cold feed");
    let warm_items = engine.feed(3, 7).expect("warm feed");

    // One warm-feed ID missing from the cold feed is enough to prove a difference.
    let cold_ids: HashSet<u64> = cold_items.iter().map(|item| item.id).collect();
    let differs = warm_items
        .iter()
        .map(|item| item.id)
        .any(|id| !cold_ids.contains(&id));
    assert!(
        differs,
        "warm user (user 3) feed should differ from cold user (user 1) feed"
    );
}
|
||
|
||
/// Writing view+save signals on jazz items pulls jazz into the user's feed and
/// changes the feed relative to the pre-signal snapshot.
#[test]
fn signals_shift_feed_toward_signaled_category() {
    use std::collections::HashSet;

    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    // Snapshot of item IDs served to user 99 before any signal is written.
    let before: HashSet<u64> = engine
        .feed(99, 7)
        .expect("before feed")
        .iter()
        .map(|item| item.id)
        .collect();

    // IDs 25–34 are the jazz items; each one gets a View followed by a Save.
    for item_id in 25u64..=34 {
        for (kind, failure) in [
            (SignalKind::View, "view signal"),
            (SignalKind::Save, "save signal"),
        ] {
            engine.signal(99, item_id, kind).expect(failure);
        }
    }

    let after = engine.feed(99, 7).expect("after feed");

    // At least one jazz item has to be present once the signals are in.
    let has_jazz = after.iter().any(|item| item.category == "jazz");
    assert!(
        has_jazz,
        "feed should shift toward jazz after 10 jazz signals; got: {:?}",
        after
            .iter()
            .map(|item| (&item.title, &item.category))
            .collect::<Vec<_>>()
    );

    // The ranking moved if any post-signal item was absent from the snapshot.
    let changed = after.iter().any(|item| !before.contains(&item.id));
    assert!(changed, "feed should change after signals are written");
}
|
||
|
||
/// A feed of 7 items is expected to carry at least one `Exploring`-labelled item
/// (the MAB reserves an explore slot at this limit).
#[test]
fn feed_contains_exploring_label() {
    use forage_engine::ItemLabel;

    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    let feed = engine.feed(3, 7).expect("feed");
    let has_exploring = feed
        .iter()
        .map(|item| &item.label)
        .any(|label| matches!(label, ItemLabel::Exploring));
    assert!(
        has_exploring,
        "feed of 7 items should always contain at least 1 Exploring item"
    );
}
|
||
|
||
/// Adding the same URL twice yields the same item ID, and that ID lies above the
/// seed-corpus range (> 100 000).
#[test]
fn add_item_is_idempotent() {
    /// Builds the identical input payload used for both `add_item` calls.
    fn sample_input() -> ForageItemInput {
        ForageItemInput {
            url: String::from("https://example.com/test-article"),
            title: String::from("Test Article"),
            source: String::from("example.com"),
            category: String::from("technology"),
            reading_time_min: 5,
            description: String::from("A test article for idempotency verification."),
            tags: Vec::new(),
            entities: Vec::new(),
            content_type: String::new(),
            summary: String::new(),
        }
    }

    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    let id1 = engine.add_item(sample_input()).expect("first add_item");
    let id2 = engine.add_item(sample_input()).expect("second add_item");

    assert_eq!(
        id1, id2,
        "add_item must return the same ID for the same URL"
    );
    assert!(id1 > 100_000, "discovered item ID must be above seed range");
}
|
||
|
||
/// An item registered through `add_item` is served in the very next feed.
#[test]
fn discovered_item_surfaces_in_feed() {
    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    let discovered = ForageItemInput {
        url: String::from("https://example.com/discovered-page"),
        title: String::from("Discovered Page"),
        source: String::from("example.com"),
        category: String::from("design"),
        reading_time_min: 3,
        description: String::from("A page discovered via capture."),
        tags: Vec::new(),
        entities: Vec::new(),
        content_type: String::new(),
        summary: String::new(),
    };
    let item_id = engine.add_item(discovered).expect("add_item");

    // User 99 has not seen anything yet, so the new item is eligible for them.
    let feed = engine.feed(99, 7).expect("feed");
    let in_feed = feed.iter().map(|item| item.id).any(|id| id == item_id);
    assert!(
        in_feed,
        "newly added item {item_id} should appear in the next feed"
    );
}
|
||
|
||
/// `semantic_search` works on an engine with no embedder configured.
///
/// Only the contract is checked — the call succeeds, returns at most the requested
/// number of results, and returns at least one — not the semantic quality of the hits.
#[test]
fn semantic_search_returns_results_without_embedder() {
    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    // With no sidecar the query vector is expected to be neutral, so ordering carries
    // no meaning here; the query itself must still go through.
    let results = engine
        .semantic_search("jazz theory", 5)
        .expect("semantic_search should succeed even without embedder");

    let hit_count = results.len();
    assert!(
        hit_count <= 5,
        "should respect requested limit; got {}",
        hit_count
    );
    // A seeded corpus must yield something.
    assert!(
        !results.is_empty(),
        "semantic_search on a seeded corpus should return at least one item"
    );
}
|
||
|
||
/// `similar_to` on a seed item returns between 1 and `limit` neighbours and never
/// includes the queried item itself.
#[test]
fn similar_to_returns_items_for_seed_item() {
    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    // Item 1 belongs to the seed corpus, so it has neighbours to be compared against.
    let results = engine
        .similar_to(1, 5)
        .expect("similar_to should succeed for a seeded item");

    let hit_count = results.len();
    assert!(
        hit_count <= 5,
        "should respect requested limit; got {}",
        hit_count
    );
    assert!(
        !results.is_empty(),
        "similar_to on a seeded corpus should return at least one item"
    );
    // The query item (id = 1) must not be reported as similar to itself.
    assert!(
        results.iter().all(|item| item.id != 1),
        "source item should be excluded from similar_to results"
    );
}
|
||
|
||
/// Feed generation still succeeds and fills all 7 slots after the user has saved an
/// item, which exercises the similar-to-saved augmentation path.
#[test]
fn similar_to_saved_augments_feed_pool() {
    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    // Give the otherwise history-less user 99 a single save on jazz item 25.
    engine
        .signal(99, 25, SignalKind::Save)
        .expect("save signal");

    let served = engine.feed(99, 7).expect("feed after save").len();
    assert_eq!(served, 7, "feed should return 7 items");
}
|
||
|
||
/// `top_categories` is empty for the cold user 1 and non-empty for the warm user 3.
#[test]
fn top_categories_reflects_user_state() {
    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    // Cold user: no preferences recorded.
    let cold = engine.top_categories(1);
    assert!(
        cold.is_empty(),
        "cold user should have no top categories, got: {cold:?}"
    );

    // Warm user: the seed data gives user 3 tech+jazz preferences.
    let warm = engine.top_categories(3);
    let warm_has_preferences = !warm.is_empty();
    assert!(
        warm_has_preferences,
        "warm user (user 3) should have top categories"
    );
}
|
||
|
||
// ── P4: Bridge item (surprise moment) tests ───────────────────────────────────
|
||
|
||
/// The warm user 3 (tech+jazz preferences in the seed data) receives at least one
/// `Bridge`-labelled item in a feed of 7.
#[test]
fn bridge_item_appears_for_warm_user() {
    use forage_engine::ItemLabel;

    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    let feed = engine.feed(3, 7).expect("feed");
    let has_bridge = feed
        .iter()
        .map(|item| &item.label)
        .any(|label| matches!(label, ItemLabel::Bridge { .. }));
    assert!(
        has_bridge,
        "warm user (user 3) with tech+jazz signals should get a Bridge item; \
         labels: {:?}",
        feed.iter()
            .map(|item| (&item.category, format!("{:?}", item.label)))
            .collect::<Vec<_>>()
    );
}
|
||
|
||
/// The cold user 1 must not be served any `Bridge`-labelled item.
#[test]
fn bridge_item_absent_for_cold_user() {
    use forage_engine::ItemLabel;

    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    // User 1 has written no signals.
    let feed = engine.feed(1, 7).expect("cold feed");
    let has_bridge = feed
        .iter()
        .map(|item| &item.label)
        .any(|label| matches!(label, ItemLabel::Bridge { .. }));
    assert!(
        !has_bridge,
        "cold user (user 1) should not receive a Bridge item; \
         labels: {:?}",
        feed.iter()
            .map(|item| (&item.category, format!("{:?}", item.label)))
            .collect::<Vec<_>>()
    );
}
|
||
|
||
/// The `Bridge` label served to user 3 names two categories that are both non-empty
/// and different from each other.
#[test]
fn bridge_label_carries_category_names() {
    use forage_engine::ItemLabel;

    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    let feed = engine.feed(3, 7).expect("feed");
    let bridge = feed
        .iter()
        .find(|item| matches!(&item.label, ItemLabel::Bridge { .. }))
        .expect("user 3 should have a Bridge item");

    // Pull the two category names out of the label that was just found.
    let (cat_a, cat_b) = match &bridge.label {
        ItemLabel::Bridge { cat_a, cat_b } => (cat_a, cat_b),
        _ => panic!("expected Bridge label"),
    };

    assert!(!cat_a.is_empty(), "cat_a must not be empty");
    assert!(!cat_b.is_empty(), "cat_b must not be empty");
    assert_ne!(
        cat_a, cat_b,
        "bridge categories must be distinct; got both = {cat_a}"
    );
}
|
||
|
||
// ── P3: Adaptive MAB tests ─────────────────────────────────────────────────────
|
||
|
||
/// A user with no history has zero recorded exploration outcomes and the default
/// exploration ratio of 0.14.
///
/// The ratio is compared within a small tolerance rather than with exact float
/// equality, matching how priorities are compared elsewhere in this file.
#[test]
fn adaptive_ratio_defaults_for_cold_user() {
    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    let stats = engine.exploration_stats(99);
    assert_eq!(
        stats.exploration_total, 0,
        "cold user has no exploration history"
    );

    let ratio = stats.adaptive_ratio();
    assert!(
        (ratio - 0.14).abs() < 1e-6,
        "cold user uses default exploration ratio; got {ratio}"
    );
}
|
||
|
||
/// The exploration ratio rises to 0.25 once the user has engaged positively with
/// most of the exploring items shown to them.
///
/// The final ratio is compared within a small tolerance rather than with exact float
/// equality, matching how priorities are compared elsewhere in this file.
#[test]
fn adaptive_ratio_rises_for_adventurous_user() {
    use forage_engine::ItemLabel;

    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    // Ten feed rounds for user 42. Each feed is generated first so the engine knows
    // which exploring items were shown; those items are then viewed in the first six
    // rounds and skipped in the last four, aiming for a hit rate above 0.5.
    for round in 0..10u64 {
        let feed = engine.feed(42, 7).expect("feed");
        let exploring = feed
            .iter()
            .filter(|item| matches!(item.label, ItemLabel::Exploring));
        for item in exploring {
            if round < 6 {
                engine
                    .signal(42, item.id, SignalKind::View)
                    .expect("view signal");
            } else {
                engine
                    .signal(42, item.id, SignalKind::Skip)
                    .expect("skip signal");
            }
        }
    }

    let stats = engine.exploration_stats(42);
    assert!(
        stats.exploration_total >= 6,
        "should have at least 6 exploration outcomes, got {}",
        stats.exploration_total
    );
    assert!(
        stats.hit_rate() > 0.5,
        "hit_rate should exceed 0.5, got {}",
        stats.hit_rate()
    );

    let ratio = stats.adaptive_ratio();
    assert!(
        (ratio - 0.25).abs() < 1e-6,
        "adventurous user should get 0.25 exploration ratio; got {ratio}"
    );
}
|
||
|
||
/// The exploration ratio drops to 0.10 for a user who consistently skips the
/// exploring items shown to them.
///
/// The final ratio is compared within a small tolerance rather than with exact float
/// equality, matching how priorities are compared elsewhere in this file.
#[test]
fn adaptive_ratio_drops_for_convergent_user() {
    use forage_engine::ItemLabel;

    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    // Warm user 55 up with views on the jazz items (IDs 25–34).
    for item_id in 25u64..=34 {
        engine
            .signal(55, item_id, SignalKind::View)
            .expect("view signal");
    }

    // Ten feed rounds in which every exploring item is skipped.
    for _ in 0..10 {
        let feed = engine.feed(55, 7).expect("feed");
        let exploring = feed
            .iter()
            .filter(|item| matches!(item.label, ItemLabel::Exploring));
        for item in exploring {
            engine
                .signal(55, item.id, SignalKind::Skip)
                .expect("skip signal");
        }
    }

    let stats = engine.exploration_stats(55);
    // Ten rounds with an explore slot each should leave well over five outcomes.
    assert!(
        stats.exploration_total >= 5,
        "should have ≥5 exploration outcomes after 10 rounds; got {}",
        stats.exploration_total
    );
    assert!(
        stats.hit_rate() < 0.2,
        "convergent user hit_rate should be < 0.2, got {}",
        stats.hit_rate()
    );

    let ratio = stats.adaptive_ratio();
    assert!(
        (ratio - 0.10).abs() < 1e-6,
        "convergent user should get 0.10 exploration ratio; got {ratio}"
    );
}
|
||
|
||
/// The UCB1 bonus favours a category with no recorded signals over one with many,
/// and is zero when no signals have been recorded at all.
#[test]
fn ucb1_bonus_higher_for_unseen_categories() {
    use forage_engine::ExplorationStats;

    // Ten signals on "technology", none on "jazz".
    let mut stats = ExplorationStats::default();
    for _ in 0..10 {
        stats.record_category_signal("technology");
    }

    let tech_bonus = stats.ucb1_bonus("technology");
    let jazz_bonus = stats.ucb1_bonus("jazz");
    assert!(
        jazz_bonus > tech_bonus,
        "unseen category (jazz) should get higher UCB1 bonus than explored category (technology); \
         jazz={jazz_bonus:.3}, tech={tech_bonus:.3}"
    );

    // Freshly defaulted stats hold no signals, so the bonus must be zero.
    let cold_bonus = ExplorationStats::default().ucb1_bonus("anything");
    assert_eq!(cold_bonus, 0.0, "cold user gets zero UCB1 bonus");
}
|
||
|
||
/// Each `signal()` call is counted against the category of the signalled item.
#[test]
fn category_signals_tracked_on_signal_write() {
    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    // Items 25 and 26 are jazz items in the seed corpus (IDs 25–34 are jazz).
    engine
        .signal(77, 25, SignalKind::View)
        .expect("view signal");
    engine
        .signal(77, 26, SignalKind::Save)
        .expect("save signal");

    let stats = engine.exploration_stats(77);
    // A missing entry counts as zero signals.
    let jazz_count = stats
        .category_signals
        .get("jazz")
        .map_or(0, |count| *count);
    assert_eq!(
        jazz_count, 2,
        "two jazz signals should be tracked; category_signals={:?}",
        stats.category_signals
    );
}
|
||
|
||
// ── Browse tasks tests ──────────────────────────────────────────────────────
|
||
|
||
/// For a user without signals the browse plan lists all 8 source categories with
/// equal priority, gives every topic at least one source, and has no tag hints.
#[test]
fn browse_tasks_cold_start_equal_weights() {
    let engine = ForageEngine::ephemeral().unwrap();
    engine.seed_default_corpus().unwrap();

    // User 99 has never written a signal.
    let plan = engine.browse_tasks(99, 5);

    assert!(plan.should_run);
    assert_eq!(plan.limit_per_topic, 5);
    assert_eq!(plan.interval_minutes, 30);
    assert_eq!(
        plan.topics.len(),
        8,
        "all 8 source categories should be present"
    );

    // Every priority must match the first one, up to floating-point tolerance.
    let baseline = plan.topics[0].priority;
    for topic in plan.topics.iter() {
        let deviation = (topic.priority - baseline).abs();
        assert!(
            deviation < 1e-5,
            "cold-start topics should have equal priority, got {} and {}",
            baseline,
            topic.priority
        );
    }

    // No topic may come without a source URL.
    for topic in plan.topics.iter() {
        assert!(
            !topic.sources.is_empty(),
            "topic '{}' has no sources",
            topic.name
        );
    }

    // Nothing has been saved, so there is nothing to derive tag hints from.
    assert!(
        plan.tag_hints.is_empty(),
        "cold user should have no tag hints"
    );
}
|
||
|
||
/// After a user saves several jazz items, "jazz" is the first browse topic and its
/// priority is strictly higher than that of every other topic.
#[test]
fn browse_tasks_warm_user_top_category_ranks_first() {
    let engine = ForageEngine::ephemeral().unwrap();
    engine.seed_default_corpus().unwrap();

    // Pick up to five jazz items from the corpus to save on behalf of user 1.
    let jazz_items: Vec<u64> = engine
        .all_items()
        .iter()
        .filter(|seed| seed.category == "jazz")
        .map(|seed| seed.id)
        .take(5)
        .collect();
    assert!(!jazz_items.is_empty(), "seed corpus should have jazz items");

    for &id in &jazz_items {
        engine.signal(1, id, SignalKind::Save).unwrap();
    }

    let plan = engine.browse_tasks(1, 5);

    // Jazz has to lead the topic list.
    assert!(!plan.topics.is_empty());
    assert_eq!(
        plan.topics[0].name,
        "jazz",
        "jazz should rank first after jazz saves, got: {:?}",
        plan.topics
            .iter()
            .map(|topic| (&topic.name, topic.priority))
            .collect::<Vec<_>>()
    );

    // And it has to lead strictly: no other topic may tie with it.
    let jazz_priority = plan.topics[0].priority;
    for other in &plan.topics[1..] {
        assert!(
            jazz_priority > other.priority,
            "jazz ({}) should outrank {} ({})",
            jazz_priority,
            other.name,
            other.priority
        );
    }
}
|
||
|
||
/// Tags of an item the user saved show up in the browse plan's `tag_hints`.
#[test]
fn browse_tasks_tag_hints_populated_from_saves() {
    let engine = ForageEngine::ephemeral().unwrap();
    engine.seed_default_corpus().unwrap();

    // Register a tagged item, then save it as user 1.
    let item_id = engine
        .add_item(ForageItemInput {
            url: String::from("https://example.com/modal-jazz-article"),
            title: String::from("A Guide to Modal Jazz"),
            source: String::from("example.com"),
            category: String::from("jazz"),
            reading_time_min: 8,
            description: String::from("Deep dive into modal jazz techniques."),
            tags: vec![
                String::from("modal jazz"),
                String::from("music theory"),
                String::from("coltrane"),
            ],
            entities: vec![String::from("John Coltrane")],
            content_type: String::from("tutorial"),
            summary: String::from(
                "Explores the harmonic language of modal jazz. Coltrane is the central focus.",
            ),
        })
        .unwrap();
    engine.signal(1, item_id, SignalKind::Save).unwrap();

    let plan = engine.browse_tasks(1, 5);

    // Two of the saved item's tags are checked for presence among the hints.
    let has_modal_jazz = plan.tag_hints.iter().any(|tag| tag == "modal jazz");
    assert!(
        has_modal_jazz,
        "tag_hints should contain 'modal jazz', got: {:?}",
        plan.tag_hints
    );
    let has_music_theory = plan.tag_hints.iter().any(|tag| tag == "music theory");
    assert!(
        has_music_theory,
        "tag_hints should contain 'music theory', got: {:?}",
        plan.tag_hints
    );
}
|
||
|
||
// ── Top tags tests ──────────────────────────────────────────────────────────
|
||
|
||
/// `top_tags` returns nothing for a user who has not saved any item.
#[test]
fn top_tags_empty_for_cold_user() {
    let engine = ForageEngine::ephemeral().unwrap();
    engine.seed_default_corpus().unwrap();

    // User 99 never saved anything.
    let tags = engine.top_tags(99, 5);
    let no_tags = tags.is_empty();
    assert!(no_tags, "cold user should have no tags, got: {:?}", tags);
}
|
||
|
||
/// `top_tags` orders tags by how often they occur across the user's saved items.
#[test]
fn top_tags_frequency_ranked() {
    let engine = ForageEngine::ephemeral().unwrap();
    engine.seed_default_corpus().unwrap();

    // Three saved items with overlapping tags:
    // "rust" occurs 3 times, "async" twice, "wasm" and "tokio" once each.
    let fixtures: [(&str, &[&str]); 3] = [
        ("https://example.com/rust-async", &["rust", "async", "tokio"]),
        ("https://example.com/rust-wasm", &["rust", "wasm"]),
        ("https://example.com/rust-futures", &["rust", "async"]),
    ];

    for (url, tags) in fixtures {
        let id = engine
            .add_item(ForageItemInput {
                url: url.to_string(),
                title: format!("Article: {url}"),
                source: String::from("example.com"),
                category: String::from("technology"),
                reading_time_min: 5,
                description: String::new(),
                tags: tags.iter().map(|tag| tag.to_string()).collect(),
                entities: Vec::new(),
                content_type: String::from("tutorial"),
                summary: String::new(),
            })
            .unwrap();
        engine.signal(1, id, SignalKind::Save).unwrap();
    }

    let tags = engine.top_tags(1, 5);

    // The tag present on all three items has to lead.
    assert!(!tags.is_empty(), "should have tags after saves");
    assert_eq!(
        tags[0], "rust",
        "most frequent tag should be first, got: {:?}",
        tags
    );

    // "async" (2 occurrences) has to come before "wasm" (1 occurrence).
    let async_pos = tags
        .iter()
        .position(|tag| tag == "async")
        .expect("async should be present");
    let wasm_pos = tags
        .iter()
        .position(|tag| tag == "wasm")
        .expect("wasm should be present");
    assert!(
        async_pos < wasm_pos,
        "async (2x) should rank before wasm (1x)"
    );
}
|
||
|
||
/// Enrichment metadata passed to `add_item` (tags, entities, content type, summary)
/// is present on the corresponding item returned by `feed()`.
///
/// Serves as a regression guard for enrichment hydration on feed items.
#[test]
fn discovered_item_enrichment_preserved_in_feed() {
    let engine = ForageEngine::ephemeral().unwrap();
    engine.seed_default_corpus().unwrap();

    let enriched = ForageItemInput {
        url: String::from("https://example.com/enriched-article"),
        title: String::from("Enriched Article"),
        source: String::from("example.com"),
        category: String::from("technology"),
        reading_time_min: 6,
        description: String::from("An article with full enrichment metadata."),
        tags: vec![String::from("rust"), String::from("async")],
        entities: vec![String::from("Tokio")],
        content_type: String::from("tutorial"),
        summary: String::from("Teaches async Rust. Tokio is the runtime used throughout."),
    };
    let item_id = engine.add_item(enriched).unwrap();

    // A fresh user's feed is expected to have the discovered item injected.
    let feed = engine.feed(99, 7).unwrap();
    let item = feed
        .iter()
        .find(|candidate| candidate.id == item_id)
        .expect("discovered item should appear in feed");

    assert_eq!(
        item.tags,
        vec!["rust", "async"],
        "feed item should carry its stored tags, got: {:?}",
        item.tags
    );
    assert_eq!(
        item.entities,
        vec!["Tokio"],
        "feed item should carry its stored entities, got: {:?}",
        item.entities
    );
    assert_eq!(
        item.content_type, "tutorial",
        "feed item should carry its stored content_type"
    );
    // The summary only has to be present; its exact text is not pinned.
    assert!(
        !item.summary.is_empty(),
        "feed item should carry its stored summary, got empty string"
    );
}
|
||
|
||
/// Tags of an item the user dwelled on for a long time (20 s here) count towards
/// `top_tags` even without a save, while tags from a short dwell (5 s) do not.
#[test]
fn top_tags_includes_dwell_items() {
    let engine = ForageEngine::ephemeral().unwrap();
    engine.seed_default_corpus().unwrap();

    // Part 1: a long dwell on an item with distinctive tags.
    let deep_read = ForageItemInput {
        url: String::from("https://example.com/dwell-tagged-article"),
        title: String::from("Deep Read Article"),
        source: String::from("example.com"),
        category: String::from("science"),
        reading_time_min: 10,
        description: String::from("An article worth reading slowly."),
        tags: vec![String::from("quantum computing"), String::from("research")],
        entities: Vec::new(),
        content_type: String::from("research"),
        summary: String::from(
            "Explores quantum error correction. Practical applications are assessed.",
        ),
    };
    let item_id = engine.add_item(deep_read).unwrap();

    // 20 000 ms is above the 15 000 ms completion threshold; no save is written.
    engine.signal_dwell(88, item_id, 20_000).unwrap();

    let tags = engine.top_tags(88, 5);
    let has_dwell_tag = tags.iter().any(|tag| tag == "quantum computing");
    assert!(
        has_dwell_tag,
        "top_tags should include tags from completion-dwell items; got: {:?}",
        tags
    );

    // Part 2: a dwell below the threshold must leave the tag list untouched.
    let brief_glance = ForageItemInput {
        url: String::from("https://example.com/short-dwell-article"),
        title: String::from("Brief Glance Article"),
        source: String::from("example.com"),
        category: String::from("science"),
        reading_time_min: 5,
        description: String::from("Skimmed article."),
        tags: vec![String::from("astronomy")],
        entities: Vec::new(),
        content_type: String::from("news"),
        summary: String::from("Brief overview of recent astronomy findings."),
    };
    let item_id2 = engine.add_item(brief_glance).unwrap();
    engine.signal_dwell(88, item_id2, 5_000).unwrap(); // 5 seconds only

    let tags_after = engine.top_tags(88, 10);
    let astronomy_absent = tags_after.iter().all(|tag| tag != "astronomy");
    assert!(
        astronomy_absent,
        "short dwell (<15s) should not contribute tags; got: {:?}",
        tags_after
    );
}
|
||
|
||
/// Sanity check that `signal_dwell` is callable from an integration test and
/// succeeds for valid parameters.
#[test]
fn signal_dwell_method_exists() {
    let engine = ForageEngine::ephemeral().unwrap();
    engine.seed_default_corpus().unwrap();

    // User 1 dwells on seed item 1 for 30 seconds; the call must not error.
    let outcome = engine.signal_dwell(1, 1, 30_000);
    outcome.unwrap();
}
|