527 lines
18 KiB
Rust
527 lines
18 KiB
Rust
//! Forage engine smoke tests.
//!
//! These tests verify the full engine lifecycle:
//! seed corpus load → signal write → feed generation → discovered item injection.
//! They run against an ephemeral (in-memory) engine so they leave no disk state.
|
||
use forage_engine::{ForageEngine, ForageItemInput, SignalKind};
|
||
|
||
// ── P2: Builder pattern tests ─────────────────────────────────────────────────
|
||
|
||
/// The builder path `ForageEngine::builder().ephemeral().open()` yields a usable engine,
/// just like `ForageEngine::ephemeral()`: it can be seeded and serves a 7-item feed.
#[test]
fn builder_ephemeral_equivalent() {
    let built = ForageEngine::builder()
        .ephemeral()
        .open()
        .expect("builder ephemeral");
    built.seed_default_corpus().expect("seed");

    // Only the size of the feed matters for this check.
    let served = built.feed(1, 7).expect("feed").len();
    assert_eq!(served, 7);
}
|
||
|
||
/// An embedder URL with nothing behind it must not break the engine.
/// `add_item` has to succeed anyway (the engine is expected to log a warning and fall back
/// to a neutral vector), and the added item must still show up in the feed.
#[test]
fn builder_with_embedder_fallback_on_unavailable_sidecar() {
    // Nothing is listening on this port, so the embedder can never answer.
    let engine = ForageEngine::builder()
        .ephemeral()
        .with_embedder("http://127.0.0.1:19999")
        .open()
        .expect("builder with embedder");
    engine.seed_default_corpus().expect("seed with fallback");

    let article = ForageItemInput {
        url: String::from("https://example.com/p2-fallback"),
        title: String::from("P2 Fallback Article"),
        source: String::from("example.com"),
        category: String::from("technology"),
        reading_time_min: 4,
        description: String::from("Tests neutral vector fallback when embedder is down."),
    };
    let item_id = engine
        .add_item(article)
        .expect("add_item must succeed even when embedder is unreachable");

    // The discovered item is expected to be injected into the next feed.
    let feed = engine.feed(99, 7).expect("feed");
    let surfaced = feed.iter().map(|entry| entry.id).any(|id| id == item_id);
    assert!(
        surfaced,
        "discovered item should appear in feed even with fallback embedding"
    );
}
|
||
|
||
/// After seeding the default corpus, a cold feed honours the requested limit (7 items)
/// and covers at least three distinct categories.
#[test]
fn seed_corpus_and_cold_feed() {
    use std::collections::HashSet;

    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    let feed = engine.feed(1, 7).expect("feed");
    assert_eq!(feed.len(), 7, "feed should return exactly 7 items");

    // Gather the distinct category names present in the feed.
    let mut categories = HashSet::new();
    for entry in &feed {
        categories.insert(entry.category.as_str());
    }
    assert!(
        categories.len() >= 3,
        "cold feed should span ≥3 categories, got: {categories:?}"
    );
}
|
||
|
||
/// The feed for user 3 (convergent, heavy tech+jazz signals) must not be a mere subset
/// of the feed served to user 1 (cold).
#[test]
fn warm_user_differs_from_cold_user() {
    use std::collections::HashSet;

    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    let cold_ids: HashSet<u64> = engine
        .feed(1, 7)
        .expect("cold feed")
        .iter()
        .map(|entry| entry.id)
        .collect();
    let warm_ids: Vec<u64> = engine
        .feed(3, 7)
        .expect("warm feed")
        .iter()
        .map(|entry| entry.id)
        .collect();

    // The feeds differ as soon as one warm item is missing from the cold feed.
    let fully_overlapping = warm_ids.iter().all(|id| cold_ids.contains(id));
    assert!(
        !fully_overlapping,
        "warm user (user 3) feed should differ from cold user (user 1) feed"
    );
}
|
||
|
||
/// Writing signals on one category pulls that category into the user's feed and
/// changes the feed relative to what was served before the signals.
#[test]
fn signals_shift_feed_toward_signaled_category() {
    use std::collections::HashSet;

    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    // Baseline for a fresh user with no preference vector yet.
    let ids_before: HashSet<u64> = engine
        .feed(99, 7)
        .expect("before feed")
        .iter()
        .map(|entry| entry.id)
        .collect();

    // Ten strong signals (view, then save) on the jazz items, ids 25–34.
    (25u64..=34).for_each(|jazz_id| {
        engine
            .signal(99, jazz_id, SignalKind::View)
            .expect("view signal");
        engine
            .signal(99, jazz_id, SignalKind::Save)
            .expect("save signal");
    });

    let after = engine.feed(99, 7).expect("after feed");

    // Jazz must now be represented in the feed.
    let jazz_present = after.iter().any(|entry| entry.category == "jazz");
    assert!(
        jazz_present,
        "feed should shift toward jazz after 10 jazz signals; got: {:?}",
        after
            .iter()
            .map(|i| (&i.title, &i.category))
            .collect::<Vec<_>>()
    );

    // The ranking shifted if at least one post-signal item was not in the baseline.
    let ranking_moved = after.iter().any(|entry| !ids_before.contains(&entry.id));
    assert!(ranking_moved, "feed should change after signals are written");
}
|
||
|
||
/// A feed of 7 items carries at least one `Exploring`-labelled item; the MAB is expected
/// to always reserve one explore slot at this limit.
#[test]
fn feed_contains_exploring_label() {
    use forage_engine::ItemLabel;

    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    let feed = engine.feed(3, 7).expect("feed");
    let exploring_slots = feed
        .iter()
        .filter(|entry| matches!(entry.label, ItemLabel::Exploring))
        .count();
    assert!(
        exploring_slots > 0,
        "feed of 7 items should always contain at least 1 Exploring item"
    );
}
|
||
|
||
/// Adding the same URL twice yields the same item ID, and that ID lies above 100_000
/// (i.e. outside the seed range).
#[test]
fn add_item_is_idempotent() {
    /// Builds the identical input for both `add_item` calls.
    fn sample_input() -> ForageItemInput {
        ForageItemInput {
            url: String::from("https://example.com/test-article"),
            title: String::from("Test Article"),
            source: String::from("example.com"),
            category: String::from("technology"),
            reading_time_min: 5,
            description: String::from("A test article for idempotency verification."),
        }
    }

    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    let first_id = engine.add_item(sample_input()).expect("first add_item");
    let second_id = engine.add_item(sample_input()).expect("second add_item");

    assert_eq!(
        first_id, second_id,
        "add_item must return the same ID for the same URL"
    );
    assert!(
        first_id > 100_000,
        "discovered item ID must be above seed range"
    );
}
|
||
|
||
/// An item registered through `add_item` is present in the next feed that is generated.
#[test]
fn discovered_item_surfaces_in_feed() {
    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    let captured_page = ForageItemInput {
        url: String::from("https://example.com/discovered-page"),
        title: String::from("Discovered Page"),
        source: String::from("example.com"),
        category: String::from("design"),
        reading_time_min: 3,
        description: String::from("A page discovered via capture."),
    };
    let item_id = engine.add_item(captured_page).expect("add_item");

    // User 99 has not seen the item, so it remains eligible for their feed.
    let feed = engine.feed(99, 7).expect("feed");
    let surfaced = feed.iter().map(|entry| entry.id).any(|id| id == item_id);
    assert!(
        surfaced,
        "newly added item {item_id} should appear in the next feed"
    );
}
|
||
|
||
/// `semantic_search` works with no embedder configured (neutral-vector fallback).
/// With a uniform query vector there is no meaningful ranking, so this only checks that
/// the call succeeds, stays within the requested limit, and returns something.
#[test]
fn semantic_search_returns_results_without_embedder() {
    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    // No sidecar: the query is embedded as a neutral unit vector, so the ANN lookup
    // succeeds but its ordering carries no semantic meaning.
    let results = engine
        .semantic_search("jazz theory", 5)
        .expect("semantic_search should succeed even without embedder");

    let returned = results.len();
    assert!(
        returned <= 5,
        "should respect requested limit; got {}",
        returned
    );

    // The seeded corpus is large enough that an empty result would be a failure.
    assert!(
        !results.is_empty(),
        "semantic_search on a seeded corpus should return at least one item"
    );
}
|
||
|
||
/// `similar_to` on a seeded item returns a non-empty, limit-bounded list that never
/// contains the queried item itself.
#[test]
fn similar_to_returns_items_for_seed_item() {
    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    // Item 1 comes from the seed corpus with a category-axis embedding, so neighbours
    // from the same or a nearby category are expected.
    let neighbours = engine
        .similar_to(1, 5)
        .expect("similar_to should succeed for a seeded item");

    let returned = neighbours.len();
    assert!(
        returned <= 5,
        "should respect requested limit; got {}",
        returned
    );
    assert!(
        !neighbours.is_empty(),
        "similar_to on a seeded corpus should return at least one item"
    );

    // An item must never be reported as similar to itself.
    let source_excluded = neighbours.iter().all(|entry| entry.id != 1);
    assert!(
        source_excluded,
        "source item should be excluded from similar_to results"
    );
}
|
||
|
||
/// Exercises the feed path taken once a user has saved an item: the feed must still be
/// generated successfully and contain the full 7 items.
#[test]
fn similar_to_saved_augments_feed_pool() {
    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    // User 99 starts without history; save a single jazz item (id 25).
    engine
        .signal(99, 25, SignalKind::Save)
        .expect("save signal");

    // Generating the feed now runs through the similar-to-saved augmentation.
    let served = engine.feed(99, 7).expect("feed after save").len();
    assert_eq!(served, 7, "feed should return 7 items");
}
|
||
|
||
/// `top_categories` is empty for a cold user and populated for a warm one.
#[test]
fn top_categories_reflects_user_state() {
    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    // Cold case: user 1 has no preference vector.
    let cold = engine.top_categories(1);
    assert!(
        cold.is_empty(),
        "cold user should have no top categories, got: {cold:?}"
    );

    // Warm case: user 3 is convergent on tech+jazz and should report preferences.
    let warm_has_preferences = !engine.top_categories(3).is_empty();
    assert!(
        warm_has_preferences,
        "warm user (user 3) should have top categories"
    );
}
|
||
|
||
// ── P4: Bridge item (surprise moment) tests ───────────────────────────────────
|
||
|
||
/// A warm user with signals in two or more categories receives a `Bridge`-labelled item.
#[test]
fn bridge_item_appears_for_warm_user() {
    use forage_engine::ItemLabel;

    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    // User 3 is convergent on tech+jazz, i.e. has ≥2 active preference dimensions.
    let feed = engine.feed(3, 7).expect("feed");
    let bridge_found = feed
        .iter()
        .map(|entry| &entry.label)
        .any(|label| matches!(label, ItemLabel::Bridge { .. }));
    assert!(
        bridge_found,
        "warm user (user 3) with tech+jazz signals should get a Bridge item; \
         labels: {:?}",
        feed.iter()
            .map(|i| (&i.category, format!("{:?}", i.label)))
            .collect::<Vec<_>>()
    );
}
|
||
|
||
/// A cold-start user, who has no preference vector, never gets a `Bridge` item.
#[test]
fn bridge_item_absent_for_cold_user() {
    use forage_engine::ItemLabel;

    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    // User 1 is cold: no signals have been written and no preference vector exists.
    let feed = engine.feed(1, 7).expect("cold feed");
    let bridge_free = feed
        .iter()
        .all(|entry| !matches!(&entry.label, ItemLabel::Bridge { .. }));
    assert!(
        bridge_free,
        "cold user (user 1) should not receive a Bridge item; \
         labels: {:?}",
        feed.iter()
            .map(|i| (&i.category, format!("{:?}", i.label)))
            .collect::<Vec<_>>()
    );
}
|
||
|
||
/// The `Bridge` label names two categories; both must be non-empty and different.
#[test]
fn bridge_label_carries_category_names() {
    use forage_engine::ItemLabel;

    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    let feed = engine.feed(3, 7).expect("feed");
    let bridge_item = feed
        .iter()
        .find(|entry| matches!(&entry.label, ItemLabel::Bridge { .. }))
        .expect("user 3 should have a Bridge item");

    // Pull the two category names out of the label found above.
    let (cat_a, cat_b) = match &bridge_item.label {
        ItemLabel::Bridge { cat_a, cat_b } => (cat_a, cat_b),
        _ => panic!("expected Bridge label"),
    };

    assert!(!cat_a.is_empty(), "cat_a must not be empty");
    assert!(!cat_b.is_empty(), "cat_b must not be empty");
    assert_ne!(
        cat_a, cat_b,
        "bridge categories must be distinct; got both = {cat_a}"
    );
}
|
||
|
||
// ── P3: Adaptive MAB tests ─────────────────────────────────────────────────────
|
||
|
||
/// A user without any history reports zero exploration outcomes and the default
/// exploration ratio of 0.14.
#[test]
fn adaptive_ratio_defaults_for_cold_user() {
    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    let cold_stats = engine.exploration_stats(99);

    let recorded_outcomes = cold_stats.exploration_total;
    assert_eq!(
        recorded_outcomes, 0,
        "cold user has no exploration history"
    );

    let ratio = cold_stats.adaptive_ratio();
    assert_eq!(ratio, 0.14, "cold user uses default exploration ratio");
}
|
||
|
||
/// A user who engages with most exploring items ends up with a 0.25 exploration ratio.
#[test]
fn adaptive_ratio_rises_for_adventurous_user() {
    use forage_engine::ItemLabel;

    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    // Ten feed rounds for user 42. Each round generates a feed (which is what populates
    // the engine's record of explore items) and then reacts to every Exploring item:
    // positively in the first six rounds, with a skip in the remaining four, aiming for
    // a hit rate of 0.6 (> 0.5).
    for round in 0..10u64 {
        let positive_round = round < 6;
        let feed = engine.feed(42, 7).expect("feed");

        let explored_items = feed
            .iter()
            .filter(|entry| matches!(entry.label, ItemLabel::Exploring));
        for explored in explored_items {
            let (reaction, on_failure) = if positive_round {
                (SignalKind::View, "view signal")
            } else {
                (SignalKind::Skip, "skip signal")
            };
            engine
                .signal(42, explored.id, reaction)
                .expect(on_failure);
        }
    }

    let stats = engine.exploration_stats(42);

    let outcomes = stats.exploration_total;
    assert!(
        outcomes >= 6,
        "should have at least 6 exploration outcomes, got {}",
        outcomes
    );

    let hit_rate = stats.hit_rate();
    assert!(
        hit_rate > 0.5,
        "hit_rate should exceed 0.5, got {}",
        hit_rate
    );

    let ratio = stats.adaptive_ratio();
    assert_eq!(
        ratio, 0.25,
        "adventurous user should get 0.25 exploration ratio"
    );
}
|
||
|
||
/// A convergent user who skips every exploring item ends up with a 0.10 exploration ratio.
#[test]
fn adaptive_ratio_drops_for_convergent_user() {
    use forage_engine::ItemLabel;

    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    // Warm the preference vector first with views on the jazz items (ids 25–34).
    (25u64..=34).for_each(|jazz_id| {
        engine
            .signal(55, jazz_id, SignalKind::View)
            .expect("view signal");
    });

    // Ten feed rounds in which every Exploring item is skipped.
    for _round in 0..10 {
        let feed = engine.feed(55, 7).expect("feed");
        let explored_items = feed
            .iter()
            .filter(|entry| matches!(entry.label, ItemLabel::Exploring));
        for explored in explored_items {
            engine
                .signal(55, explored.id, SignalKind::Skip)
                .expect("skip signal");
        }
    }

    let stats = engine.exploration_stats(55);

    // With at least one explore slot per round, ten rounds must give ≥5 outcomes.
    let outcomes = stats.exploration_total;
    assert!(
        outcomes >= 5,
        "should have ≥5 exploration outcomes after 10 rounds; got {}",
        outcomes
    );

    let hit_rate = stats.hit_rate();
    assert!(
        hit_rate < 0.2,
        "convergent user hit_rate should be < 0.2, got {}",
        hit_rate
    );

    let ratio = stats.adaptive_ratio();
    assert_eq!(
        ratio, 0.10,
        "convergent user should get 0.10 exploration ratio"
    );
}
|
||
|
||
/// The UCB1 bonus favours a category with no recorded signals over a well-explored one,
/// and is zero when no signals have been recorded at all.
#[test]
fn ucb1_bonus_higher_for_unseen_categories() {
    use forage_engine::ExplorationStats;

    // Ten signals for "technology", none for "jazz".
    let mut stats = ExplorationStats::default();
    (0..10).for_each(|_| {
        stats.record_category_signal("technology");
    });

    let tech_bonus = stats.ucb1_bonus("technology");
    let jazz_bonus = stats.ucb1_bonus("jazz");
    assert!(
        jazz_bonus > tech_bonus,
        "unseen category (jazz) should get higher UCB1 bonus than explored category (technology); \
         jazz={jazz_bonus:.3}, tech={tech_bonus:.3}"
    );

    // Fresh stats hold no signals, so the bonus must be exactly zero.
    let cold_bonus = ExplorationStats::default().ucb1_bonus("anything");
    assert_eq!(cold_bonus, 0.0, "cold user gets zero UCB1 bonus");
}
|
||
|
||
/// Each `signal()` call is counted against the category of the signalled item.
#[test]
fn category_signals_tracked_on_signal_write() {
    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    // Per the seed corpus, item ids 25–34 are jazz; signal two of them.
    engine
        .signal(77, 25, SignalKind::View)
        .expect("view signal");
    engine
        .signal(77, 26, SignalKind::Save)
        .expect("save signal");

    let stats = engine.exploration_stats(77);

    // A missing "jazz" entry counts as zero signals.
    let jazz_count = stats
        .category_signals
        .get("jazz")
        .map_or(0, |count| *count);
    assert_eq!(
        jazz_count, 2,
        "two jazz signals should be tracked; category_signals={:?}",
        stats.category_signals
    );
}
|