//! Forage engine smoke tests.
//!
//! These tests verify the full engine lifecycle:
//! seed corpus load → signal write → feed generation → discovered item injection.
//! They run against an ephemeral (in-memory) engine so they leave no disk state.

use forage_engine::{ForageEngine, ForageItemInput, SignalKind};

// ── P2: Builder pattern tests ─────────────────────────────────────────────────

/// An engine opened through `ForageEngine::builder().ephemeral().open()` can be
/// seeded and serves a full 7-item feed, just like `ForageEngine::ephemeral()`.
#[test]
fn builder_ephemeral_equivalent() {
    let forage = ForageEngine::builder()
        .ephemeral()
        .open()
        .expect("builder ephemeral");
    forage.seed_default_corpus().expect("seed");

    let items = forage.feed(1, 7).expect("feed");
    assert_eq!(items.len(), 7);
}

/// Builder with `with_embedder` pointing at a non-existent URL falls back gracefully.
/// `add_item` must not error out — it logs a warning and uses a neutral vector.
#[test]
fn builder_with_embedder_fallback_on_unavailable_sidecar() {
    // Nothing is listening on this port, so the embedder can never respond.
    let forage = ForageEngine::builder()
        .ephemeral()
        .with_embedder("http://127.0.0.1:19999")
        .open()
        .expect("builder with embedder");
    forage.seed_default_corpus().expect("seed with fallback");

    let article = ForageItemInput {
        url: String::from("https://example.com/p2-fallback"),
        title: String::from("P2 Fallback Article"),
        source: String::from("example.com"),
        category: String::from("technology"),
        reading_time_min: 4,
        description: String::from("Tests neutral vector fallback when embedder is down."),
    };
    let new_id = forage
        .add_item(article)
        .expect("add_item must succeed even when embedder is unreachable");

    // Item should still appear in feed via discovered_ids injection.
    let items = forage.feed(99, 7).expect("feed");
    let surfaced = items.iter().any(|entry| entry.id == new_id);
    assert!(
        surfaced,
        "discovered item should appear in feed even with fallback embedding"
    );
}

/// Seeded corpus loads 100 items across ≥3 categories; feed returns requested limit.
#[test]
fn seed_corpus_and_cold_feed() {
    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    let feed = engine.feed(1, 7).expect("feed");
    assert_eq!(feed.len(), 7, "feed should return exactly 7 items");

    // Distinct category names present in the feed.
    let categories: std::collections::HashSet<_> =
        feed.iter().map(|item| item.category.as_str()).collect();
    assert!(
        categories.len() >= 3,
        "cold feed should span ≥3 categories, got: {categories:?}"
    );
}

/// User 3 (convergent, heavy tech+jazz signals) gets different feed than user 1 (cold).
#[test]
fn warm_user_differs_from_cold_user() {
    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    // Collect the cold feed's IDs straight into a set; only membership matters.
    let cold_ids: std::collections::HashSet<_> = engine
        .feed(1, 7)
        .expect("cold feed")
        .iter()
        .map(|i| i.id)
        .collect();
    let warm_feed = engine.feed(3, 7).expect("warm feed");

    // At least one item in the warm feed must differ from the cold feed.
    let differs = warm_feed.iter().any(|i| !cold_ids.contains(&i.id));
    assert!(
        differs,
        "warm user (user 3) feed should differ from cold user (user 1) feed"
    );
}

/// Signals shift the feed toward the signaled category over time.
#[test]
fn signals_shift_feed_toward_signaled_category() {
    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    // Fresh user — no preference vector. Snapshot the IDs served before any signal.
    let before_ids: std::collections::HashSet<_> = engine
        .feed(99, 7)
        .expect("before feed")
        .iter()
        .map(|i| i.id)
        .collect();

    // Write a View and a Save signal on each of the 10 jazz items (ids 25–34).
    for item_id in 25u64..=34 {
        engine
            .signal(99, item_id, SignalKind::View)
            .expect("view signal");
        engine
            .signal(99, item_id, SignalKind::Save)
            .expect("save signal");
    }

    let after = engine.feed(99, 7).expect("after feed");

    // After signaling, at least one jazz item must appear in the feed.
    let has_jazz = after.iter().any(|item| item.category == "jazz");
    assert!(
        has_jazz,
        "feed should shift toward jazz after 10 jazz signals; got: {:?}",
        after
            .iter()
            .map(|i| (&i.title, &i.category))
            .collect::<Vec<_>>()
    );

    // Before and after feeds differ (ranking shifted).
    let changed = after.iter().any(|item| !before_ids.contains(&item.id));
    assert!(changed, "feed should change after signals are written");
}

/// `exploring` label appears: MAB always allocates 1 explore slot for limit=7.
#[test]
fn feed_contains_exploring_label() {
    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    let feed = engine.feed(3, 7).expect("feed");
    let has_exploring = feed
        .iter()
        .any(|item| matches!(item.label, forage_engine::ItemLabel::Exploring));
    assert!(
        has_exploring,
        "feed of 7 items should always contain at least 1 Exploring item"
    );
}

/// `add_item` is idempotent: same URL returns same ID without a second DB write.
#[test]
fn add_item_is_idempotent() {
    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    let url = "https://example.com/test-article";
    // Closure so each call gets a fresh, identical input value.
    let input = || ForageItemInput {
        url: url.to_owned(),
        title: "Test Article".to_owned(),
        source: "example.com".to_owned(),
        category: "technology".to_owned(),
        reading_time_min: 5,
        description: "A test article for idempotency verification.".to_owned(),
    };

    let id1 = engine.add_item(input()).expect("first add_item");
    let id2 = engine.add_item(input()).expect("second add_item");

    assert_eq!(
        id1, id2,
        "add_item must return the same ID for the same URL"
    );
    assert!(id1 > 100_000, "discovered item ID must be above seed range");
}

/// Discovered items appear in the feed after `add_item`.
#[test] fn discovered_item_surfaces_in_feed() { let engine = ForageEngine::ephemeral().expect("ephemeral engine"); engine.seed_default_corpus().expect("seed corpus"); let url = "https://example.com/discovered-page"; let item_id = engine .add_item(ForageItemInput { url: url.to_owned(), title: "Discovered Page".to_owned(), source: "example.com".to_owned(), category: "design".to_owned(), reading_time_min: 3, description: "A page discovered via capture.".to_owned(), }) .expect("add_item"); // Use a different user (99) so the item is unseen and eligible. let feed = engine.feed(99, 7).expect("feed"); let in_feed = feed.iter().any(|item| item.id == item_id); assert!( in_feed, "newly added item {item_id} should appear in the next feed" ); } /// `semantic_search` returns results even without an embedder (neutral-vector fallback). /// When no sidecar is configured the query vector is uniform, so the assertion is /// only that the method succeeds and returns ≤ requested limit — not semantic quality. #[test] fn semantic_search_returns_results_without_embedder() { let engine = ForageEngine::ephemeral().expect("ephemeral engine"); engine.seed_default_corpus().expect("seed corpus"); // Without an embedder, embed_text returns a neutral unit vector. The ANN // query succeeds but results have no semantic ordering. let results = engine .semantic_search("jazz theory", 5) .expect("semantic_search should succeed even without embedder"); assert!( results.len() <= 5, "should respect requested limit; got {}", results.len() ); // Must return at least some results from the 100-item corpus. assert!( !results.is_empty(), "semantic_search on a seeded corpus should return at least one item" ); } /// `similar_to` returns items for a valid seed item ID. 
#[test]
fn similar_to_returns_items_for_seed_item() {
    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    // Item 1 is a seed item with a category-axis embedding; similar_to should
    // find items with close vectors (same or nearby category).
    let results = engine
        .similar_to(1, 5)
        .expect("similar_to should succeed for a seeded item");

    assert!(
        results.len() <= 5,
        "should respect requested limit; got {}",
        results.len()
    );
    assert!(
        !results.is_empty(),
        "similar_to on a seeded corpus should return at least one item"
    );

    // The source item (id=1) should be excluded from its own similarity results.
    assert!(
        !results.iter().any(|i| i.id == 1),
        "source item should be excluded from similar_to results"
    );
}

/// After saving items, `feed` augments the pool with semantically similar candidates.
#[test]
fn similar_to_saved_augments_feed_pool() {
    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    // User 99 has no history. Record a "save" signal on a jazz item (id 25).
    engine
        .signal(99, 25, SignalKind::Save)
        .expect("save signal");

    // Feed should succeed; the similar_to_saved path is exercised.
    let feed = engine.feed(99, 7).expect("feed after save");
    assert_eq!(feed.len(), 7, "feed should return 7 items");
}

/// `top_categories` returns empty for cold users and non-empty for warm users.
#[test]
fn top_categories_reflects_user_state() {
    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    // User 1 is cold — no preference vector.
    let cold = engine.top_categories(1);
    assert!(
        cold.is_empty(),
        "cold user should have no top categories, got: {cold:?}"
    );

    // User 3 is convergent on tech+jazz — should have preferences.
    let warm = engine.top_categories(3);
    assert!(
        !warm.is_empty(),
        "warm user (user 3) should have top categories"
    );
}

// ── P4: Bridge item (surprise moment) tests ───────────────────────────────────

/// Warm user with signals in 2+ categories gets a Bridge-labelled item in feed.
#[test]
fn bridge_item_appears_for_warm_user() {
    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    // User 3 is convergent on tech+jazz — has ≥2 active preference dimensions.
    let feed = engine.feed(3, 7).expect("feed");

    let has_bridge = feed
        .iter()
        .any(|item| matches!(&item.label, forage_engine::ItemLabel::Bridge { .. }));

    // On failure, dump every (category, label) pair to show what was served.
    assert!(
        has_bridge,
        "warm user (user 3) with tech+jazz signals should get a Bridge item; \
         labels: {:?}",
        feed.iter()
            .map(|i| (&i.category, format!("{:?}", i.label)))
            .collect::<Vec<_>>()
    );
}

/// Cold-start user (no preference vector) does not receive a Bridge item.
#[test]
fn bridge_item_absent_for_cold_user() {
    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    // User 1 is cold — no signals, no preference vector.
    let feed = engine.feed(1, 7).expect("cold feed");

    let has_bridge = feed
        .iter()
        .any(|item| matches!(&item.label, forage_engine::ItemLabel::Bridge { .. }));

    assert!(
        !has_bridge,
        "cold user (user 1) should not receive a Bridge item; \
         labels: {:?}",
        feed.iter()
            .map(|i| (&i.category, format!("{:?}", i.label)))
            .collect::<Vec<_>>()
    );
}

/// Bridge label carries distinct, non-empty category names.
#[test]
fn bridge_label_carries_category_names() {
    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    let feed = engine.feed(3, 7).expect("feed");

    // Locate the first Bridge item and borrow its two category names in one pass.
    let (cat_a, cat_b) = feed
        .iter()
        .find_map(|item| match &item.label {
            forage_engine::ItemLabel::Bridge { cat_a, cat_b } => Some((cat_a, cat_b)),
            _ => None,
        })
        .expect("user 3 should have a Bridge item");

    assert!(!cat_a.is_empty(), "cat_a must not be empty");
    assert!(!cat_b.is_empty(), "cat_b must not be empty");
    assert_ne!(
        cat_a, cat_b,
        "bridge categories must be distinct; got both = {cat_a}"
    );
}

// ── P3: Adaptive MAB tests ─────────────────────────────────────────────────────

/// Cold-start user has default exploration ratio and zero stats.
#[test]
fn adaptive_ratio_defaults_for_cold_user() {
    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    let stats = engine.exploration_stats(99);
    assert_eq!(
        stats.exploration_total, 0,
        "cold user has no exploration history"
    );
    // NOTE(review): exact float comparison — presumably `adaptive_ratio` returns
    // fixed tier constants rather than a computed value; confirm.
    assert_eq!(
        stats.adaptive_ratio(),
        0.14,
        "cold user uses default exploration ratio"
    );
}

/// Exploration ratio rises to 0.25 after enough positive exploration hits.
#[test]
fn adaptive_ratio_rises_for_adventurous_user() {
    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    // Simulate the user engaging with 6 out of 10 exploring items (hit_rate = 0.6 > 0.5).
    // We write a feed to populate last_explore_items, then signal the exploring item.
    // Repeat until we have enough exploration history.
    for round in 0..10u64 {
        let feed = engine.feed(42, 7).expect("feed");
        let exploring = feed
            .iter()
            .filter(|item| matches!(item.label, forage_engine::ItemLabel::Exploring));
        for item in exploring {
            // Signal positively to record a hit (6 of 10 rounds are positive).
            if round < 6 {
                engine
                    .signal(42, item.id, SignalKind::View)
                    .expect("view signal");
            } else {
                engine
                    .signal(42, item.id, SignalKind::Skip)
                    .expect("skip signal");
            }
        }
    }

    let stats = engine.exploration_stats(42);
    assert!(
        stats.exploration_total >= 6,
        "should have at least 6 exploration outcomes, got {}",
        stats.exploration_total
    );
    assert!(
        stats.hit_rate() > 0.5,
        "hit_rate should exceed 0.5, got {}",
        stats.hit_rate()
    );
    assert_eq!(
        stats.adaptive_ratio(),
        0.25,
        "adventurous user should get 0.25 exploration ratio"
    );
}

/// Exploration ratio drops to 0.10 for a convergent user who ignores exploration items.
#[test]
fn adaptive_ratio_drops_for_convergent_user() {
    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    // Signal a lot on jazz items first so the user has a warm preference vector.
    for item_id in 25u64..=34 {
        engine
            .signal(55, item_id, SignalKind::View)
            .expect("view signal");
    }

    // Now simulate 10 rounds where the user always skips exploration items.
    for _ in 0..10 {
        let feed = engine.feed(55, 7).expect("feed");
        let exploring = feed
            .iter()
            .filter(|item| matches!(item.label, forage_engine::ItemLabel::Exploring));
        for item in exploring {
            engine
                .signal(55, item.id, SignalKind::Skip)
                .expect("skip signal");
        }
    }

    let stats = engine.exploration_stats(55);
    // 10 rounds × at least 1 explore slot each → must have ≥5 outcomes.
    assert!(
        stats.exploration_total >= 5,
        "should have ≥5 exploration outcomes after 10 rounds; got {}",
        stats.exploration_total
    );
    assert!(
        stats.hit_rate() < 0.2,
        "convergent user hit_rate should be < 0.2, got {}",
        stats.hit_rate()
    );
    assert_eq!(
        stats.adaptive_ratio(),
        0.10,
        "convergent user should get 0.10 exploration ratio"
    );
}

/// UCB1 bonus computation increases for categories with no signals.
#[test]
fn ucb1_bonus_higher_for_unseen_categories() {
    use forage_engine::ExplorationStats;

    let mut stats = ExplorationStats::default();
    // Record 10 signals in "technology" and 0 in "jazz".
    for _ in 0..10 {
        stats.record_category_signal("technology");
    }

    let tech_bonus = stats.ucb1_bonus("technology");
    let jazz_bonus = stats.ucb1_bonus("jazz");

    assert!(
        jazz_bonus > tech_bonus,
        "unseen category (jazz) should get higher UCB1 bonus than explored category (technology); \
         jazz={jazz_bonus:.3}, tech={tech_bonus:.3}"
    );

    // Zero total signals → zero bonus.
    let empty = ExplorationStats::default();
    assert_eq!(
        empty.ucb1_bonus("anything"),
        0.0,
        "cold user gets zero UCB1 bonus"
    );
}

/// Category signals are tracked when `signal()` is called.
#[test]
fn category_signals_tracked_on_signal_write() {
    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    // Item IDs 25–34 are jazz items (per seed corpus).
    engine
        .signal(77, 25, SignalKind::View)
        .expect("view signal");
    engine
        .signal(77, 26, SignalKind::Save)
        .expect("save signal");

    let stats = engine.exploration_stats(77);
    // A missing "jazz" key counts as zero so the assertion below reports it cleanly.
    let jazz_count = stats.category_signals.get("jazz").copied().unwrap_or(0);
    assert_eq!(
        jazz_count, 2,
        "two jazz signals should be tracked; category_signals={:?}",
        stats.category_signals
    );
}