tidaldb/applications/forage/engine/tests/smoke.rs
jordan f4cfd6c81f feat: complete M8 replication primitives + forage enhancements + docs
Milestone 8 (phases 1-4):
- Shard-aware WAL segment naming, BatchHeader v2, ShardRouter
- Transport trait, InProcessTransport, WalShipper, FollowerDb
- HLC, PNCounter, LWWRegister, CrdtSignalState, ReconciliationEngine
- Session replication bridge with SeqNo/HWM, idempotency store

Forage application:
- Multi-source discovery engine with MAB exploration
- Embedding-based label system, server handlers, UI refresh

Other:
- QUICKSTART.md, README.md, milestone-8 planning docs
- Hard negative union semantics, RLHF export enhancements
- Recovery benchmark and visibility test expansions
- Split 8 oversized source files per CODING_GUIDELINES §9

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-24 13:17:19 -07:00

876 lines
30 KiB
Rust
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//! Forage engine smoke tests.
//!
//! These tests verify the full engine lifecycle:
//! seed corpus load → signal write → feed generation → discovered item injection.
//! They run against an ephemeral (in-memory) engine so they leave no disk state.
use forage_engine::{ForageEngine, ForageItemInput, SignalKind};
// ── P2: Builder pattern tests ─────────────────────────────────────────────────
/// An engine opened via `ForageEngine::builder().ephemeral().open()` is meant to be
/// interchangeable with `ForageEngine::ephemeral()`: this test checks that it can be
/// seeded and then serves a full seven-item feed.
#[test]
fn builder_ephemeral_equivalent() {
    let engine = ForageEngine::builder()
        .ephemeral()
        .open()
        .expect("builder ephemeral");
    engine.seed_default_corpus().expect("seed");

    // A complete feed is the observable evidence that the builder path works.
    let item_count = engine.feed(1, 7).expect("feed").len();
    assert_eq!(item_count, 7);
}
/// A builder configured with `with_embedder` at an address where nothing is listening
/// must still produce a usable engine.
///
/// Seeding and `add_item` are both required to succeed (the engine is expected to fall
/// back to a neutral vector), and the newly added item must still show up in a feed.
#[test]
fn builder_with_embedder_fallback_on_unavailable_sidecar() {
    // No service is expected on this port, so every embedding request fails.
    let engine = ForageEngine::builder()
        .ephemeral()
        .with_embedder("http://127.0.0.1:19999")
        .open()
        .expect("builder with embedder");
    engine.seed_default_corpus().expect("seed with fallback");

    let input = ForageItemInput {
        url: String::from("https://example.com/p2-fallback"),
        title: String::from("P2 Fallback Article"),
        source: String::from("example.com"),
        category: String::from("technology"),
        reading_time_min: 4,
        description: String::from("Tests neutral vector fallback when embedder is down."),
        tags: Vec::new(),
        entities: Vec::new(),
        content_type: String::new(),
        summary: String::new(),
    };
    let item_id = engine
        .add_item(input)
        .expect("add_item must succeed even when embedder is unreachable");

    // The discovered item should be injected into the feed regardless of its embedding.
    let feed = engine.feed(99, 7).expect("feed");
    let surfaced = feed.iter().map(|entry| entry.id).any(|id| id == item_id);
    assert!(
        surfaced,
        "discovered item should appear in feed even with fallback embedding"
    );
}
/// After seeding the default corpus, a cold user's feed honours the requested limit
/// and covers at least three distinct categories.
#[test]
fn seed_corpus_and_cold_feed() {
    use std::collections::HashSet;

    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    let feed = engine.feed(1, 7).expect("feed");
    assert_eq!(feed.len(), 7, "feed should return exactly 7 items");

    // Gather the distinct category names present in the feed.
    let mut categories: HashSet<&str> = HashSet::new();
    for item in &feed {
        categories.insert(item.category.as_str());
    }
    assert!(
        categories.len() >= 3,
        "cold feed should span ≥3 categories, got: {categories:?}"
    );
}
/// The feed for user 3 (described by the seed data as convergent on tech+jazz) must not
/// be identical to the feed for user 1 (cold): at least one warm item is absent from
/// the cold feed.
#[test]
fn warm_user_differs_from_cold_user() {
    use std::collections::HashSet;

    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    // Cold feed first, then warm feed — same request order as always.
    let cold_ids: HashSet<u64> = engine
        .feed(1, 7)
        .expect("cold feed")
        .iter()
        .map(|item| item.id)
        .collect();
    let warm_feed = engine.feed(3, 7).expect("warm feed");

    // One warm item missing from the cold set is enough to call the feeds different.
    let differs = warm_feed.iter().any(|item| !cold_ids.contains(&item.id));
    assert!(
        differs,
        "warm user (user 3) feed should differ from cold user (user 1) feed"
    );
}
/// Writing view+save signals on jazz items pulls jazz content into a previously
/// neutral user's feed and changes the set of items returned.
#[test]
fn signals_shift_feed_toward_signaled_category() {
    use std::collections::HashSet;

    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    // Baseline for user 99, who has written no signals yet.
    let before_ids: HashSet<u64> = engine
        .feed(99, 7)
        .expect("before feed")
        .iter()
        .map(|item| item.id)
        .collect();

    // View then save each of the ten jazz items (ids 25–34).
    for item_id in 25u64..=34 {
        for (kind, what) in [
            (SignalKind::View, "view signal"),
            (SignalKind::Save, "save signal"),
        ] {
            engine.signal(99, item_id, kind).expect(what);
        }
    }

    let after = engine.feed(99, 7).expect("after feed");

    // The signalled category must now be represented.
    assert!(
        after.iter().any(|item| item.category == "jazz"),
        "feed should shift toward jazz after 10 jazz signals; got: {:?}",
        after
            .iter()
            .map(|i| (&i.title, &i.category))
            .collect::<Vec<_>>()
    );

    // And at least one returned item must be new relative to the baseline.
    let changed = after.iter().any(|item| !before_ids.contains(&item.id));
    assert!(changed, "feed should change after signals are written");
}
/// A seven-item feed is expected to reserve at least one exploration slot, so some
/// item must carry the `Exploring` label.
#[test]
fn feed_contains_exploring_label() {
    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    let feed = engine.feed(3, 7).expect("feed");
    let first_exploring = feed
        .iter()
        .find(|item| matches!(item.label, forage_engine::ItemLabel::Exploring));
    assert!(
        first_exploring.is_some(),
        "feed of 7 items should always contain at least 1 Exploring item"
    );
}
/// Adding the same URL twice yields the same item ID, and that ID lies above the
/// range used by seed items.
#[test]
fn add_item_is_idempotent() {
    /// Builds a fresh copy of the fixture input on every call.
    fn sample_input() -> ForageItemInput {
        ForageItemInput {
            url: String::from("https://example.com/test-article"),
            title: String::from("Test Article"),
            source: String::from("example.com"),
            category: String::from("technology"),
            reading_time_min: 5,
            description: String::from("A test article for idempotency verification."),
            tags: Vec::new(),
            entities: Vec::new(),
            content_type: String::new(),
            summary: String::new(),
        }
    }

    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    let first_id = engine.add_item(sample_input()).expect("first add_item");
    let second_id = engine.add_item(sample_input()).expect("second add_item");

    assert_eq!(
        first_id, second_id,
        "add_item must return the same ID for the same URL"
    );
    assert!(
        first_id > 100_000,
        "discovered item ID must be above seed range"
    );
}
/// An item registered through `add_item` shows up in the very next feed request.
#[test]
fn discovered_item_surfaces_in_feed() {
    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    let discovered = ForageItemInput {
        url: String::from("https://example.com/discovered-page"),
        title: String::from("Discovered Page"),
        source: String::from("example.com"),
        category: String::from("design"),
        reading_time_min: 3,
        description: String::from("A page discovered via capture."),
        tags: Vec::new(),
        entities: Vec::new(),
        content_type: String::new(),
        summary: String::new(),
    };
    let item_id = engine.add_item(discovered).expect("add_item");

    // User 99 has no history, so the new item is unseen and eligible for them.
    let feed = engine.feed(99, 7).expect("feed");
    let in_feed = feed.iter().map(|item| item.id).any(|id| id == item_id);
    assert!(
        in_feed,
        "newly added item {item_id} should appear in the next feed"
    );
}
/// `semantic_search` works on an engine that has no embedder configured.
///
/// Only the result count is checked (non-empty and within the requested limit);
/// ordering quality is deliberately not asserted because, without a sidecar, the
/// query is expected to use a neutral fallback vector.
#[test]
fn semantic_search_returns_results_without_embedder() {
    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    let hit_count = engine
        .semantic_search("jazz theory", 5)
        .expect("semantic_search should succeed even without embedder")
        .len();

    // Upper bound: never more than requested.
    assert!(
        hit_count <= 5,
        "should respect requested limit; got {}",
        hit_count
    );
    // Lower bound: a seeded corpus must yield something.
    assert!(
        hit_count > 0,
        "semantic_search on a seeded corpus should return at least one item"
    );
}
/// `similar_to` on a seed item returns a non-empty, limit-respecting list that never
/// contains the query item itself.
#[test]
fn similar_to_returns_items_for_seed_item() {
    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    // Item 1 belongs to the seed corpus, so a similarity lookup on it should succeed.
    let neighbours = engine
        .similar_to(1, 5)
        .expect("similar_to should succeed for a seeded item");
    let count = neighbours.len();

    assert!(
        count <= 5,
        "should respect requested limit; got {}",
        count
    );
    assert!(
        count > 0,
        "similar_to on a seeded corpus should return at least one item"
    );
    // An item is trivially similar to itself, so it must be filtered out.
    assert!(
        neighbours.iter().all(|item| item.id != 1),
        "source item should be excluded from similar_to results"
    );
}
/// Once a user has saved an item, feed generation still succeeds and returns a full
/// page. This exercises the code path that augments the pool with items similar to
/// saved ones; only success and feed size are asserted.
#[test]
fn similar_to_saved_augments_feed_pool() {
    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    // Give the otherwise history-free user 99 a single save on jazz item 25.
    engine
        .signal(99, 25, SignalKind::Save)
        .expect("save signal");

    let feed_len = engine.feed(99, 7).expect("feed after save").len();
    assert_eq!(feed_len, 7, "feed should return 7 items");
}
/// `top_categories` is empty for a user with no preferences (user 1) and non-empty
/// for a user the seed data gives preferences to (user 3).
#[test]
fn top_categories_reflects_user_state() {
    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    // Cold side: nothing to report.
    let cold = engine.top_categories(1);
    assert!(
        cold.is_empty(),
        "cold user should have no top categories, got: {cold:?}"
    );

    // Warm side: user 3 is seeded as convergent on tech+jazz.
    let warm_has_prefs = !engine.top_categories(3).is_empty();
    assert!(
        warm_has_prefs,
        "warm user (user 3) should have top categories"
    );
}
// ── P4: Bridge item (surprise moment) tests ───────────────────────────────────
/// A warm user with preferences in two or more categories receives at least one
/// item labelled `Bridge`.
#[test]
fn bridge_item_appears_for_warm_user() {
    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    // User 3 is seeded as convergent on tech+jazz, i.e. two active categories.
    let feed = engine.feed(3, 7).expect("feed");
    let bridge_count = feed
        .iter()
        .filter(|item| matches!(&item.label, forage_engine::ItemLabel::Bridge { .. }))
        .count();
    assert!(
        bridge_count > 0,
        "warm user (user 3) with tech+jazz signals should get a Bridge item; \
         labels: {:?}",
        feed.iter()
            .map(|i| (&i.category, format!("{:?}", i.label)))
            .collect::<Vec<_>>()
    );
}
/// A cold-start user must never be shown a `Bridge` item: with no preferences there
/// are no two categories to bridge.
#[test]
fn bridge_item_absent_for_cold_user() {
    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    // User 1 has written no signals.
    let feed = engine.feed(1, 7).expect("cold feed");
    let no_bridge = feed
        .iter()
        .all(|item| !matches!(&item.label, forage_engine::ItemLabel::Bridge { .. }));
    assert!(
        no_bridge,
        "cold user (user 1) should not receive a Bridge item; \
         labels: {:?}",
        feed.iter()
            .map(|i| (&i.category, format!("{:?}", i.label)))
            .collect::<Vec<_>>()
    );
}
/// The `Bridge` label on a warm user's feed item names two categories that are both
/// non-empty and different from each other.
#[test]
fn bridge_label_carries_category_names() {
    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    let feed = engine.feed(3, 7).expect("feed");

    // Locate the first Bridge item and destructure its label in a single step, so
    // there is no separate "found it but the label is not Bridge" branch to handle.
    let (cat_a, cat_b) = feed
        .iter()
        .find_map(|item| match &item.label {
            forage_engine::ItemLabel::Bridge { cat_a, cat_b } => Some((cat_a, cat_b)),
            _ => None,
        })
        .expect("user 3 should have a Bridge item");

    assert!(!cat_a.is_empty(), "cat_a must not be empty");
    assert!(!cat_b.is_empty(), "cat_b must not be empty");
    assert_ne!(
        cat_a, cat_b,
        "bridge categories must be distinct; got both = {cat_a}"
    );
}
// ── P3: Adaptive MAB tests ─────────────────────────────────────────────────────
/// A user with no exploration history reports zero exploration outcomes and the
/// default adaptive exploration ratio of 0.14.
#[test]
fn adaptive_ratio_defaults_for_cold_user() {
    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    // User 99 has never requested a feed or written a signal.
    let cold_stats = engine.exploration_stats(99);

    let total = cold_stats.exploration_total;
    assert_eq!(total, 0, "cold user has no exploration history");

    let ratio = cold_stats.adaptive_ratio();
    assert_eq!(ratio, 0.14, "cold user uses default exploration ratio");
}
/// When a user engages with most of the exploration items shown to them, the adaptive
/// exploration ratio climbs to 0.25.
#[test]
fn adaptive_ratio_rises_for_adventurous_user() {
    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    // Ten feed rounds for user 42. Each round fetches a feed, then reacts to every
    // Exploring item in it: positively in the first six rounds, with a skip in the
    // remaining four, so that hits outnumber misses (target hit_rate = 0.6 > 0.5).
    for round in 0..10u64 {
        let feed = engine.feed(42, 7).expect("feed");
        let exploring = feed
            .iter()
            .filter(|item| matches!(item.label, forage_engine::ItemLabel::Exploring));
        for item in exploring {
            match round {
                0..=5 => {
                    engine
                        .signal(42, item.id, SignalKind::View)
                        .expect("view signal");
                }
                _ => {
                    engine
                        .signal(42, item.id, SignalKind::Skip)
                        .expect("skip signal");
                }
            }
        }
    }

    let stats = engine.exploration_stats(42);
    let outcomes = stats.exploration_total;
    assert!(
        outcomes >= 6,
        "should have at least 6 exploration outcomes, got {}",
        outcomes
    );

    let hit_rate = stats.hit_rate();
    assert!(
        hit_rate > 0.5,
        "hit_rate should exceed 0.5, got {}",
        hit_rate
    );

    assert_eq!(
        stats.adaptive_ratio(),
        0.25,
        "adventurous user should get 0.25 exploration ratio"
    );
}
/// A user who consistently skips exploration items ends up with the reduced adaptive
/// exploration ratio of 0.10.
#[test]
fn adaptive_ratio_drops_for_convergent_user() {
    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    // Warm user 55 up with views on the jazz items (ids 25–34) before exploring.
    (25u64..=34).for_each(|item_id| {
        engine
            .signal(55, item_id, SignalKind::View)
            .expect("view signal");
    });

    // Ten rounds in which every Exploring item offered is skipped.
    for _round in 0..10 {
        let feed = engine.feed(55, 7).expect("feed");
        let exploring = feed
            .iter()
            .filter(|item| matches!(item.label, forage_engine::ItemLabel::Exploring));
        for item in exploring {
            engine
                .signal(55, item.id, SignalKind::Skip)
                .expect("skip signal");
        }
    }

    let stats = engine.exploration_stats(55);

    // Ten rounds with an explore slot each should give comfortably ≥5 outcomes.
    let outcomes = stats.exploration_total;
    assert!(
        outcomes >= 5,
        "should have ≥5 exploration outcomes after 10 rounds; got {}",
        outcomes
    );

    let hit_rate = stats.hit_rate();
    assert!(
        hit_rate < 0.2,
        "convergent user hit_rate should be < 0.2, got {}",
        hit_rate
    );

    assert_eq!(
        stats.adaptive_ratio(),
        0.10,
        "convergent user should get 0.10 exploration ratio"
    );
}
/// The UCB1 bonus favours categories the user has not signalled on, and is zero when
/// no signals have been recorded at all.
#[test]
fn ucb1_bonus_higher_for_unseen_categories() {
    use forage_engine::ExplorationStats;

    // No signals anywhere → no bonus for any category.
    let untouched = ExplorationStats::default();
    assert_eq!(
        untouched.ucb1_bonus("anything"),
        0.0,
        "cold user gets zero UCB1 bonus"
    );

    // Ten signals on "technology", none on "jazz".
    let mut stats = ExplorationStats::default();
    let mut recorded = 0;
    while recorded < 10 {
        stats.record_category_signal("technology");
        recorded += 1;
    }

    let jazz_bonus = stats.ucb1_bonus("jazz");
    let tech_bonus = stats.ucb1_bonus("technology");
    assert!(
        jazz_bonus > tech_bonus,
        "unseen category (jazz) should get higher UCB1 bonus than explored category (technology); \
         jazz={jazz_bonus:.3}, tech={tech_bonus:.3}"
    );
}
/// Each call to `signal()` increments the per-category counter exposed through
/// `exploration_stats`.
#[test]
fn category_signals_tracked_on_signal_write() {
    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    // Items 25 and 26 fall in the jazz range (ids 25–34) of the seed corpus.
    engine
        .signal(77, 25, SignalKind::View)
        .expect("view signal");
    engine
        .signal(77, 26, SignalKind::Save)
        .expect("save signal");

    let stats = engine.exploration_stats(77);
    // A missing entry counts as zero signals.
    let jazz_count = match stats.category_signals.get("jazz") {
        Some(count) => *count,
        None => 0,
    };
    assert_eq!(
        jazz_count, 2,
        "two jazz signals should be tracked; category_signals={:?}",
        stats.category_signals
    );
}
// ── Browse tasks tests ──────────────────────────────────────────────────────
/// A cold user's browse plan lists all eight source categories with equal priority.
///
/// Also checks the plan's fixed parameters (`should_run`, the per-topic limit that was
/// passed in, a 30-minute interval), that every topic has at least one source, and
/// that no tag hints are present.
#[test]
fn browse_tasks_cold_start_equal_weights() {
    // `expect` with context, matching the rest of this file, so a setup failure is
    // identifiable from the panic message alone.
    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    // User 99 has no signals.
    let plan = engine.browse_tasks(99, 5);
    assert!(plan.should_run);
    assert_eq!(plan.limit_per_topic, 5);
    assert_eq!(plan.interval_minutes, 30);
    assert_eq!(
        plan.topics.len(),
        8,
        "all 8 source categories should be present"
    );

    // Every priority must match the first one, within floating-point tolerance.
    // Indexing is safe: the length was asserted to be 8 just above.
    let first_priority = plan.topics[0].priority;
    for topic in &plan.topics {
        assert!(
            (topic.priority - first_priority).abs() < 1e-5,
            "cold-start topics should have equal priority, got {} and {}",
            first_priority,
            topic.priority
        );
    }

    // Every topic must have at least one source URL.
    for topic in &plan.topics {
        assert!(
            !topic.sources.is_empty(),
            "topic '{}' has no sources",
            topic.name
        );
    }

    // Cold start: nothing saved, so no tag hints.
    assert!(
        plan.tag_hints.is_empty(),
        "cold user should have no tag hints"
    );
}
/// After a user saves several jazz items, jazz is the first topic in their browse plan
/// and has strictly higher priority than every other topic.
#[test]
fn browse_tasks_warm_user_top_category_ranks_first() {
    // `expect` with context, matching the rest of this file.
    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    // Pick up to five jazz items from the corpus to build a preference for user 1.
    let jazz_items: Vec<u64> = engine
        .all_items()
        .iter()
        .filter(|s| s.category == "jazz")
        .take(5)
        .map(|s| s.id)
        .collect();
    assert!(!jazz_items.is_empty(), "seed corpus should have jazz items");

    for id in &jazz_items {
        engine
            .signal(1, *id, SignalKind::Save)
            .expect("save signal");
    }

    let plan = engine.browse_tasks(1, 5);

    // Jazz should be the highest-priority topic.
    assert!(!plan.topics.is_empty());
    assert_eq!(
        plan.topics[0].name,
        "jazz",
        "jazz should rank first after jazz saves, got: {:?}",
        plan.topics
            .iter()
            .map(|t| (&t.name, t.priority))
            .collect::<Vec<_>>()
    );

    // Strictly greater than every other topic — a tie would not count as ranking first.
    let jazz_priority = plan.topics[0].priority;
    for other in plan.topics.iter().skip(1) {
        assert!(
            jazz_priority > other.priority,
            "jazz ({}) should outrank {} ({})",
            jazz_priority,
            other.name,
            other.priority
        );
    }
}
/// Tags on an item the user has saved appear in the browse plan's `tag_hints`.
#[test]
fn browse_tasks_tag_hints_populated_from_saves() {
    // `expect` with context, matching the rest of this file.
    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    // Capture an item that carries tags, then save it for user 1.
    let input = ForageItemInput {
        url: "https://example.com/modal-jazz-article".to_string(),
        title: "A Guide to Modal Jazz".to_string(),
        source: "example.com".to_string(),
        category: "jazz".to_string(),
        reading_time_min: 8,
        description: "Deep dive into modal jazz techniques.".to_string(),
        tags: vec![
            "modal jazz".to_string(),
            "music theory".to_string(),
            "coltrane".to_string(),
        ],
        entities: vec!["John Coltrane".to_string()],
        content_type: "tutorial".to_string(),
        summary: "Explores the harmonic language of modal jazz. Coltrane is the central focus."
            .to_string(),
    };
    let item_id = engine.add_item(input).expect("add_item");
    engine
        .signal(1, item_id, SignalKind::Save)
        .expect("save signal");

    let plan = engine.browse_tasks(1, 5);

    // The saved item's tags should surface as hints.
    assert!(
        plan.tag_hints.contains(&"modal jazz".to_string()),
        "tag_hints should contain 'modal jazz', got: {:?}",
        plan.tag_hints
    );
    assert!(
        plan.tag_hints.contains(&"music theory".to_string()),
        "tag_hints should contain 'music theory', got: {:?}",
        plan.tag_hints
    );
}
// ── Top tags tests ──────────────────────────────────────────────────────────
/// A user with no saved items gets an empty `top_tags` list.
#[test]
fn top_tags_empty_for_cold_user() {
    // `expect` with context, matching the rest of this file.
    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    // User 99 has no saves.
    let tags = engine.top_tags(99, 5);
    assert!(
        tags.is_empty(),
        "cold user should have no tags, got: {tags:?}"
    );
}
/// `top_tags` orders tags by how often they occur across the user's saved items.
///
/// Three items are saved with overlapping tags: "rust" ×3, "async" ×2, and "tokio" /
/// "wasm" ×1 each. "rust" must come first and "async" must precede "wasm".
#[test]
fn top_tags_frequency_ranked() {
    // `expect` with context, matching the rest of this file.
    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    // Fixed-size fixture table: (url, tags). A stack array of slices is enough here;
    // nothing needs to grow, so no heap-allocated Vecs are required.
    let items: [(&str, &[&str]); 3] = [
        (
            "https://example.com/rust-async",
            &["rust", "async", "tokio"],
        ),
        ("https://example.com/rust-wasm", &["rust", "wasm"]),
        ("https://example.com/rust-futures", &["rust", "async"]),
    ];

    for (url, tags) in items {
        let input = ForageItemInput {
            url: url.to_string(),
            title: format!("Article: {url}"),
            source: "example.com".to_string(),
            category: "technology".to_string(),
            reading_time_min: 5,
            description: String::new(),
            tags: tags.iter().map(|tag| (*tag).to_owned()).collect(),
            entities: vec![],
            content_type: "tutorial".to_string(),
            summary: String::new(),
        };
        let id = engine.add_item(input).expect("add_item");
        engine.signal(1, id, SignalKind::Save).expect("save signal");
    }

    let tags = engine.top_tags(1, 5);

    // "rust" appears 3x — must be first.
    assert!(!tags.is_empty(), "should have tags after saves");
    assert_eq!(
        tags[0], "rust",
        "most frequent tag should be first, got: {:?}",
        tags
    );

    // "async" appears 2x — must rank above "wasm" (1x).
    let async_pos = tags
        .iter()
        .position(|t| t == "async")
        .expect("async should be present");
    let wasm_pos = tags
        .iter()
        .position(|t| t == "wasm")
        .expect("wasm should be present");
    assert!(
        async_pos < wasm_pos,
        "async (2x) should rank before wasm (1x)"
    );
}
/// Enrichment fields (tags, entities, content_type, summary) supplied to `add_item`
/// are present on the corresponding item returned by `feed()`.
///
/// Regression guard for the feed enrichment hydration path.
#[test]
fn discovered_item_enrichment_preserved_in_feed() {
    // `expect` with context, matching the rest of this file.
    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    let item_id = engine
        .add_item(ForageItemInput {
            url: "https://example.com/enriched-article".to_string(),
            title: "Enriched Article".to_string(),
            source: "example.com".to_string(),
            category: "technology".to_string(),
            reading_time_min: 6,
            description: "An article with full enrichment metadata.".to_string(),
            tags: vec!["rust".to_string(), "async".to_string()],
            entities: vec!["Tokio".to_string()],
            content_type: "tutorial".to_string(),
            summary: "Teaches async Rust. Tokio is the runtime used throughout.".to_string(),
        })
        .expect("add_item");

    // Fetch the feed for a fresh user so the discovered item is injected.
    let feed = engine.feed(99, 7).expect("feed");
    let item = feed
        .iter()
        .find(|i| i.id == item_id)
        .expect("discovered item should appear in feed");

    assert_eq!(
        item.tags,
        vec!["rust", "async"],
        "feed item should carry its stored tags, got: {:?}",
        item.tags
    );
    assert_eq!(
        item.entities,
        vec!["Tokio"],
        "feed item should carry its stored entities, got: {:?}",
        item.entities
    );
    assert_eq!(
        item.content_type, "tutorial",
        "feed item should carry its stored content_type"
    );
    // Only presence is checked for the summary, not its exact text.
    assert!(
        !item.summary.is_empty(),
        "feed item should carry its stored summary, got empty string"
    );
}
/// Items a user dwelled on for at least 15 seconds contribute their tags to
/// `top_tags` even without an explicit save; shorter dwells do not.
#[test]
fn top_tags_includes_dwell_items() {
    // `expect` with context, matching the rest of this file.
    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    // First item: distinctive tags, long dwell.
    let item_id = engine
        .add_item(ForageItemInput {
            url: "https://example.com/dwell-tagged-article".to_string(),
            title: "Deep Read Article".to_string(),
            source: "example.com".to_string(),
            category: "science".to_string(),
            reading_time_min: 10,
            description: "An article worth reading slowly.".to_string(),
            tags: vec!["quantum computing".to_string(), "research".to_string()],
            entities: vec![],
            content_type: "research".to_string(),
            summary: "Explores quantum error correction. Practical applications are assessed."
                .to_string(),
        })
        .expect("add_item (long dwell)");

    // 20 000 ms is above the 15 000 ms completion threshold; no save is written.
    engine
        .signal_dwell(88, item_id, 20_000)
        .expect("long dwell signal");

    let tags = engine.top_tags(88, 5);
    assert!(
        tags.contains(&"quantum computing".to_string()),
        "top_tags should include tags from completion-dwell items; got: {:?}",
        tags
    );

    // Second item: short dwell (< 15 s) must NOT contribute its tags.
    let item_id2 = engine
        .add_item(ForageItemInput {
            url: "https://example.com/short-dwell-article".to_string(),
            title: "Brief Glance Article".to_string(),
            source: "example.com".to_string(),
            category: "science".to_string(),
            reading_time_min: 5,
            description: "Skimmed article.".to_string(),
            tags: vec!["astronomy".to_string()],
            entities: vec![],
            content_type: "news".to_string(),
            summary: "Brief overview of recent astronomy findings.".to_string(),
        })
        .expect("add_item (short dwell)");

    // Only 5 seconds.
    engine
        .signal_dwell(88, item_id2, 5_000)
        .expect("short dwell signal");

    let tags_after = engine.top_tags(88, 10);
    assert!(
        !tags_after.contains(&"astronomy".to_string()),
        "short dwell (<15s) should not contribute tags; got: {:?}",
        tags_after
    );
}
/// Sanity check that `signal_dwell` is callable from tests and accepts valid
/// parameters without returning an error.
#[test]
fn signal_dwell_method_exists() {
    // `expect` with context, matching the rest of this file.
    let engine = ForageEngine::ephemeral().expect("ephemeral engine");
    engine.seed_default_corpus().expect("seed corpus");

    // 30 seconds of dwell by user 1 on seed item 1.
    engine
        .signal_dwell(1, 1, 30_000)
        .expect("signal_dwell on a seed item should succeed");
}