//! Tantivy merge policy integration tests.
//!
//! These tests are marked `#[ignore]` because they involve large-scale ingestion
//! (up to 1M items) and are slow by design — they are intended for manual
//! verification of merge policy behaviour, not CI regression detection.
//!
//! Run them with:
//!
//! ```text
//! cargo test --manifest-path tidal/Cargo.toml --test tantivy_merge -- --ignored
//! ```
|
|
|
|
#![allow(clippy::unwrap_used, clippy::cast_precision_loss)]
|
|
|
|
use std::collections::HashMap;
|
|
use std::time::{Duration, Instant};
|
|
|
|
use tidaldb::TidalDb;
|
|
use tidaldb::schema::{DecaySpec, EntityId, EntityKind, SchemaBuilder, TextFieldType};
|
|
|
|
fn make_db_with_text() -> TidalDb {
|
|
let mut builder = SchemaBuilder::new();
|
|
let _ = builder
|
|
.signal(
|
|
"view",
|
|
EntityKind::Item,
|
|
DecaySpec::Exponential {
|
|
half_life: Duration::from_secs(7 * 24 * 3600),
|
|
},
|
|
)
|
|
.add();
|
|
builder.text_field("title", TextFieldType::Text);
|
|
builder.text_field("category", TextFieldType::Keyword);
|
|
let schema = builder.build().unwrap();
|
|
|
|
TidalDb::builder()
|
|
.ephemeral()
|
|
.with_schema(schema)
|
|
.open()
|
|
.unwrap()
|
|
}
|
|
|
|
/// Checks that the Tantivy segment count is below 20 once 1M items have been
/// ingested and 10 further steady-state rounds of 5K writes each have been
/// flushed.
///
/// The segment count is logged after the bulk ingest and after every round;
/// only the final count is asserted.
///
/// **Manual verification test** — not run in CI.
///
/// Acceptance criterion: `segment_count() < 20` after all rounds.
#[test]
#[ignore = "manual verification: takes ~3 minutes to ingest 1M items"]
fn tantivy_segment_evolution() {
    const BULK_ITEMS: u64 = 1_000_000;
    const CATEGORIES: u64 = 20;
    const ROUNDS: u64 = 10;
    const ITEMS_PER_ROUND: u64 = 5_000;

    let db = make_db_with_text();

    // Single write path shared by the bulk and steady-state phases: builds the
    // two-entry metadata map and writes it under the given entity id.
    let write_item = |id: u64, title: String, category: String| {
        let meta = HashMap::from([
            ("title".to_string(), title),
            ("category".to_string(), category),
        ]);
        db.write_item_with_metadata(EntityId::new(id), &meta)
            .unwrap();
    };

    eprintln!("[tantivy_merge] Ingesting 1M items...");
    let t0 = Instant::now();
    for i in 0..BULK_ITEMS {
        // Entity ids are 1-based: item `i` is stored under id `i + 1`.
        write_item(
            i + 1,
            format!("Content item {i} about category {}", i % CATEGORIES),
            format!("cat{}", i % CATEGORIES),
        );
    }
    eprintln!(
        "[tantivy_merge] Ingestion done in {:.1}s",
        t0.elapsed().as_secs_f32()
    );

    // Pause so the text syncer can commit the ingested items before the text
    // index is reloaded.
    std::thread::sleep(Duration::from_secs(5));
    db.reload_text_index().unwrap();

    let count_after_ingest = db.text_segment_count();
    eprintln!("[tantivy_merge] Segments after 1M ingest: {count_after_ingest}");

    // Steady state: each round writes a fresh batch of ids that continue after
    // the bulk range, then flushes the text index and logs the segment count.
    for round in 0..ROUNDS {
        let round_base = BULK_ITEMS + round * ITEMS_PER_ROUND;
        for i in 0..ITEMS_PER_ROUND {
            write_item(
                round_base + i + 1,
                format!("Steady state round {round} item {i}"),
                "steady".to_string(),
            );
        }
        db.flush_text_index().unwrap();

        let count = db.text_segment_count();
        eprintln!("[tantivy_merge] Round {round}: segment_count = {count}");
    }

    let final_count = db.text_segment_count();
    eprintln!("[tantivy_merge] Final segment count: {final_count}");
    assert!(
        final_count < 20,
        "segment_count={final_count} exceeds target of 20 at steady state"
    );
}
|
|
|
|
/// Verify concurrent read latency stays below 100ms p99 while a writer adds items.
///
/// The database is seeded with 10K items. A background thread then writes up
/// to 5K more (sleeping 2ms after each write) while the test thread issues 500
/// searches spaced 20ms apart and records the wall-clock latency of each
/// `search` call. p50 and p99 are logged; only p99 is asserted.
///
/// **Manual verification test** — not run in CI.
///
/// Acceptance criterion: p99 search latency < 100ms while concurrent writes proceed.
#[test]
#[ignore = "manual verification: takes ~30 seconds"]
fn tantivy_concurrent_read_write_latency() {
    use std::sync::Arc;
    use std::sync::atomic::{AtomicBool, Ordering};

    let mut builder = SchemaBuilder::new();
    builder.text_field("title", TextFieldType::Text);
    let schema = builder.build().unwrap();

    let db = Arc::new(
        TidalDb::builder()
            .ephemeral()
            .with_schema(schema)
            .open()
            .unwrap(),
    );

    // Seed with 10K items (entity ids 1..=10_000).
    for i in 0u64..10_000 {
        let mut meta = HashMap::new();
        meta.insert(
            "title".to_string(),
            format!("Seed item {i} rust programming"),
        );
        db.write_item_with_metadata(EntityId::new(i + 1), &meta)
            .unwrap();
    }
    // Give the seed items time to be committed before reloading the text index.
    std::thread::sleep(Duration::from_millis(3000));
    db.reload_text_index().unwrap();

    // Writer thread: add up to 5K items (ids 10_001..=15_000), sleeping 2ms
    // after each write, i.e. roughly 10 seconds of sleep in total. It exits
    // early once `stop` is set.
    let stop = Arc::new(AtomicBool::new(false));
    let writer = {
        let writer_db = Arc::clone(&db);
        let stop_writer = Arc::clone(&stop);
        std::thread::spawn(move || {
            for i in 0u64..5_000 {
                // Relaxed is sufficient: the flag carries no data, it only
                // asks the loop to end.
                if stop_writer.load(Ordering::Relaxed) {
                    break;
                }
                let mut meta = HashMap::new();
                meta.insert(
                    "title".to_string(),
                    format!("Concurrent write item {i} async concurrency"),
                );
                writer_db
                    .write_item_with_metadata(EntityId::new(10_001 + i), &meta)
                    .unwrap();
                std::thread::sleep(Duration::from_millis(2));
            }
        })
    };

    // Build the query once, outside the timed region, so the recorded
    // latencies cover the `search` call only and not query construction.
    let query = tidaldb::query::search::Search::builder()
        .query("rust programming async")
        .limit(10)
        .build()
        .unwrap();

    // Reader loop (runs on the test thread): search repeatedly and collect
    // per-call latencies in milliseconds.
    let mut latencies_ms = Vec::with_capacity(500);
    for _ in 0..500 {
        let t = Instant::now();
        let _ = db.search(&query).unwrap();
        latencies_ms.push(t.elapsed().as_secs_f64() * 1000.0);
        std::thread::sleep(Duration::from_millis(20));
    }

    stop.store(true, Ordering::Relaxed);
    writer.join().unwrap();

    // `total_cmp` is a total order on f64, so the sort has no panic path.
    latencies_ms.sort_unstable_by(f64::total_cmp);
    let p99_idx = (latencies_ms.len() as f64 * 0.99) as usize;
    // Clamp so the index stays in bounds for any sample count.
    let p99 = latencies_ms[p99_idx.min(latencies_ms.len() - 1)];
    let p50 = latencies_ms[latencies_ms.len() / 2];
    eprintln!(
        "[tantivy_merge] Concurrent read latency: p50={p50:.2}ms p99={p99:.2}ms (n={})",
        latencies_ms.len()
    );

    assert!(
        p99 < 100.0,
        "concurrent read p99={p99:.2}ms exceeds 100ms target"
    );
}
|