tidaldb/tidal/examples/quickstart.rs
2026-02-23 22:41:16 -07:00

202 lines
6.5 KiB
Rust

#![allow(clippy::unwrap_used)]
//! tidalDB quickstart: schema, items, signals, ranking.
//!
//! Demonstrates the full ingestion-to-ranking loop:
//! 1. Define a schema with `view`, `like`, and `share` signals
//! 2. Open an ephemeral database
//! 3. Write 20 items with metadata and 128D random embeddings
//! 4. Record engagement: view 5 items, like 3 of those
//! 5. Retrieve ranked results using the `trending` profile
//! 6. Print ranked items with scores
//!
//! # Running
//!
//! ```bash
//! cargo run --manifest-path tidal/Cargo.toml --example quickstart
//! ```
use std::collections::HashMap;
use std::time::Duration;
use rand::Rng;
use tidaldb::TidalDb;
use tidaldb::schema::{DecaySpec, EntityId, EntityKind, SchemaBuilder, Timestamp, Window};
/// Generate a random unit-normalized embedding of the given dimensionality.
///
/// Each component is sampled as `rng.random::<f32>() - 0.5`, then the whole
/// vector is divided by its L2 norm.
///
/// # Edge cases
///
/// - `dim == 0` returns an empty vector (there is no axis to point along).
/// - If the sampled vector's norm is below `f32::EPSILON`, the unit vector
///   along the first axis is returned instead of dividing by a near-zero norm.
fn random_unit_vector(dim: usize, rng: &mut impl Rng) -> Vec<f32> {
    let v: Vec<f32> = (0..dim).map(|_| rng.random::<f32>() - 0.5).collect();
    let norm = v.iter().map(|x| x * x).sum::<f32>().sqrt();
    if norm < f32::EPSILON {
        // Degenerate case: return a unit vector along the first axis.
        let mut unit = vec![0.0_f32; dim];
        // `first_mut` instead of `unit[0]`: an empty vector (dim == 0) also
        // lands in this branch because its norm is 0, and indexing would panic.
        if let Some(first) = unit.first_mut() {
            *first = 1.0;
        }
        return unit;
    }
    // `v` is not needed afterwards, so consume it rather than borrow.
    v.into_iter().map(|x| x / norm).collect()
}
/// Runs the quickstart end to end: build a schema, open an ephemeral database,
/// write 20 items with metadata and embeddings, record view/like signals,
/// retrieve the top 10 items under the `trending` profile, print them, and
/// close the database.
///
/// Any error returned by a tidalDB call is propagated to the caller via `?`.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    use tidaldb::query::retrieve::Retrieve;

    // Install a tracing subscriber filtered to `tidaldb=info`.
    tracing_subscriber::fmt()
        .with_env_filter("tidaldb=info")
        .init();

    // ── 1. Define the schema ────────────────────────────────────────────
    // Half-lives below are expressed in whole days.
    let days = |n: u64| Duration::from_secs(n * 24 * 3600);
    let mut builder = SchemaBuilder::new();

    // `view`: 7-day half-life; 1h, 24h and all-time windows; velocity on.
    let _ = builder
        .signal(
            "view",
            EntityKind::Item,
            DecaySpec::Exponential { half_life: days(7) },
        )
        .windows(&[Window::OneHour, Window::TwentyFourHours, Window::AllTime])
        .velocity(true)
        .add();

    // `like`: 30-day half-life; all-time window only; velocity off.
    let _ = builder
        .signal(
            "like",
            EntityKind::Item,
            DecaySpec::Exponential { half_life: days(30) },
        )
        .windows(&[Window::AllTime])
        .velocity(false)
        .add();

    // `share`: 3-day half-life; 24h and all-time windows; velocity on.
    // No share events are recorded below.
    // NOTE(review): presumably declared because the `trending` profile's boost
    // definitions reference it — confirm against the profile definition.
    let _ = builder
        .signal(
            "share",
            EntityKind::Item,
            DecaySpec::Exponential { half_life: days(3) },
        )
        .windows(&[Window::TwentyFourHours, Window::AllTime])
        .velocity(true)
        .add();

    let schema = builder.build()?;

    // ── 2. Open an ephemeral database ───────────────────────────────────
    let db = TidalDb::builder().ephemeral().with_schema(schema).open()?;
    db.health_check()?;
    println!("tidalDB opened (ephemeral, build: {})", tidaldb::BUILD_HASH);
    println!();

    // ── 3. Write 20 items with metadata and embeddings ──────────────────
    let mut rng = rand::rng();
    let embedding_dim = 128;
    let categories = ["music", "tech", "cooking", "sports", "art"];
    // An item's category index is `id % categories.len()`. Ids start at 1, so
    // the endless rotation is advanced by one before being paired with ids.
    let category_rotation = categories.iter().copied().cycle().skip(1);
    for (id, category) in (1u64..=20).zip(category_rotation) {
        let metadata = HashMap::from([
            ("title".to_string(), format!("Item {id}: Great Content")),
            ("category".to_string(), category.to_string()),
            ("format".to_string(), "video".to_string()),
            ("duration".to_string(), (60 + id * 30).to_string()),
            (
                "created_at".to_string(),
                Timestamp::now().as_nanos().to_string(),
            ),
        ]);
        db.write_item_with_metadata(EntityId::new(id), &metadata)?;

        let embedding = random_unit_vector(embedding_dim, &mut rng);
        db.write_item_embedding(EntityId::new(id), &embedding)?;
    }
    println!(
        "Wrote {} items with metadata and {embedding_dim}D embeddings.",
        db.item_count()
    );

    // ── 4. Record engagement signals ────────────────────────────────────
    // Every event shares one timestamp and a weight of 1.0. All views are
    // recorded before any like; every liked item is also a viewed item.
    let now = Timestamp::now();
    let viewed = [1u64, 3, 7, 12, 18];
    let liked = [3u64, 7, 18];
    for (signal_name, ids) in [("view", &viewed[..]), ("like", &liked[..])] {
        for &id in ids {
            db.signal(signal_name, EntityId::new(id), 1.0, now)?;
        }
    }
    println!(
        "Recorded {} views and {} likes.",
        viewed.len(),
        liked.len()
    );

    // Read one decay score back to show the signal state is queryable; a
    // missing score is printed as 0.
    let item3_view_score = db.read_decay_score(EntityId::new(3), "view", 0)?;
    println!(
        "Item 3 view decay score: {:.4}",
        item3_view_score.unwrap_or(0.0)
    );
    println!();

    // ── 5. Retrieve ranked results ──────────────────────────────────────
    // Top 10 under the builtin `trending` profile.
    // NOTE(review): said to rank by share + view velocity with diversity
    // enforcement (max 1 item per creator) — confirm against the profile.
    let query = Retrieve::builder().profile("trending").limit(10).build()?;
    let results = db.retrieve(&query)?;
    println!(
        "RETRIEVE profile=trending: {} results from {} candidates",
        results.items.len(),
        results.total_candidates
    );
    println!("{:<6} {:<12} {:<8} Signals", "Rank", "Entity ID", "Score");
    println!("{}", "-".repeat(50));
    for hit in &results.items {
        // One `name=value` fragment per signal attached to the result.
        let fragments: Vec<String> = hit
            .signals
            .iter()
            .map(|s| format!("{}={:.3}", s.name, s.value))
            .collect();
        // Items with no signals get a dash in the last column.
        let signals_column = if fragments.is_empty() {
            "-".to_string()
        } else {
            fragments.join(", ")
        };
        println!(
            "{:<6} {:<12} {:<8.4} {}",
            hit.rank,
            hit.entity_id.as_u64(),
            hit.score,
            signals_column
        );
    }
    println!();

    // ── 6. Clean up ─────────────────────────────────────────────────────
    db.close()?;
    println!("tidalDB closed. Quickstart complete.");
    Ok(())
}