202 lines
6.5 KiB
Rust
202 lines
6.5 KiB
Rust
#![allow(clippy::unwrap_used)]
|
|
//! tidalDB quickstart: schema, items, signals, ranking.
|
|
//!
|
|
//! Demonstrates the full ingestion-to-ranking loop:
|
|
//! 1. Define a schema with `view`, `like`, and `share` signals
|
|
//! 2. Open an ephemeral database
|
|
//! 3. Write 20 items with metadata and 128D random embeddings
|
|
//! 4. Record engagement: view 5 items, like 3 of those
|
|
//! 5. Retrieve ranked results using the `trending` profile
|
|
//! 6. Print ranked items with scores
|
|
//!
|
|
//! # Running
|
|
//!
|
|
//! ```bash
|
|
//! cargo run --manifest-path tidal/Cargo.toml --example quickstart
|
|
//! ```
|
|
|
|
use std::collections::HashMap;
|
|
use std::time::Duration;
|
|
|
|
use rand::Rng;
|
|
use tidaldb::TidalDb;
|
|
use tidaldb::schema::{DecaySpec, EntityId, EntityKind, SchemaBuilder, Timestamp, Window};
|
|
|
|
/// Generate a random unit-normalized embedding of the given dimensionality.
|
|
fn random_unit_vector(dim: usize, rng: &mut impl Rng) -> Vec<f32> {
|
|
let v: Vec<f32> = (0..dim).map(|_| rng.random::<f32>() - 0.5).collect();
|
|
let norm = v.iter().map(|x| x * x).sum::<f32>().sqrt();
|
|
if norm < f32::EPSILON {
|
|
// Degenerate case: return a unit vector along the first axis.
|
|
let mut unit = vec![0.0_f32; dim];
|
|
unit[0] = 1.0;
|
|
return unit;
|
|
}
|
|
v.iter().map(|x| x / norm).collect()
|
|
}
|
|
|
|
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|
// Initialize tracing so spans emitted by tidalDB are visible.
|
|
tracing_subscriber::fmt()
|
|
.with_env_filter("tidaldb=info")
|
|
.init();
|
|
|
|
// ── 1. Define the schema ────────────────────────────────────────────
|
|
|
|
let mut schema = SchemaBuilder::new();
|
|
|
|
// View signal: 7-day half-life, 1h + 24h windows, velocity enabled.
|
|
let _ = schema
|
|
.signal(
|
|
"view",
|
|
EntityKind::Item,
|
|
DecaySpec::Exponential {
|
|
half_life: Duration::from_secs(7 * 24 * 3600),
|
|
},
|
|
)
|
|
.windows(&[Window::OneHour, Window::TwentyFourHours, Window::AllTime])
|
|
.velocity(true)
|
|
.add();
|
|
|
|
// Like signal: 30-day half-life, AllTime window.
|
|
let _ = schema
|
|
.signal(
|
|
"like",
|
|
EntityKind::Item,
|
|
DecaySpec::Exponential {
|
|
half_life: Duration::from_secs(30 * 24 * 3600),
|
|
},
|
|
)
|
|
.windows(&[Window::AllTime])
|
|
.velocity(false)
|
|
.add();
|
|
|
|
// Share signal: needed by the trending profile's boost definitions.
|
|
let _ = schema
|
|
.signal(
|
|
"share",
|
|
EntityKind::Item,
|
|
DecaySpec::Exponential {
|
|
half_life: Duration::from_secs(3 * 24 * 3600),
|
|
},
|
|
)
|
|
.windows(&[Window::TwentyFourHours, Window::AllTime])
|
|
.velocity(true)
|
|
.add();
|
|
|
|
let schema = schema.build()?;
|
|
|
|
// ── 2. Open an ephemeral database ───────────────────────────────────
|
|
|
|
let db = TidalDb::builder().ephemeral().with_schema(schema).open()?;
|
|
|
|
db.health_check()?;
|
|
|
|
println!("tidalDB opened (ephemeral, build: {})", tidaldb::BUILD_HASH);
|
|
println!();
|
|
|
|
// ── 3. Write 20 items with metadata and embeddings ──────────────────
|
|
|
|
let mut rng = rand::rng();
|
|
let categories = ["music", "tech", "cooking", "sports", "art"];
|
|
let dim = 128;
|
|
|
|
for i in 1..=20 {
|
|
let mut metadata = HashMap::new();
|
|
metadata.insert("title".to_string(), format!("Item {i}: Great Content"));
|
|
metadata.insert(
|
|
"category".to_string(),
|
|
categories[i % categories.len()].to_string(),
|
|
);
|
|
metadata.insert("format".to_string(), "video".to_string());
|
|
metadata.insert("duration".to_string(), format!("{}", 60 + i * 30));
|
|
metadata.insert(
|
|
"created_at".to_string(),
|
|
Timestamp::now().as_nanos().to_string(),
|
|
);
|
|
|
|
db.write_item_with_metadata(EntityId::new(i as u64), &metadata)?;
|
|
|
|
let embedding = random_unit_vector(dim, &mut rng);
|
|
db.write_item_embedding(EntityId::new(i as u64), &embedding)?;
|
|
}
|
|
|
|
println!(
|
|
"Wrote {} items with metadata and {dim}D embeddings.",
|
|
db.item_count()
|
|
);
|
|
|
|
// ── 4. Record engagement signals ────────────────────────────────────
|
|
|
|
let now = Timestamp::now();
|
|
let viewed_items = [1u64, 3, 7, 12, 18];
|
|
let liked_items = [3u64, 7, 18];
|
|
|
|
for &item_id in &viewed_items {
|
|
db.signal("view", EntityId::new(item_id), 1.0, now)?;
|
|
}
|
|
for &item_id in &liked_items {
|
|
db.signal("like", EntityId::new(item_id), 1.0, now)?;
|
|
}
|
|
|
|
println!(
|
|
"Recorded {} views and {} likes.",
|
|
viewed_items.len(),
|
|
liked_items.len()
|
|
);
|
|
|
|
// Verify signal state is live.
|
|
let score = db.read_decay_score(EntityId::new(3), "view", 0)?;
|
|
println!("Item 3 view decay score: {:.4}", score.unwrap_or(0.0));
|
|
println!();
|
|
|
|
// ── 5. Retrieve ranked results ──────────────────────────────────────
|
|
|
|
// The `trending` builtin profile ranks by share + view velocity with
|
|
// diversity enforcement (max 1 item per creator).
|
|
let query = tidaldb::query::retrieve::Retrieve::builder()
|
|
.profile("trending")
|
|
.limit(10)
|
|
.build()?;
|
|
|
|
let results = db.retrieve(&query)?;
|
|
|
|
println!(
|
|
"RETRIEVE profile=trending: {} results from {} candidates",
|
|
results.items.len(),
|
|
results.total_candidates
|
|
);
|
|
println!("{:<6} {:<12} {:<8} Signals", "Rank", "Entity ID", "Score");
|
|
println!("{}", "-".repeat(50));
|
|
|
|
for item in &results.items {
|
|
let signal_summary: String = item
|
|
.signals
|
|
.iter()
|
|
.map(|s| format!("{}={:.3}", s.name, s.value))
|
|
.collect::<Vec<_>>()
|
|
.join(", ");
|
|
|
|
println!(
|
|
"{:<6} {:<12} {:<8.4} {}",
|
|
item.rank,
|
|
item.entity_id.as_u64(),
|
|
item.score,
|
|
if signal_summary.is_empty() {
|
|
"-".to_string()
|
|
} else {
|
|
signal_summary
|
|
}
|
|
);
|
|
}
|
|
|
|
println!();
|
|
|
|
// ── 6. Clean up ─────────────────────────────────────────────────────
|
|
|
|
db.close()?;
|
|
println!("tidalDB closed. Quickstart complete.");
|
|
|
|
Ok(())
|
|
}
|