tidaldb/tidal/src/schema/signal.rs
jordan 413b712c0a chore: initialize tidalDB repository with schema foundation and standards
- Schema phase 1 (tasks 01-02): EntityId, EntityKind, Timestamp, Score, SignalTypeDef, DecayModel, Window, WindowSet — all with property tests and benchmarks scaffolding
- Stub modules for storage, signals, query, ranking
- Full documentation suite: VISION, USE_CASES, SEQUENCE, API, CODING_GUIDELINES, ai-lookup, research docs, specs, roadmap, planning docs
- Marketing site (Next.js) with blog infrastructure
- .claude/ agents and skills for the tidalDB development workflow
- Foundation standards enforced: thiserror + tracing declared as dependencies, clippy::unwrap_used = deny added to lint config
- .gitignore hardened: .next/, node_modules/, .env, secrets, logs

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-20 12:52:20 -07:00

452 lines
13 KiB
Rust

use std::fmt;
use std::time::Duration;
use super::EntityKind;
/// Schema-level declaration of a single named signal type.
///
/// This is the *declaration*, not runtime state: it records how the signal
/// decays, which windows aggregates are maintained for, and whether velocity
/// is computed. The signal ledger and the aggregation logic themselves belong
/// to Phase 1.4.
///
/// Every field is private and no setter is exposed, so a definition cannot be
/// changed after the `SchemaBuilder` (Task 03) has validated and built it.
#[derive(Debug, Clone)]
pub struct SignalTypeDef {
    /// Name identifying the signal within the schema.
    name: String,
    /// Entity kind the signal is recorded against.
    target: EntityKind,
    /// Decay behaviour applied to the signal's weight.
    decay: DecayModel,
    /// Windows for which aggregates are kept.
    windows: WindowSet,
    /// Whether velocity is computed for this signal.
    velocity_enabled: bool,
}

impl SignalTypeDef {
    /// Assemble a definition from its parts.
    ///
    /// Nothing is validated here; the arguments are stored exactly as given.
    ///
    /// `pub(crate)`: only callable from the validation module (`SchemaBuilder`).
    // NOTE(review): `dead_code` is presumably allowed because nothing outside
    // the tests calls this until the `SchemaBuilder` lands — confirm, and drop
    // the attribute once it does.
    #[allow(dead_code)]
    pub(crate) const fn new(
        name: String,
        target: EntityKind,
        decay: DecayModel,
        windows: WindowSet,
        velocity_enabled: bool,
    ) -> Self {
        Self {
            name,
            target,
            decay,
            windows,
            velocity_enabled,
        }
    }

    /// The signal's name, unique within the schema (e.g., "view", "like", "skip").
    #[must_use]
    pub fn name(&self) -> &str {
        self.name.as_str()
    }

    /// The entity kind this signal targets, returned by value.
    #[must_use]
    pub const fn target(&self) -> EntityKind {
        self.target
    }

    /// The model describing how the signal's weight decays over time.
    #[must_use]
    pub const fn decay(&self) -> &DecayModel {
        &self.decay
    }

    /// The time windows aggregates are maintained for.
    #[must_use]
    pub const fn windows(&self) -> &WindowSet {
        &self.windows
    }

    /// `true` when velocity computation is enabled for this signal.
    #[must_use]
    pub const fn velocity_enabled(&self) -> bool {
        self.velocity_enabled
    }
}
/// Describes how a signal's contribution fades as time passes.
///
/// The critical design choice: `Exponential` carries the pre-computed
/// `lambda = ln(2) / half_life.as_secs_f64()`, so signal writes and ranking
/// reads can use it directly instead of dividing on the hot path.
#[derive(Debug, Clone, PartialEq)]
pub enum DecayModel {
    /// Weight halves every `half_life`.
    ///
    /// Running score formula: `S(t) = S(t_prev) * exp(-lambda * dt) + weight`
    Exponential {
        /// The duration after which the signal's contribution halves.
        half_life: Duration,
        /// Pre-computed: `ln(2) / half_life.as_secs_f64()`.
        lambda: f64,
    },
    /// Weight drops linearly to zero over `lifetime`.
    Linear {
        /// The duration over which the signal fully decays.
        lifetime: Duration,
    },
    /// Never decays. Used for permanent flags: hide, block, follow.
    Permanent,
}

impl DecayModel {
    /// Build an `Exponential` model, deriving `lambda` from `half_life`.
    ///
    /// No validation is performed: a zero `half_life` divides by `0.0` and
    /// therefore stores `lambda = +infinity`.
    ///
    /// `pub(crate)`: bypasses validation. Use `SchemaBuilder` for external construction.
    #[allow(dead_code)]
    pub(crate) fn exponential(half_life: Duration) -> Self {
        let half_life_secs = half_life.as_secs_f64();
        Self::Exponential {
            half_life,
            lambda: std::f64::consts::LN_2 / half_life_secs,
        }
    }

    /// Build a `Linear` model that reaches zero after `lifetime`.
    ///
    /// `pub(crate)`: bypasses validation. Use `SchemaBuilder` for external construction.
    #[allow(dead_code)]
    pub(crate) const fn linear(lifetime: Duration) -> Self {
        Self::Linear { lifetime }
    }

    /// The stored decay constant: `Some` for `Exponential`, `None` for every
    /// other variant.
    #[must_use]
    pub const fn lambda(&self) -> Option<f64> {
        // Variants are listed explicitly so that adding one forces a decision here.
        match *self {
            Self::Exponential { lambda, .. } => Some(lambda),
            Self::Linear { .. } | Self::Permanent => None,
        }
    }

    /// The configured half-life: `Some` for `Exponential`, `None` for every
    /// other variant.
    #[must_use]
    pub const fn half_life(&self) -> Option<Duration> {
        // Variants are listed explicitly so that adding one forces a decision here.
        match *self {
            Self::Exponential { half_life, .. } => Some(half_life),
            Self::Linear { .. } | Self::Permanent => None,
        }
    }
}
/// A time window for signal aggregation.
///
/// Fixed variants — not configurable durations. The storage engine
/// pre-allocates bucketed counters per window. The materializer schedules
/// rollups at window boundaries. Arbitrary durations would force dynamic
/// allocation and unpredictable rollup schedules.
///
/// The `Ord` derivation follows declaration order, which is kept in order of
/// increasing span:
/// `OneHour < TwentyFourHours < SevenDays < ThirtyDays < AllTime`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum Window {
    /// The last hour (3 600 seconds).
    OneHour,
    /// The last 24 hours (86 400 seconds).
    TwentyFourHours,
    /// The last 7 days (604 800 seconds).
    SevenDays,
    /// The last 30 days (2 592 000 seconds).
    ThirtyDays,
    /// Unbounded: everything ever recorded.
    AllTime,
}

impl Window {
    /// The duration this window spans. `AllTime` returns `Duration::MAX`.
    #[must_use]
    pub const fn duration(&self) -> Duration {
        match self {
            Self::OneHour => Duration::from_secs(3_600),
            Self::TwentyFourHours => Duration::from_secs(86_400),
            Self::SevenDays => Duration::from_secs(604_800),
            Self::ThirtyDays => Duration::from_secs(2_592_000),
            Self::AllTime => Duration::MAX,
        }
    }

    /// Duration in seconds as `f64`.
    ///
    /// For velocity computation: `count / duration_secs`.
    /// `AllTime` returns `f64::INFINITY` — velocity = count / infinity = 0.0,
    /// which is correct (all-time counts don't have a meaningful rate).
    ///
    /// Note that this differs from `duration()` for `AllTime`, which reports
    /// the finite `Duration::MAX` rather than infinity.
    #[must_use]
    pub const fn duration_secs_f64(&self) -> f64 {
        match self {
            Self::OneHour => 3_600.0,
            Self::TwentyFourHours => 86_400.0,
            Self::SevenDays => 604_800.0,
            Self::ThirtyDays => 2_592_000.0,
            Self::AllTime => f64::INFINITY,
        }
    }

    /// Short label for display and key encoding.
    #[must_use]
    pub const fn label(&self) -> &'static str {
        match self {
            Self::OneHour => "1h",
            Self::TwentyFourHours => "24h",
            Self::SevenDays => "7d",
            Self::ThirtyDays => "30d",
            Self::AllTime => "all",
        }
    }
}

impl fmt::Display for Window {
    /// Writes the window's `label()`.
    ///
    /// Goes through `Formatter::pad` rather than `write_str` so that width,
    /// fill, alignment and precision flags (e.g. `{:>5}`) are applied the
    /// same way they are for a plain `&str`. With no flags the output is
    /// exactly the label.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.pad(self.label())
    }
}
/// A collection of windows with no duplicates, kept in ascending order.
///
/// Ordering follows `Window`'s `Ord`, i.e. finest to coarsest
/// (`OneHour < ... < AllTime`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct WindowSet {
    /// Backing storage; sorted and duplicate-free when built via `new`.
    windows: Vec<Window>,
}

impl WindowSet {
    /// Build a set from `windows`, which may be in any order and may repeat
    /// entries. The input slice is copied, never modified.
    #[must_use]
    pub fn new(windows: &[Window]) -> Self {
        let mut unique = Vec::from(windows);
        // Equal `Window` values are indistinguishable, so an unstable sort
        // produces the same sequence as a stable one.
        unique.sort_unstable();
        // After sorting, duplicates are adjacent and `dedup` removes them all.
        unique.dedup();
        Self { windows: unique }
    }

    /// Empty set. Valid only for `Permanent` decay signals.
    #[must_use]
    pub const fn empty() -> Self {
        Self {
            windows: Vec::new(),
        }
    }

    /// `true` when the set holds no windows.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.windows.is_empty()
    }

    /// Number of distinct windows in the set.
    #[must_use]
    pub fn len(&self) -> usize {
        self.windows.len()
    }

    /// Iterate over the windows by reference, in stored order.
    pub fn iter(&self) -> std::slice::Iter<'_, Window> {
        self.windows.as_slice().iter()
    }

    /// `true` when `w` is a member of the set.
    #[must_use]
    pub fn contains(&self, w: &Window) -> bool {
        self.iter().any(|member| member == w)
    }
}

/// Lets `for w in &window_set` walk the windows in stored order.
impl<'a> IntoIterator for &'a WindowSet {
    type Item = &'a Window;
    type IntoIter = std::slice::Iter<'a, Window>;

    fn into_iter(self) -> Self::IntoIter {
        self.windows.iter()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // === Window tests ===

    /// Every window sorts strictly before the next coarser one.
    #[test]
    fn window_ordering() {
        let finest_to_coarsest = [
            Window::OneHour,
            Window::TwentyFourHours,
            Window::SevenDays,
            Window::ThirtyDays,
            Window::AllTime,
        ];
        let neighbours = finest_to_coarsest
            .iter()
            .zip(finest_to_coarsest.iter().skip(1));
        for (finer, coarser) in neighbours {
            assert!(finer < coarser);
        }
    }

    /// `duration()` reports the documented span for each variant.
    #[test]
    fn window_durations() {
        let expected = [
            (Window::OneHour, Duration::from_secs(3_600)),
            (Window::TwentyFourHours, Duration::from_secs(86_400)),
            (Window::SevenDays, Duration::from_secs(604_800)),
            (Window::ThirtyDays, Duration::from_secs(2_592_000)),
            (Window::AllTime, Duration::MAX),
        ];
        for (window, span) in expected {
            assert_eq!(window.duration(), span);
        }
    }

    /// `label()` yields the short textual form of each variant.
    #[test]
    fn window_labels() {
        let expected = [
            (Window::OneHour, "1h"),
            (Window::TwentyFourHours, "24h"),
            (Window::SevenDays, "7d"),
            (Window::ThirtyDays, "30d"),
            (Window::AllTime, "all"),
        ];
        for (window, label) in expected {
            assert_eq!(window.label(), label);
        }
    }

    /// `Display` output matches the label.
    #[test]
    fn window_display_delegates_to_label() {
        let hour = Window::OneHour.to_string();
        let all = Window::AllTime.to_string();
        assert_eq!(hour, "1h");
        assert_eq!(all, "all");
    }

    /// Finite windows report their seconds; `AllTime` is infinite.
    #[test]
    fn window_duration_secs_f64() {
        assert_eq!(Window::OneHour.duration_secs_f64(), 3_600.0);
        assert_eq!(Window::TwentyFourHours.duration_secs_f64(), 86_400.0);
        let unbounded = Window::AllTime.duration_secs_f64();
        assert!(unbounded.is_infinite());
    }

    // === WindowSet tests ===

    /// Unordered input with a repeat comes out sorted and unique.
    #[test]
    fn window_set_dedup_and_sort() {
        let input = [
            Window::SevenDays,
            Window::OneHour,
            Window::SevenDays,
            Window::AllTime,
        ];
        let ws = WindowSet::new(&input);
        assert_eq!(ws.len(), 3);
        let stored: Vec<Window> = ws.iter().copied().collect();
        assert_eq!(
            stored,
            vec![Window::OneHour, Window::SevenDays, Window::AllTime]
        );
    }

    /// `empty()` produces a set with no members.
    #[test]
    fn window_set_empty() {
        let ws = WindowSet::empty();
        assert!(ws.is_empty());
        assert_eq!(ws.len(), 0);
    }

    /// Membership is true for inserted windows and false otherwise.
    #[test]
    fn window_set_contains() {
        let ws = WindowSet::new(&[Window::OneHour, Window::AllTime]);
        for member in [Window::OneHour, Window::AllTime] {
            assert!(ws.contains(&member));
        }
        assert!(!ws.contains(&Window::SevenDays));
    }

    // === DecayModel tests ===

    /// A 7-day half-life yields the `Exponential` variant with lambda = ln(2) / 604800.
    #[test]
    fn decay_model_exponential() {
        let seven_days = Duration::from_secs(604_800);
        let model = DecayModel::exponential(seven_days);
        assert!(matches!(model, DecayModel::Exponential { .. }));
        let lambda = model.lambda().unwrap();
        let expected = std::f64::consts::LN_2 / 604_800.0;
        let error = (lambda - expected).abs();
        assert!(error < 1e-20);
    }

    /// Linear decay exposes neither lambda nor half-life.
    #[test]
    fn decay_model_linear() {
        let one_day = Duration::from_secs(86_400);
        let model = DecayModel::linear(one_day);
        assert!(matches!(model, DecayModel::Linear { .. }));
        assert!(model.lambda().is_none());
        assert!(model.half_life().is_none());
    }

    /// Permanent decay exposes neither lambda nor half-life.
    #[test]
    fn decay_model_permanent() {
        let model = DecayModel::Permanent;
        assert_eq!(model.lambda(), None);
        assert_eq!(model.half_life(), None);
    }

    /// A one-nanosecond half-life gives a very large lambda.
    #[test]
    fn decay_model_tiny_halflife() {
        let one_nano = Duration::from_nanos(1);
        let lambda = DecayModel::exponential(one_nano).lambda().unwrap();
        // lambda should be enormous — signals decay instantly
        assert!(lambda > 1e8);
    }

    /// A one-year half-life gives a small but strictly positive lambda.
    #[test]
    fn decay_model_huge_halflife() {
        let one_year = Duration::from_secs(31_536_000); // 365 * 24 * 3600
        let lambda = DecayModel::exponential(one_year).lambda().unwrap();
        assert!(lambda > 0.0);
        assert!(lambda < 1e-6);
    }

    /// The half-life passed to the constructor is returned unchanged.
    #[test]
    fn decay_model_exponential_stores_half_life() {
        let one_hour = Duration::from_secs(3600);
        let stored = DecayModel::exponential(one_hour).half_life();
        assert_eq!(stored, Some(one_hour));
    }

    // === SignalTypeDef tests ===

    /// Getters return exactly what the constructor was given.
    #[test]
    fn signal_type_def_getters() {
        let decay = DecayModel::exponential(Duration::from_secs(604_800));
        let windows = WindowSet::new(&[Window::OneHour, Window::AllTime]);
        let def = SignalTypeDef::new(String::from("view"), EntityKind::Item, decay, windows, true);

        assert_eq!(def.name(), "view");
        assert_eq!(def.target(), EntityKind::Item);
        assert!(def.velocity_enabled());
        assert_eq!(def.windows().len(), 2);
        assert!(def.decay().lambda().is_some());
    }

    /// A permanent signal can be declared with no windows and no velocity.
    #[test]
    fn signal_type_def_permanent_no_windows() {
        let def = SignalTypeDef::new(
            String::from("hide"),
            EntityKind::Item,
            DecayModel::Permanent,
            WindowSet::empty(),
            false,
        );

        assert_eq!(def.name(), "hide");
        assert!(!def.velocity_enabled());
        assert!(def.windows().is_empty());
        assert!(def.decay().lambda().is_none());
    }

    // === Property tests ===

    mod proptests {
        use super::*;
        use proptest::prelude::*;

        proptest! {
            // For any half-life from one second to one year, the stored
            // lambda equals ln(2) / half_life within 1e-15.
            #[test]
            fn decay_lambda_correct(secs in 1u64..=31_536_000u64) {
                let half_life = Duration::from_secs(secs);
                if let DecayModel::Exponential { lambda, .. } = DecayModel::exponential(half_life) {
                    let expected = std::f64::consts::LN_2 / half_life.as_secs_f64();
                    let error = (lambda - expected).abs();
                    prop_assert!(error < 1e-15);
                }
            }

            // Multiplying lambda back by the half-life recovers ln(2)
            // within 1e-10.
            #[test]
            fn lambda_times_halflife_is_ln2(secs in 1u64..=31_536_000u64) {
                let half_life = Duration::from_secs(secs);
                if let DecayModel::Exponential { lambda, .. } = DecayModel::exponential(half_life) {
                    let product = lambda * half_life.as_secs_f64();
                    let error = (product - std::f64::consts::LN_2).abs();
                    prop_assert!(error < 1e-10);
                }
            }
        }
    }
}