- Schema phase 1 (tasks 01-02): EntityId, EntityKind, Timestamp, Score, SignalTypeDef, DecayModel, Window, WindowSet — all with property tests and benchmark scaffolding
- Stub modules for storage, signals, query, ranking
- Full documentation suite: VISION, USE_CASES, SEQUENCE, API, CODING_GUIDELINES, ai-lookup, research docs, specs, roadmap, planning docs
- Marketing site (Next.js) with blog infrastructure
- .claude/ agents and skills for the tidalDB development workflow
- Foundation standards enforced: thiserror + tracing declared as dependencies, clippy::unwrap_used = deny added to lint config
- .gitignore hardened: .next/, node_modules/, .env, secrets, logs

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
452 lines
13 KiB
Rust
452 lines
13 KiB
Rust
use std::fmt;
use std::time::Duration;

use super::EntityKind;

/// A named signal type definition declared in schema.
|
|
///
|
|
/// This is the *declaration*, not runtime state. It describes how a signal
|
|
/// decays, what windows to maintain aggregates for, and whether velocity
|
|
/// is computed. The actual signal ledger and aggregation logic are Phase 1.4.
|
|
///
|
|
/// Fields are private — once validated and constructed by the `SchemaBuilder`
|
|
/// (Task 03), signal type definitions are immutable.
|
|
#[derive(Debug, Clone)]
|
|
pub struct SignalTypeDef {
|
|
name: String,
|
|
target: EntityKind,
|
|
decay: DecayModel,
|
|
windows: WindowSet,
|
|
velocity_enabled: bool,
|
|
}
|
|
|
|
impl SignalTypeDef {
|
|
/// Construct a signal type definition.
|
|
///
|
|
/// `pub(crate)`: only callable from the validation module (`SchemaBuilder`).
|
|
#[allow(dead_code)]
|
|
pub(crate) const fn new(
|
|
name: String,
|
|
target: EntityKind,
|
|
decay: DecayModel,
|
|
windows: WindowSet,
|
|
velocity_enabled: bool,
|
|
) -> Self {
|
|
Self {
|
|
name,
|
|
target,
|
|
decay,
|
|
windows,
|
|
velocity_enabled,
|
|
}
|
|
}
|
|
|
|
/// Unique name within the schema (e.g., "view", "like", "skip").
|
|
#[must_use]
|
|
pub fn name(&self) -> &str {
|
|
&self.name
|
|
}
|
|
|
|
/// Which entity kind this signal targets.
|
|
#[must_use]
|
|
pub const fn target(&self) -> EntityKind {
|
|
self.target
|
|
}
|
|
|
|
/// How the signal's weight decays over time.
|
|
#[must_use]
|
|
pub const fn decay(&self) -> &DecayModel {
|
|
&self.decay
|
|
}
|
|
|
|
/// Which time windows to maintain aggregates for.
|
|
#[must_use]
|
|
pub const fn windows(&self) -> &WindowSet {
|
|
&self.windows
|
|
}
|
|
|
|
/// Whether velocity computation is enabled.
|
|
#[must_use]
|
|
pub const fn velocity_enabled(&self) -> bool {
|
|
self.velocity_enabled
|
|
}
|
|
}
|
|
|
|
/// How a signal's contribution decays over time.
///
/// The critical design choice: `Exponential` stores the pre-computed
/// `lambda = ln(2) / half_life.as_secs_f64()` so that every signal write
/// and every ranking read avoids a division on the hot path.
#[derive(Debug, Clone, PartialEq)]
pub enum DecayModel {
    /// Weight halves every `half_life`.
    ///
    /// Running score formula: `S(t) = S(t_prev) * exp(-lambda * dt) + weight`
    Exponential {
        /// The duration after which the signal's contribution halves.
        half_life: Duration,
        /// Pre-computed: `ln(2) / half_life.as_secs_f64()`.
        lambda: f64,
    },
    /// Weight drops linearly to zero over `lifetime`.
    Linear {
        /// The duration over which the signal fully decays.
        lifetime: Duration,
    },
    /// Never decays. Used for permanent flags: hide, block, follow.
    Permanent,
}

impl DecayModel {
    /// Construct exponential decay with pre-computed lambda.
    ///
    /// `lambda` is computed as `ln(2) / half_life.as_secs_f64()`. This
    /// constructor does not guard against a zero `half_life`: the division
    /// by `0.0` then yields `lambda = +inf`.
    ///
    /// `pub(crate)`: bypasses validation. Use `SchemaBuilder` for external construction.
    #[allow(dead_code)] // presumably unused until `SchemaBuilder` (Task 03) lands — confirm, then drop
    pub(crate) fn exponential(half_life: Duration) -> Self {
        let lambda = std::f64::consts::LN_2 / half_life.as_secs_f64();
        Self::Exponential { half_life, lambda }
    }

    /// Construct linear decay.
    ///
    /// `pub(crate)`: bypasses validation. Use `SchemaBuilder` for external construction.
    #[allow(dead_code)] // presumably unused until `SchemaBuilder` (Task 03) lands — confirm, then drop
    pub(crate) const fn linear(lifetime: Duration) -> Self {
        Self::Linear { lifetime }
    }

    /// Returns the lambda value for `Exponential`, `None` otherwise.
    #[must_use]
    pub const fn lambda(&self) -> Option<f64> {
        // Variants are listed explicitly (no `_` arm) so that adding a new
        // decay model forces a decision here at compile time.
        match self {
            Self::Exponential { lambda, .. } => Some(*lambda),
            Self::Linear { .. } | Self::Permanent => None,
        }
    }

    /// Returns the half-life for `Exponential`, `None` otherwise.
    #[must_use]
    pub const fn half_life(&self) -> Option<Duration> {
        // Exhaustive on purpose; see `lambda`'s sibling match for the rationale:
        // a new variant must not silently fall into a catch-all.
        match self {
            Self::Exponential { half_life, .. } => Some(*half_life),
            Self::Linear { .. } | Self::Permanent => None,
        }
    }
}

/// A time window for signal aggregation.
///
/// Fixed variants — not configurable durations. The storage engine
/// pre-allocates bucketed counters per window. The materializer schedules
/// rollups at window boundaries. Arbitrary durations would force dynamic
/// allocation and unpredictable rollup schedules.
///
/// The `Ord` derivation sorts by temporal duration:
/// `OneHour < TwentyFourHours < SevenDays < ThirtyDays < AllTime`.
/// Derived ordering follows declaration order, so the variants below must
/// stay listed from shortest to longest.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum Window {
    /// Trailing one hour (3 600 s).
    OneHour,
    /// Trailing twenty-four hours (86 400 s).
    TwentyFourHours,
    /// Trailing seven days (604 800 s).
    SevenDays,
    /// Trailing thirty days (2 592 000 s).
    ThirtyDays,
    /// Unbounded window covering all recorded history.
    AllTime,
}

impl Window {
    /// The duration this window spans. `AllTime` returns `Duration::MAX`.
    #[must_use]
    pub const fn duration(&self) -> Duration {
        match self {
            Self::OneHour => Duration::from_secs(3_600),
            Self::TwentyFourHours => Duration::from_secs(86_400),
            Self::SevenDays => Duration::from_secs(604_800),
            Self::ThirtyDays => Duration::from_secs(2_592_000),
            Self::AllTime => Duration::MAX,
        }
    }

    /// Duration in seconds as `f64`.
    ///
    /// For velocity computation: `count / duration_secs`.
    /// `AllTime` returns `f64::INFINITY` — velocity = count / infinity = 0.0,
    /// which is correct (all-time counts don't have a meaningful rate).
    ///
    /// The finite values mirror the second counts used by [`Window::duration`].
    #[must_use]
    pub const fn duration_secs_f64(&self) -> f64 {
        match self {
            Self::OneHour => 3_600.0,
            Self::TwentyFourHours => 86_400.0,
            Self::SevenDays => 604_800.0,
            Self::ThirtyDays => 2_592_000.0,
            Self::AllTime => f64::INFINITY,
        }
    }

    /// Short label for display and key encoding.
    #[must_use]
    pub const fn label(&self) -> &'static str {
        match self {
            Self::OneHour => "1h",
            Self::TwentyFourHours => "24h",
            Self::SevenDays => "7d",
            Self::ThirtyDays => "30d",
            Self::AllTime => "all",
        }
    }
}

impl fmt::Display for Window {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
f.write_str(self.label())
|
|
}
|
|
}
|
|
|
|
/// An ordered, deduplicated set of windows.
|
|
///
|
|
/// Sorted from finest to coarsest (`OneHour < ... < AllTime`).
|
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
|
pub struct WindowSet {
|
|
windows: Vec<Window>,
|
|
}
|
|
|
|
impl WindowSet {
|
|
/// Construct from a slice. Deduplicates and sorts.
|
|
#[must_use]
|
|
pub fn new(windows: &[Window]) -> Self {
|
|
let mut sorted: Vec<Window> = windows.to_vec();
|
|
sorted.sort();
|
|
sorted.dedup();
|
|
Self { windows: sorted }
|
|
}
|
|
|
|
/// Empty set. Valid only for `Permanent` decay signals.
|
|
#[must_use]
|
|
pub const fn empty() -> Self {
|
|
Self {
|
|
windows: Vec::new(),
|
|
}
|
|
}
|
|
|
|
#[must_use]
|
|
pub fn is_empty(&self) -> bool {
|
|
self.windows.is_empty()
|
|
}
|
|
|
|
#[must_use]
|
|
pub fn len(&self) -> usize {
|
|
self.windows.len()
|
|
}
|
|
|
|
pub fn iter(&self) -> std::slice::Iter<'_, Window> {
|
|
self.windows.iter()
|
|
}
|
|
|
|
#[must_use]
|
|
pub fn contains(&self, w: &Window) -> bool {
|
|
self.windows.contains(w)
|
|
}
|
|
}
|
|
|
|
impl<'a> IntoIterator for &'a WindowSet {
|
|
type Item = &'a Window;
|
|
type IntoIter = std::slice::Iter<'a, Window>;
|
|
|
|
fn into_iter(self) -> Self::IntoIter {
|
|
self.iter()
|
|
}
|
|
}
|
|
|
|
/// Unit and property tests for `Window`, `WindowSet`, `DecayModel`, and
/// `SignalTypeDef`.
#[cfg(test)]
mod tests {
    use super::*;

    // === Window tests ===

    #[test]
    fn window_ordering() {
        assert!(Window::OneHour < Window::TwentyFourHours);
        assert!(Window::TwentyFourHours < Window::SevenDays);
        assert!(Window::SevenDays < Window::ThirtyDays);
        assert!(Window::ThirtyDays < Window::AllTime);
    }

    #[test]
    fn window_durations() {
        assert_eq!(Window::OneHour.duration(), Duration::from_secs(3_600));
        assert_eq!(
            Window::TwentyFourHours.duration(),
            Duration::from_secs(86_400)
        );
        assert_eq!(Window::SevenDays.duration(), Duration::from_secs(604_800));
        assert_eq!(
            Window::ThirtyDays.duration(),
            Duration::from_secs(2_592_000)
        );
        assert_eq!(Window::AllTime.duration(), Duration::MAX);
    }

    #[test]
    fn window_labels() {
        assert_eq!(Window::OneHour.label(), "1h");
        assert_eq!(Window::TwentyFourHours.label(), "24h");
        assert_eq!(Window::SevenDays.label(), "7d");
        assert_eq!(Window::ThirtyDays.label(), "30d");
        assert_eq!(Window::AllTime.label(), "all");
    }

    #[test]
    fn window_display_delegates_to_label() {
        assert_eq!(Window::OneHour.to_string(), "1h");
        assert_eq!(Window::AllTime.to_string(), "all");
    }

    #[test]
    fn window_duration_secs_f64() {
        assert_eq!(Window::OneHour.duration_secs_f64(), 3_600.0);
        assert_eq!(Window::TwentyFourHours.duration_secs_f64(), 86_400.0);
        assert!(Window::AllTime.duration_secs_f64().is_infinite());
    }

    // === WindowSet tests ===

    #[test]
    fn window_set_dedup_and_sort() {
        let ws = WindowSet::new(&[
            Window::SevenDays,
            Window::OneHour,
            Window::SevenDays,
            Window::AllTime,
        ]);
        assert_eq!(ws.len(), 3);
        let windows: Vec<_> = ws.iter().copied().collect();
        assert_eq!(
            windows,
            vec![Window::OneHour, Window::SevenDays, Window::AllTime]
        );
    }

    #[test]
    fn window_set_empty() {
        let ws = WindowSet::empty();
        assert!(ws.is_empty());
        assert_eq!(ws.len(), 0);
    }

    #[test]
    fn window_set_contains() {
        let ws = WindowSet::new(&[Window::OneHour, Window::AllTime]);
        assert!(ws.contains(&Window::OneHour));
        assert!(ws.contains(&Window::AllTime));
        assert!(!ws.contains(&Window::SevenDays));
    }

    // === DecayModel tests ===
    //
    // `expect` (with the invariant spelled out) is used instead of `unwrap`
    // so these tests stay clean under `clippy::unwrap_used = deny`.

    #[test]
    fn decay_model_exponential() {
        let model = DecayModel::exponential(Duration::from_secs(604_800)); // 7 days
        assert!(matches!(model, DecayModel::Exponential { .. }));
        let lambda = model
            .lambda()
            .expect("exponential decay always carries a lambda");
        let expected = std::f64::consts::LN_2 / 604_800.0;
        assert!((lambda - expected).abs() < 1e-20);
    }

    #[test]
    fn decay_model_linear() {
        let model = DecayModel::linear(Duration::from_secs(86_400));
        assert!(matches!(model, DecayModel::Linear { .. }));
        assert!(model.lambda().is_none());
        assert!(model.half_life().is_none());
    }

    #[test]
    fn decay_model_permanent() {
        assert_eq!(DecayModel::Permanent.lambda(), None);
        assert_eq!(DecayModel::Permanent.half_life(), None);
    }

    #[test]
    fn decay_model_tiny_halflife() {
        let model = DecayModel::exponential(Duration::from_nanos(1));
        let lambda = model
            .lambda()
            .expect("exponential decay always carries a lambda");
        // lambda should be enormous — signals decay instantly
        assert!(lambda > 1e8);
    }

    #[test]
    fn decay_model_huge_halflife() {
        let model = DecayModel::exponential(Duration::from_secs(365 * 24 * 3600)); // 1 year
        let lambda = model
            .lambda()
            .expect("exponential decay always carries a lambda");
        assert!(lambda > 0.0);
        assert!(lambda < 1e-6);
    }

    #[test]
    fn decay_model_exponential_stores_half_life() {
        let hl = Duration::from_secs(3600);
        let model = DecayModel::exponential(hl);
        assert_eq!(model.half_life(), Some(hl));
    }

    // === SignalTypeDef tests ===

    #[test]
    fn signal_type_def_getters() {
        let def = SignalTypeDef::new(
            "view".into(),
            EntityKind::Item,
            DecayModel::exponential(Duration::from_secs(604_800)),
            WindowSet::new(&[Window::OneHour, Window::AllTime]),
            true,
        );
        assert_eq!(def.name(), "view");
        assert_eq!(def.target(), EntityKind::Item);
        assert!(def.velocity_enabled());
        assert_eq!(def.windows().len(), 2);
        assert!(def.decay().lambda().is_some());
    }

    #[test]
    fn signal_type_def_permanent_no_windows() {
        let def = SignalTypeDef::new(
            "hide".into(),
            EntityKind::Item,
            DecayModel::Permanent,
            WindowSet::empty(),
            false,
        );
        assert_eq!(def.name(), "hide");
        assert!(!def.velocity_enabled());
        assert!(def.windows().is_empty());
        assert!(def.decay().lambda().is_none());
    }

    // === Property tests ===

    mod proptests {
        use super::*;
        use proptest::prelude::*;

        proptest! {
            #[test]
            fn decay_lambda_correct(secs in 1u64..=31_536_000u64) {
                let half_life = Duration::from_secs(secs);
                let model = DecayModel::exponential(half_life);
                // Fail loudly instead of passing vacuously if the variant is wrong.
                prop_assert!(matches!(model, DecayModel::Exponential { .. }));
                if let DecayModel::Exponential { lambda, .. } = model {
                    let expected = std::f64::consts::LN_2 / half_life.as_secs_f64();
                    prop_assert!((lambda - expected).abs() < 1e-15);
                }
            }

            #[test]
            fn lambda_times_halflife_is_ln2(secs in 1u64..=31_536_000u64) {
                let half_life = Duration::from_secs(secs);
                let model = DecayModel::exponential(half_life);
                // Fail loudly instead of passing vacuously if the variant is wrong.
                prop_assert!(matches!(model, DecayModel::Exponential { .. }));
                if let DecayModel::Exponential { lambda, .. } = model {
                    let product = lambda * half_life.as_secs_f64();
                    prop_assert!((product - std::f64::consts::LN_2).abs() < 1e-10);
                }
            }
        }
    }
}