tidaldb/tidal/src/entities/preference.rs

//! Preference vector: per-user taste embedding with L2 normalization invariant.
//!
//! Tracks user taste by maintaining a preference vector that evolves with
//! interactions. The vector is L2-normalized on every update to ensure
//! consistent cosine similarity scoring during personalized ranking.

use dashmap::DashMap;

/// Concurrent store of per-user preference vectors, each kept L2-normalized.
///
/// A user's vector is updated via exponential moving average: each new
/// interaction embedding is blended with the current preference using
/// `learning_rate`, and the result is re-normalized. Vectors whose norm is
/// (numerically) zero are stored unchanged rather than normalized.
///
/// Thread-safe via `DashMap`. The map is sharded by key hash, so concurrent
/// updates to different users only contend when both users fall into the same
/// shard.
#[derive(Debug)]
pub struct PreferenceVectors {
    /// `user_id` -> normalized preference vector
    inner: DashMap<u64, Vec<f32>>,
    /// Dimensionality of the embedding space. All vectors must have this length.
    dim: usize,
    /// Learning rate for exponential moving average updates.
    /// Default (used by `new`): 0.1, i.e. the new interaction contributes 10%
    /// to the updated preference.
    learning_rate: f32,
}

impl PreferenceVectors {
    /// Build an empty store for embeddings of length `dim`, using the default
    /// learning rate of 0.1.
    #[must_use]
    pub fn new(dim: usize) -> Self {
        Self::with_learning_rate(dim, 0.1)
    }

    /// Build an empty store for embeddings of length `dim` with a
    /// caller-chosen learning rate.
    ///
    /// `learning_rate` is the weight given to each new interaction by
    /// `update`; it is stored exactly as passed, without validation.
    #[must_use]
    pub fn with_learning_rate(dim: usize, learning_rate: f32) -> Self {
        Self {
            inner: DashMap::new(),
            dim,
            learning_rate,
        }
    }

    /// Look up a user's current preference vector.
    ///
    /// Returns an owned copy of the stored vector, or `None` when nothing has
    /// been recorded for `user_id`.
    #[must_use]
    pub fn get(&self, user_id: u64) -> Option<Vec<f32>> {
        self.inner
            .get(&user_id)
            .map(|entry| entry.value().clone())
    }

    /// Overwrite a user's preference vector (e.g., for cold-start seeding).
    ///
    /// The vector is passed through `l2_normalize` before it is stored, and
    /// replaces any previous entry for `user_id`. Returns `false` -- and stores
    /// nothing -- when `vec.len()` differs from the configured dimension.
    #[must_use]
    pub fn set(&self, user_id: u64, mut vec: Vec<f32>) -> bool {
        let accepted = vec.len() == self.dim;
        if accepted {
            l2_normalize(&mut vec);
            self.inner.insert(user_id, vec);
        }
        accepted
    }

    /// Move a user's preference toward an interaction embedding.
    ///
    /// For an existing preference, every component becomes
    /// `(1 - lr) * pref + lr * interaction`, after which the vector is
    /// L2-normalized again.
    ///
    /// For a user without a preference, the (normalized) interaction embedding
    /// is stored as-is. The vacant and occupied cases are handled through the
    /// map's entry API, so a first insertion is never blended with itself.
    ///
    /// Returns `false` -- leaving the store untouched -- when the embedding
    /// length differs from the configured dimension.
    #[must_use]
    pub fn update(&self, user_id: u64, interaction_embedding: &[f32]) -> bool {
        use dashmap::mapref::entry::Entry;

        if interaction_embedding.len() != self.dim {
            return false;
        }

        let lr = self.learning_rate;
        // Weight retained by the existing preference; invariant across components.
        let keep: f32 = 1.0 - lr;
        match self.inner.entry(user_id) {
            Entry::Vacant(slot) => {
                let mut initial = interaction_embedding.to_vec();
                l2_normalize(&mut initial);
                slot.insert(initial);
            }
            Entry::Occupied(mut slot) => {
                let current = slot.get_mut();
                for (weight, &sample) in current.iter_mut().zip(interaction_embedding) {
                    *weight = keep.mul_add(*weight, lr * sample);
                }
                l2_normalize(current);
            }
        }
        true
    }

    /// Cosine similarity between a user's preference and `candidate`.
    ///
    /// Returns `None` when `candidate` has the wrong length or the user has no
    /// stored preference. A candidate whose norm is below `f32::EPSILON`
    /// yields `Some(0.0)`.
    ///
    /// Only the candidate's norm is divided out: the stored preference is
    /// assumed to already be unit-length, so callers need not pre-normalize
    /// the candidate.
    #[must_use]
    #[allow(clippy::significant_drop_tightening)]
    pub fn cosine_similarity(&self, user_id: u64, candidate: &[f32]) -> Option<f32> {
        if candidate.len() != self.dim {
            return None;
        }
        let stored = self.inner.get(&user_id)?;
        let squared_len: f32 = candidate.iter().map(|c| c * c).sum();
        let candidate_norm = squared_len.sqrt();
        let similarity = if candidate_norm < f32::EPSILON {
            // Degenerate candidate: no direction to compare against.
            0.0
        } else {
            let dot: f32 = stored.iter().zip(candidate).map(|(p, c)| p * c).sum();
            dot / candidate_norm
        };
        Some(similarity)
    }

    /// How many users currently have a stored preference.
    #[must_use]
    pub fn len(&self) -> usize {
        self.inner.len()
    }

    /// `true` when no user has a stored preference.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.inner.is_empty()
    }
}

/// L2-normalize a vector in place so that its Euclidean length becomes 1.
///
/// The squared magnitudes are accumulated in `f64`. Squaring an `f32` whose
/// absolute value exceeds roughly `1.8e19` overflows `f32` to infinity, which
/// would otherwise turn a perfectly valid large vector into all zeros; the
/// wider accumulator also reduces rounding error on long vectors.
///
/// The vector is left untouched when its norm is at most `f32::EPSILON`
/// (treated as a zero vector) or is not finite (some element is NaN or
/// infinite), because no meaningful direction can be derived in those cases.
// Narrowing cast is safe: |v| <= norm, so each quotient lies in [-1, 1] and
// converting back to `f32` can only round, never overflow.
#[allow(clippy::cast_possible_truncation)]
fn l2_normalize(vec: &mut [f32]) {
    let norm = vec
        .iter()
        .map(|&x| f64::from(x) * f64::from(x))
        .sum::<f64>()
        .sqrt();
    if !norm.is_finite() || norm <= f64::from(f32::EPSILON) {
        return;
    }
    for v in vec.iter_mut() {
        *v = (f64::from(*v) / norm) as f32;
    }
}

#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::float_cmp)]
mod tests {
    use super::*;

    /// `set` stores the L2-normalized form of the supplied vector.
    #[test]
    fn set_and_get() {
        let pv = PreferenceVectors::new(3);
        assert!(pv.set(1, vec![3.0, 4.0, 0.0]));
        let v = pv.get(1).unwrap();
        // 3/5, 4/5, 0
        assert!((v[0] - 0.6).abs() < 1e-6);
        assert!((v[1] - 0.8).abs() < 1e-6);
        assert!((v[2] - 0.0).abs() < 1e-6);
    }

    /// A vector of the wrong length is rejected and nothing is stored.
    #[test]
    fn set_wrong_dim_rejected() {
        let pv = PreferenceVectors::new(3);
        assert!(!pv.set(1, vec![1.0, 2.0]));
        assert!(pv.get(1).is_none());
    }

    /// The first update for a user stores the interaction embedding itself.
    #[test]
    fn update_creates_initial() {
        let pv = PreferenceVectors::new(3);
        assert!(pv.update(1, &[1.0, 0.0, 0.0]));
        let v = pv.get(1).unwrap();
        assert!((v[0] - 1.0).abs() < 1e-6);
    }

    /// A mismatched interaction embedding must not create an entry.
    #[test]
    fn update_wrong_dim_rejected() {
        let pv = PreferenceVectors::new(3);
        assert!(!pv.update(1, &[1.0, 0.0]));
        assert!(pv.get(1).is_none());
        assert!(pv.is_empty());
    }

    /// A mismatched interaction embedding must leave an existing entry intact.
    #[test]
    fn update_wrong_dim_keeps_existing() {
        let pv = PreferenceVectors::new(2);
        assert!(pv.set(1, vec![1.0, 0.0]));
        assert!(!pv.update(1, &[0.0, 1.0, 0.0]));
        assert_eq!(pv.get(1).unwrap(), vec![1.0, 0.0]);
    }

    /// An update on an existing preference blends and re-normalizes.
    #[test]
    fn update_blends() {
        let pv = PreferenceVectors::with_learning_rate(2, 0.5);
        assert!(pv.set(1, vec![1.0, 0.0]));
        assert!(pv.update(1, &[0.0, 1.0]));
        let v = pv.get(1).unwrap();
        // After blend: (0.5, 0.5), normalized: (1/sqrt(2), 1/sqrt(2))
        let expected = 1.0 / 2.0f32.sqrt();
        assert!((v[0] - expected).abs() < 1e-5);
        assert!((v[1] - expected).abs() < 1e-5);
    }

    /// With a learning rate of zero the interaction contributes nothing.
    #[test]
    fn update_with_zero_learning_rate_keeps_preference() {
        let pv = PreferenceVectors::with_learning_rate(2, 0.0);
        assert!(pv.set(1, vec![1.0, 0.0]));
        assert!(pv.update(1, &[0.0, 1.0]));
        let v = pv.get(1).unwrap();
        assert!((v[0] - 1.0).abs() < 1e-6);
        assert!(v[1].abs() < 1e-6);
    }

    #[test]
    fn cosine_similarity_normalized() {
        let pv = PreferenceVectors::new(3);
        assert!(pv.set(1, vec![1.0, 0.0, 0.0]));
        // Cosine with self = 1.0
        let sim = pv.cosine_similarity(1, &[1.0, 0.0, 0.0]).unwrap();
        assert!((sim - 1.0).abs() < 1e-6);
        // Orthogonal = 0.0
        let sim = pv.cosine_similarity(1, &[0.0, 1.0, 0.0]).unwrap();
        assert!(sim.abs() < 1e-6);
    }

    /// The candidate's magnitude must not influence the score, only its direction.
    #[test]
    fn cosine_similarity_ignores_candidate_scale() {
        let pv = PreferenceVectors::new(3);
        assert!(pv.set(1, vec![3.0, 4.0, 0.0]));
        let aligned = pv.cosine_similarity(1, &[30.0, 40.0, 0.0]).unwrap();
        assert!((aligned - 1.0).abs() < 1e-5);
        let opposed = pv.cosine_similarity(1, &[-6.0, -8.0, 0.0]).unwrap();
        assert!((opposed + 1.0).abs() < 1e-5);
    }

    /// A zero-norm candidate has no direction; the score is defined as 0.
    #[test]
    fn cosine_similarity_zero_candidate() {
        let pv = PreferenceVectors::new(3);
        assert!(pv.set(1, vec![1.0, 0.0, 0.0]));
        assert_eq!(pv.cosine_similarity(1, &[0.0, 0.0, 0.0]), Some(0.0));
    }

    /// A candidate of the wrong length yields `None` even for a known user.
    #[test]
    fn cosine_similarity_wrong_dim() {
        let pv = PreferenceVectors::new(3);
        assert!(pv.set(1, vec![1.0, 0.0, 0.0]));
        assert!(pv.cosine_similarity(1, &[1.0, 0.0]).is_none());
    }

    #[test]
    fn cosine_similarity_no_pref() {
        let pv = PreferenceVectors::new(3);
        assert!(pv.cosine_similarity(1, &[1.0, 0.0, 0.0]).is_none());
    }

    #[test]
    fn l2_normalize_zero_vec() {
        let mut v = vec![0.0f32, 0.0, 0.0];
        l2_normalize(&mut v);
        assert!(v.iter().all(|&x| x == 0.0));
    }

    #[test]
    fn len_and_is_empty() {
        let pv = PreferenceVectors::new(3);
        assert!(pv.is_empty());
        assert_eq!(pv.len(), 0);
        assert!(pv.set(1, vec![1.0, 0.0, 0.0]));
        assert!(!pv.is_empty());
        assert_eq!(pv.len(), 1);
    }

    mod proptests {
        use super::*;
        use proptest::prelude::*;

        proptest! {
            /// After any sequence of updates, the L2 norm stays approximately 1.0.
            #[test]
            fn l2_norm_invariant(
                updates in proptest::collection::vec(
                    proptest::collection::vec(-1.0f32..1.0f32, 4..=4),
                    1..20
                ),
            ) {
                let pv = PreferenceVectors::new(4);
                for emb in &updates {
                    let _ = pv.update(1, emb);
                }
                let v = pv.get(1).unwrap();
                let norm: f32 = v.iter().map(|x| x * x).sum::<f32>().sqrt();
                // After a sequence of updates, the vector should be unit-length
                // (within floating-point tolerance) or exactly zero if all inputs
                // collapse to the origin.
                prop_assert!(
                    (norm - 1.0).abs() < 1e-4 || norm < f32::EPSILON,
                    "norm was {norm}, expected ~1.0"
                );
            }
        }
    }
}