//! Preference vector: per-user taste embedding with L2 normalization invariant. //! //! Tracks user taste by maintaining a preference vector that evolves with //! interactions. The vector is L2-normalized on every update to ensure //! consistent cosine similarity scoring during personalized ranking. use dashmap::DashMap; /// Per-user preference vector, L2-normalized. /// /// The vector is updated via exponential moving average: each new interaction /// embedding is blended with the current preference using a learning rate. /// /// Thread-safe via `DashMap` -- concurrent updates to different users never /// contend. pub struct PreferenceVectors { /// `user_id` -> normalized preference vector inner: DashMap>, /// Dimensionality of the embedding space. All vectors must have this length. dim: usize, /// Learning rate for exponential moving average updates. /// Default: 0.1 (the new interaction contributes 10% to the updated preference). learning_rate: f32, } impl PreferenceVectors { /// Create a new preference vector store for the given embedding dimensionality. #[must_use] pub fn new(dim: usize) -> Self { Self { inner: DashMap::new(), dim, learning_rate: 0.1, } } /// Create with a custom learning rate. #[must_use] pub fn with_learning_rate(dim: usize, learning_rate: f32) -> Self { Self { inner: DashMap::new(), dim, learning_rate, } } /// Get the current preference vector for a user (cloned). /// /// Returns `None` if no preference has been recorded. #[must_use] pub fn get(&self, user_id: u64) -> Option> { self.inner.get(&user_id).map(|r| r.clone()) } /// Set the preference vector directly (e.g., from cold-start initialization). /// /// The vector is L2-normalized before storage. Returns `false` if the /// dimension does not match. #[must_use] pub fn set(&self, user_id: u64, mut vec: Vec) -> bool { if vec.len() != self.dim { return false; } l2_normalize(&mut vec); self.inner.insert(user_id, vec); true } /// Update a user's preference vector by blending with an interaction embedding. /// /// Uses exponential moving average: /// `pref = (1 - lr) * pref + lr * interaction` /// then L2-normalizes the result. /// /// If no preference exists yet, the interaction embedding becomes the initial /// preference (after normalization). Uses `Entry::Occupied`/`Entry::Vacant` /// to avoid double-applying the blend on first insertion. /// /// Returns `false` if the interaction embedding dimension does not match. #[must_use] pub fn update(&self, user_id: u64, interaction_embedding: &[f32]) -> bool { use dashmap::mapref::entry::Entry; if interaction_embedding.len() != self.dim { return false; } let lr = self.learning_rate; match self.inner.entry(user_id) { Entry::Occupied(mut occ) => { let pref = occ.get_mut(); for (p, &i) in pref.iter_mut().zip(interaction_embedding.iter()) { *p = (1.0 - lr).mul_add(*p, lr * i); } l2_normalize(pref); } Entry::Vacant(vac) => { let mut v = interaction_embedding.to_vec(); l2_normalize(&mut v); vac.insert(v); } } true } /// Compute cosine similarity between a user's preference and a candidate embedding. /// /// Returns `None` if the user has no preference vector or dimensions mismatch. /// The stored preference is L2-normalized; the candidate is normalized on-the-fly /// so callers do not need to pre-normalize. #[must_use] #[allow(clippy::significant_drop_tightening)] pub fn cosine_similarity(&self, user_id: u64, candidate: &[f32]) -> Option { if candidate.len() != self.dim { return None; } let pref = self.inner.get(&user_id)?; let dot: f32 = pref.iter().zip(candidate.iter()).map(|(a, b)| a * b).sum(); // Divide by the candidate's L2 norm to get true cosine similarity. // The stored preference is already unit-length, so we only need // to normalize the candidate side. let candidate_norm: f32 = candidate.iter().map(|x| x * x).sum::().sqrt(); if candidate_norm < f32::EPSILON { return Some(0.0); } Some(dot / candidate_norm) } /// Number of users with stored preferences. #[must_use] pub fn len(&self) -> usize { self.inner.len() } /// Whether no preferences are stored. #[must_use] pub fn is_empty(&self) -> bool { self.inner.is_empty() } } /// L2-normalize a vector in-place. If the vector has zero magnitude, it remains /// as-is (all zeros). fn l2_normalize(vec: &mut [f32]) { let norm: f32 = vec.iter().map(|x| x * x).sum::().sqrt(); if norm > f32::EPSILON { for v in vec.iter_mut() { *v /= norm; } } } #[cfg(test)] #[allow(clippy::unwrap_used, clippy::float_cmp)] mod tests { use super::*; #[test] fn set_and_get() { let pv = PreferenceVectors::new(3); assert!(pv.set(1, vec![3.0, 4.0, 0.0])); let v = pv.get(1).unwrap(); // 3/5, 4/5, 0 assert!((v[0] - 0.6).abs() < 1e-6); assert!((v[1] - 0.8).abs() < 1e-6); assert!((v[2] - 0.0).abs() < 1e-6); } #[test] fn set_wrong_dim_rejected() { let pv = PreferenceVectors::new(3); assert!(!pv.set(1, vec![1.0, 2.0])); assert!(pv.get(1).is_none()); } #[test] fn update_creates_initial() { let pv = PreferenceVectors::new(3); assert!(pv.update(1, &[1.0, 0.0, 0.0])); let v = pv.get(1).unwrap(); assert!((v[0] - 1.0).abs() < 1e-6); } #[test] fn update_blends() { let pv = PreferenceVectors::with_learning_rate(2, 0.5); let _ = pv.set(1, vec![1.0, 0.0]); let _ = pv.update(1, &[0.0, 1.0]); let v = pv.get(1).unwrap(); // After blend: (0.5, 0.5), normalized: (1/sqrt(2), 1/sqrt(2)) let expected = 1.0 / 2.0f32.sqrt(); assert!((v[0] - expected).abs() < 1e-5); assert!((v[1] - expected).abs() < 1e-5); } #[test] fn cosine_similarity_normalized() { let pv = PreferenceVectors::new(3); let _ = pv.set(1, vec![1.0, 0.0, 0.0]); // Cosine with self = 1.0 let sim = pv.cosine_similarity(1, &[1.0, 0.0, 0.0]).unwrap(); assert!((sim - 1.0).abs() < 1e-6); // Orthogonal = 0.0 let sim = pv.cosine_similarity(1, &[0.0, 1.0, 0.0]).unwrap(); assert!(sim.abs() < 1e-6); } #[test] fn cosine_similarity_no_pref() { let pv = PreferenceVectors::new(3); assert!(pv.cosine_similarity(1, &[1.0, 0.0, 0.0]).is_none()); } #[test] fn l2_normalize_zero_vec() { let mut v = vec![0.0f32, 0.0, 0.0]; l2_normalize(&mut v); assert!(v.iter().all(|&x| x == 0.0)); } #[test] fn len_and_is_empty() { let pv = PreferenceVectors::new(3); assert!(pv.is_empty()); assert_eq!(pv.len(), 0); let _ = pv.set(1, vec![1.0, 0.0, 0.0]); assert!(!pv.is_empty()); assert_eq!(pv.len(), 1); } mod proptests { use super::*; use proptest::prelude::*; proptest! { /// After any sequence of updates, the L2 norm stays approximately 1.0. #[test] fn l2_norm_invariant( updates in proptest::collection::vec( proptest::collection::vec(-1.0f32..1.0f32, 4..=4), 1..20 ), ) { let pv = PreferenceVectors::new(4); for emb in &updates { let _ = pv.update(1, emb); } let v = pv.get(1).unwrap(); let norm: f32 = v.iter().map(|x| x * x).sum::().sqrt(); // After a sequence of updates, the vector should be unit-length // (within floating-point tolerance) or exactly zero if all inputs // collapse to the origin. prop_assert!( (norm - 1.0).abs() < 1e-4 || norm < f32::EPSILON, "norm was {norm}, expected ~1.0" ); } } } }