stemedb/crates/stemedb-sync/src/gossip.rs
jordan b3e8a9a058 feat: Multi-application expansion with chaos testing and community UI
Major additions:
- Community Next.js app (port 18187) for browsing claims with API docs
- stemedb-chaos crate: Fault injection, chaos testing, CRDT properties
- Latent ingestion system: Reddit/FDA ingesters with ADK-Go agents
- Disputed claims handling: Manual review workflows and validation
- Aphoria security scanner: New extractors (SQL injection, command
  injection, weak crypto, TLS version), policy-based ignores, UAT reports
- Docker infrastructure: Dockerfile, docker-compose.yml for full stack
- VulnBank demo: Intentionally vulnerable multi-language test corpus

SDK & API enhancements:
- Source registry handlers for tracking data provenance
- Metrics endpoint
- Skeptic filtering improvements

Code quality:
- Split 14 large files (>500 lines) into focused modules
- All files now under 500-line limit per project guidelines

Documentation:
- Chaos testing guide, circuit breakers, observability docs
- Phase 7 UAT documentation updates
- Martin Kleppmann technical writer agent

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-04 01:24:14 -07:00

334 lines
10 KiB
Rust

//! Gossip broadcast implementation.
//!
//! The gossip layer pushes new assertions to peers immediately after
//! local ingestion, providing low-latency replication.
//!
//! # Design
//!
//! - **Fanout**: Each assertion is sent to N peers (configurable)
//! - **Best-effort**: Failures are logged but don't block ingestion
//! - **Idempotent**: Receivers handle duplicates gracefully
//!
//! # Example
//!
//! ```ignore
//! let broadcaster = GossipBroadcaster::new(vec!["http://peer:18182".into()]).await?;
//!
//! // Called after each successful ingestion
//! broadcaster.broadcast(&hash, &data, &hlc).await?;
//! ```
use crate::error::Result;
use async_trait::async_trait;
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
use std::sync::Arc;
use std::time::Instant;
use stemedb_core::types::HlcTimestamp;
use stemedb_rpc::proto::GossipRequest;
use stemedb_rpc::SyncClient;
use tokio::sync::Mutex;
use tracing::{debug, info, instrument, warn};
// Re-export the trait and error from stemedb-ingest for convenience
pub use stemedb_ingest::gossip::{GossipBroadcast, GossipError};
/// Token-bucket throttle used by the gossip broadcaster.
///
/// The bucket holds at most `max_per_second` tokens and is topped up in
/// proportion to the time elapsed since the previous acquisition attempt.
/// Each permitted message consumes one token, so a full bucket allows a burst
/// of up to `max_per_second` messages.
struct RateLimiter {
    /// Tokens currently available; fractional because refill is time-proportional.
    tokens: Mutex<f64>,
    /// Instant at which the bucket was last topped up.
    last_refill: Mutex<Instant>,
    /// Refill rate in tokens per second; also the bucket capacity.
    max_per_second: u32,
}
impl RateLimiter {
    /// Build a limiter whose bucket starts full at `max_per_second` tokens.
    fn new(max_per_second: u32) -> Self {
        let capacity = f64::from(max_per_second);
        Self {
            max_per_second,
            tokens: Mutex::new(capacity),
            last_refill: Mutex::new(Instant::now()),
        }
    }

    /// Attempt to take one token from the bucket.
    ///
    /// Returns `true` when a token was consumed (the message may be sent) and
    /// `false` when fewer than one token is available. The refill timestamp is
    /// advanced on every call, whether or not a token is granted.
    async fn try_acquire(&self) -> bool {
        let capacity = f64::from(self.max_per_second);

        // Both guards are held for the whole update so the token count and
        // the refill timestamp always change together.
        let mut bucket = self.tokens.lock().await;
        let mut refilled_at = self.last_refill.lock().await;

        // Credit the tokens earned since the last call, capped at capacity.
        let now = Instant::now();
        let earned = now.duration_since(*refilled_at).as_secs_f64() * capacity;
        *refilled_at = now;
        let available = (*bucket + earned).min(capacity);

        // Spend one token if there is a whole one to spend.
        let granted = available >= 1.0;
        *bucket = if granted { available - 1.0 } else { available };
        granted
    }
}
/// Pushes locally ingested assertions to peer nodes.
///
/// Holds one client per peer that was reachable at construction time, the
/// fanout and optional rate limit that shape each broadcast, and a set of
/// counters exposed through the accessor methods.
pub struct GossipBroadcaster {
    // --- Peers and configuration ---
    /// One RPC client per peer that connected successfully.
    clients: Vec<Arc<SyncClient>>,
    /// Upper bound on the number of peers contacted per broadcast.
    fanout: usize,
    /// Optional rate limiter to prevent overwhelming peers.
    rate_limiter: Option<RateLimiter>,
    /// Runtime on/off switch checked at the start of every broadcast.
    enabled: AtomicBool,

    // --- Metrics ---
    /// Count of per-peer sends that the peer accepted.
    messages_sent: AtomicU64,
    /// Count of per-peer sends that were rejected or failed.
    send_failures: AtomicU64,
    /// Count of broadcasts skipped because the rate limiter had no token.
    rate_limited: AtomicU64,
}
impl GossipBroadcaster {
    /// Create a broadcaster with the default fanout of 3.
    ///
    /// # Arguments
    ///
    /// * `peer_addrs` - Addresses of the peers to connect to
    ///
    /// # Returns
    ///
    /// A broadcaster holding a client for every peer that could be reached.
    pub async fn new(peer_addrs: Vec<String>) -> Result<Self> {
        const DEFAULT_FANOUT: usize = 3;
        Self::with_fanout(peer_addrs, DEFAULT_FANOUT).await
    }

    /// Create a broadcaster that contacts at most `fanout` peers per message.
    ///
    /// Peers are dialled one after another. An address that fails to connect
    /// is logged and skipped rather than treated as an error, so this returns
    /// `Ok` even when no peer is reachable.
    ///
    /// # Arguments
    ///
    /// * `peer_addrs` - Addresses of the peers to connect to
    /// * `fanout` - Number of peers to send each message to
    pub async fn with_fanout(peer_addrs: Vec<String>, fanout: usize) -> Result<Self> {
        let mut clients = Vec::with_capacity(peer_addrs.len());

        for addr in peer_addrs.iter() {
            let client = match SyncClient::connect(addr).await {
                Ok(client) => client,
                Err(e) => {
                    // Not fatal: an unreachable peer must not stop the node
                    // from starting.
                    warn!(peer = %addr, error = %e, "Failed to connect to peer");
                    continue;
                }
            };
            info!(peer = %addr, "Connected to peer for gossip");
            clients.push(Arc::new(client));
        }

        // Only worth a warning when peers were configured but none answered.
        if !peer_addrs.is_empty() && clients.is_empty() {
            warn!("No peers reachable for gossip broadcast");
        }

        Ok(Self {
            clients,
            fanout,
            enabled: AtomicBool::new(true),
            rate_limiter: None,
            messages_sent: AtomicU64::new(0),
            send_failures: AtomicU64::new(0),
            rate_limited: AtomicU64::new(0),
        })
    }

    /// Attach a rate limiter, replacing any limiter set previously.
    ///
    /// # Arguments
    ///
    /// * `max_per_second` - Maximum broadcasts allowed per second
    ///
    /// # Example
    ///
    /// ```ignore
    /// let broadcaster = GossipBroadcaster::new(peers).await?
    ///     .with_rate_limit(1000); // Max 1000 messages/sec
    /// ```
    #[must_use]
    pub fn with_rate_limit(mut self, max_per_second: u32) -> Self {
        let limiter = RateLimiter::new(max_per_second);
        self.rate_limiter = Some(limiter);
        self
    }

    /// Number of per-peer sends that were accepted so far.
    pub fn messages_sent(&self) -> u64 {
        self.messages_sent.load(Ordering::Relaxed)
    }

    /// Number of per-peer sends that were rejected or failed so far.
    pub fn send_failures(&self) -> u64 {
        self.send_failures.load(Ordering::Relaxed)
    }

    /// Number of peers that connected successfully at construction.
    pub fn client_count(&self) -> usize {
        self.clients.len()
    }

    /// Number of broadcasts skipped by the rate limiter so far.
    pub fn rate_limited(&self) -> u64 {
        self.rate_limited.load(Ordering::Relaxed)
    }
}
#[async_trait]
impl GossipBroadcast for GossipBroadcaster {
#[instrument(skip(self, hash, data, hlc), fields(hash = %hex::encode(&hash[..8])))]
async fn broadcast(
&self,
hash: &[u8; 32],
data: &[u8],
hlc: &HlcTimestamp,
) -> std::result::Result<(), GossipError> {
if !self.enabled.load(Ordering::Relaxed) {
debug!("Gossip disabled, skipping broadcast");
return Ok(());
}
if self.clients.is_empty() {
debug!("No peers connected, skipping gossip");
return Ok(());
}
// Check rate limiter if configured
if let Some(ref limiter) = self.rate_limiter {
if !limiter.try_acquire().await {
self.rate_limited.fetch_add(1, Ordering::Relaxed);
debug!("Gossip rate limited, skipping broadcast");
return Ok(());
}
}
let request = GossipRequest {
assertion_hash: hash.to_vec(),
assertion_data: data.to_vec(),
hlc_time: hlc.time_ntp64,
hlc_counter: 0, // Counter is embedded in time_ntp64
hlc_node_id: hlc.node_id.to_vec(),
};
// Select peers for fanout (round-robin or random in future)
let targets: Vec<_> = self.clients.iter().take(self.fanout).collect();
if targets.is_empty() {
return Ok(());
}
debug!(peer_count = targets.len(), "Broadcasting to peers");
// Send to all target peers concurrently
let mut handles = Vec::with_capacity(targets.len());
for client in targets {
let client = client.clone();
let req = request.clone();
handles.push(tokio::spawn(async move { client.gossip(req).await }));
}
// Collect results
let mut success_count = 0u32;
let mut failure_count = 0u32;
for handle in handles {
match handle.await {
Ok(Ok(response)) => {
if response.accepted {
success_count += 1;
} else {
warn!(error = %response.error, "Peer rejected gossip");
failure_count += 1;
}
}
Ok(Err(e)) => {
warn!(error = %e, "Gossip RPC failed");
failure_count += 1;
}
Err(e) => {
warn!(error = %e, "Gossip task panicked");
failure_count += 1;
}
}
}
// Update metrics
self.messages_sent.fetch_add(u64::from(success_count), Ordering::Relaxed);
self.send_failures.fetch_add(u64::from(failure_count), Ordering::Relaxed);
// Best-effort: success if at least one peer accepted
if success_count > 0 {
debug!(success = success_count, failures = failure_count, "Gossip broadcast complete");
Ok(())
} else if failure_count > 0 {
// All peers failed, but don't block the caller
warn!(failures = failure_count, "All gossip targets failed");
Ok(())
} else {
Ok(())
}
}
fn is_enabled(&self) -> bool {
self.enabled.load(Ordering::Relaxed)
}
fn enable(&self) {
self.enabled.store(true, Ordering::Relaxed);
info!("Gossip broadcast enabled");
}
fn disable(&self) {
self.enabled.store(false, Ordering::Relaxed);
info!("Gossip broadcast disabled");
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use stemedb_ingest::NoOpGossipBroadcast;

    /// Fixed hash, payload and timestamp shared by the broadcast tests.
    fn sample_assertion() -> ([u8; 32], Vec<u8>, HlcTimestamp) {
        ([1u8; 32], vec![1, 2, 3], HlcTimestamp::new(1000, [1u8; 16]))
    }

    /// The no-op broadcaster accepts a broadcast and reports itself disabled.
    #[tokio::test]
    async fn test_noop_broadcaster() {
        let (hash, data, hlc) = sample_assertion();
        let broadcaster = NoOpGossipBroadcast;

        broadcaster.broadcast(&hash, &data, &hlc).await.expect("should succeed");
        assert!(!broadcaster.is_enabled());
    }

    /// A broadcaster with no peers starts enabled and broadcasts successfully.
    #[tokio::test]
    async fn test_broadcaster_no_peers() {
        let broadcaster = GossipBroadcaster::new(vec![]).await.expect("create");
        assert_eq!(broadcaster.client_count(), 0);
        assert!(broadcaster.is_enabled());

        // Should succeed even with no peers
        let (hash, data, hlc) = sample_assertion();
        broadcaster.broadcast(&hash, &data, &hlc).await.expect("should succeed");
    }

    /// `disable` and `enable` flip the flag reported by `is_enabled`.
    #[tokio::test]
    async fn test_enable_disable() {
        let broadcaster = GossipBroadcaster::new(vec![]).await.expect("create");
        assert!(broadcaster.is_enabled());

        broadcaster.disable();
        assert!(!broadcaster.is_enabled());

        broadcaster.enable();
        assert!(broadcaster.is_enabled());
    }
}