fix: health reachable_nodes uses alive filter, cluster status includes self in node_count
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful

- handle_health: count all_members() entries in the Alive state instead of
  using members(), which returned 0 when nodes were in Suspect state
- handle_cluster_status: add +1 for self since members map excludes local node
This commit is contained in:
jordan 2026-03-07 19:13:31 -07:00
parent 9c36a8e3b3
commit 4360a17dd3

View File

@ -7,6 +7,7 @@ use std::sync::Arc;
use tracing::instrument;
use crate::gateway::service::GatewayState;
use crate::membership::NodeState;
use crate::sharding::ShardId;
use super::types::{ApiError, ClusterStatusResponse, HealthResponse, NodeStatusInfo, QueryParams};
@ -90,10 +91,15 @@ pub async fn handle_query(
/// GET /v1/health - Health check.
///
/// Responds with a JSON `HealthResponse` carrying the node's joined flag, the
/// number of members currently in the `Alive` state, and a `healthy` flag
/// derived from both.
// NOTE(review): this span is a rendered diff hunk without +/- markers; going by
// the commit message, the `members()` lookup and the single-line `Json(...)`
// response are the removed lines and the statements after each are the
// replacements.
#[instrument(skip(state))]
pub async fn handle_health(State(state): State<Arc<GatewayState>>) -> Json<HealthResponse> {
// Pre-change lookup; only the single-line response below reads it.
let members = state.membership.members();
// Count only the entries whose state equals `NodeState::Alive`.
let all_members = state.membership.all_members();
let alive_count = all_members.iter().filter(|(_, s)| *s == NodeState::Alive).count();
let joined = state.membership.is_joined();
// Pre-change response: healthy mirrored `joined`, count came from `members()`.
Json(HealthResponse { healthy: joined, reachable_nodes: members.len(), joined })
Json(HealthResponse {
// Healthy requires being joined AND seeing at least one Alive member.
// NOTE(review): the cluster-status handler adds +1 because the members map
// excludes the local node; if `all_members()` here excludes it too, a joined
// single-node cluster has alive_count == 0 and reports unhealthy — confirm
// that is intended.
healthy: joined && alive_count > 0,
reachable_nodes: alive_count,
joined,
})
}
/// GET /v1/cluster/status - Cluster status.
@ -112,8 +118,9 @@ pub async fn handle_cluster_status(
})
.collect();
// +1 for self (members map excludes the local node)
Json(ClusterStatusResponse {
node_count: all_members.len(),
node_count: all_members.len() + 1,
shard_count: meta.num_shards() as u32,
meta_version: meta.version,
nodes,