Week 1 — deployment prerequisites:
- Add TIDAL_API_KEY Bearer auth middleware (constant-time comparison)
- Handle SIGTERM alongside ctrl-c for graceful shutdown
- Remove test-utils feature from production tidal-server binary
- Fix standalone Dockerfile; add cluster Dockerfile and docker-compose
- Extract MultiRegionState into state.rs with per-region TidalDb map

Week 2 — operational middleware and observability:
- Add body limit (2MB), request timeout (30s), concurrency limit (100)
- Add SetRequestIdLayer + PropagateRequestIdLayer (x-request-id header)
- Add TraceLayer with structured spans including request ID
- Activate Prometheus /metrics endpoint via --metrics flag
- Add monitoring.md, recovery.md, prometheus-alerts.yaml, grafana-dashboard.json

Week 3 — query latency histograms and middleware integration tests:
- Add QUERY_LATENCY_BOUNDS (100µs–10s) histogram to tidal library
- Instrument retrieve() and search() with tidaldb_retrieve/search_latency_us
- Fix: search() latency now recorded on error paths (was skipped via ?)
- Lib+bin split in tidal-server enabling integration tests
- Add 8 middleware integration tests (auth, body limit, request ID)
- Add 2 Prometheus alert rules and 2 Grafana latency panels

Post-review fixes:
- Fix SIGTERM handler compilation on non-Unix targets (#[cfg(unix)] guard)
- Exempt /health from TimeoutLayer + ConcurrencyLimitLayer (prevents false liveness failures under load)
- Case-insensitive Bearer scheme matching per RFC 7235 §2.1
524 lines
14 KiB
JSON
524 lines
14 KiB
JSON
{
|
|
"uid": "tidaldb-overview",
|
|
"title": "tidalDB Overview",
|
|
"description": "Operational dashboard covering all 22 tidalDB metrics including retrieve and search latency histograms.",
|
|
"schemaVersion": 38,
|
|
"version": 2,
|
|
"refresh": "30s",
|
|
"time": { "from": "now-1h", "to": "now" },
|
|
"timepicker": {},
|
|
"tags": ["tidaldb"],
|
|
"panels": [
|
|
{
|
|
"id": 1,
|
|
"type": "row",
|
|
"title": "Health Overview",
|
|
"gridPos": { "x": 0, "y": 0, "w": 24, "h": 1 },
|
|
"collapsed": false
|
|
},
|
|
{
|
|
"id": 2,
|
|
"type": "stat",
|
|
"title": "Health",
|
|
"gridPos": { "x": 0, "y": 1, "w": 4, "h": 4 },
|
|
"targets": [
|
|
{
|
|
"expr": "tidaldb_health_ok",
|
|
"legendFormat": "health_ok"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"mappings": [
|
|
{ "type": "value", "options": { "0": { "text": "UNHEALTHY", "color": "red" } } },
|
|
{ "type": "value", "options": { "1": { "text": "OK", "color": "green" } } }
|
|
],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{ "color": "red", "value": 0 },
|
|
{ "color": "green", "value": 1 }
|
|
]
|
|
},
|
|
"color": { "mode": "thresholds" }
|
|
}
|
|
},
|
|
"options": { "reduceOptions": { "calcs": ["lastNotNull"] }, "orientation": "auto", "colorMode": "background" }
|
|
},
|
|
{
|
|
"id": 3,
|
|
"type": "stat",
|
|
"title": "Uptime",
|
|
"gridPos": { "x": 4, "y": 1, "w": 4, "h": 4 },
|
|
"targets": [
|
|
{
|
|
"expr": "tidaldb_uptime_seconds",
|
|
"legendFormat": "uptime_seconds"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "s",
|
|
"color": { "mode": "fixed", "fixedColor": "blue" }
|
|
}
|
|
},
|
|
"options": { "reduceOptions": { "calcs": ["lastNotNull"] }, "orientation": "auto", "colorMode": "value" }
|
|
},
|
|
{
|
|
"id": 4,
|
|
"type": "stat",
|
|
"title": "Degradation Level",
|
|
"gridPos": { "x": 8, "y": 1, "w": 4, "h": 4 },
|
|
"targets": [
|
|
{
|
|
"expr": "tidaldb_degradation_level",
|
|
"legendFormat": "degradation_level"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{ "color": "green", "value": 0 },
|
|
{ "color": "red", "value": 1 }
|
|
]
|
|
},
|
|
"color": { "mode": "thresholds" }
|
|
}
|
|
},
|
|
"options": { "reduceOptions": { "calcs": ["lastNotNull"] }, "orientation": "auto", "colorMode": "background" }
|
|
},
|
|
{
|
|
"id": 5,
|
|
"type": "stat",
|
|
"title": "Version",
|
|
"gridPos": { "x": 12, "y": 1, "w": 4, "h": 4 },
|
|
"targets": [
|
|
{
|
|
"expr": "tidaldb_info",
|
|
"legendFormat": "{{version}}"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": { "mode": "fixed", "fixedColor": "text" }
|
|
}
|
|
},
|
|
"options": { "reduceOptions": { "calcs": ["lastNotNull"] }, "orientation": "auto", "colorMode": "none", "textMode": "name" }
|
|
},
|
|
|
|
{
|
|
"id": 10,
|
|
"type": "row",
|
|
"title": "Signal Throughput",
|
|
"gridPos": { "x": 0, "y": 5, "w": 24, "h": 1 },
|
|
"collapsed": false
|
|
},
|
|
{
|
|
"id": 11,
|
|
"type": "timeseries",
|
|
"title": "Signal Write Rate (per second)",
|
|
"gridPos": { "x": 0, "y": 6, "w": 8, "h": 6 },
|
|
"targets": [
|
|
{
|
|
"expr": "rate(tidaldb_signal_writes_total[5m])",
|
|
"legendFormat": "writes/s"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "reqps",
|
|
"color": { "mode": "palette-classic" }
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"id": 12,
|
|
"type": "timeseries",
|
|
"title": "Signal Write Latency (µs)",
|
|
"gridPos": { "x": 8, "y": 6, "w": 8, "h": 6 },
|
|
"targets": [
|
|
{
|
|
"expr": "tidaldb_signal_write_latency_us",
|
|
"legendFormat": "latency_us"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "µs",
|
|
"color": { "mode": "palette-classic" }
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"id": 13,
|
|
"type": "gauge",
|
|
"title": "Signal Hot Entries",
|
|
"gridPos": { "x": 16, "y": 6, "w": 8, "h": 6 },
|
|
"targets": [
|
|
{
|
|
"expr": "tidaldb_signal_hot_entries",
|
|
"legendFormat": "hot_entries"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"min": 0,
|
|
"max": 5000000,
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{ "color": "green", "value": 0 },
|
|
{ "color": "yellow", "value": 4000000 },
|
|
{ "color": "red", "value": 5000000 }
|
|
]
|
|
},
|
|
"color": { "mode": "thresholds" }
|
|
}
|
|
}
|
|
},
|
|
|
|
{
  "id": 14,
  "type": "timeseries",
  "title": "Retrieve Latency Percentiles (µs)",
  "description": "p50/p95/p99 of the tidaldb_retrieve_latency_us histogram, with bucket rates summed by le across all scraped series before the quantile is taken. The threshold line marks 500000 µs.",
  "gridPos": { "x": 0, "y": 12, "w": 12, "h": 6 },
  "targets": [
    {
      "expr": "histogram_quantile(0.50, sum by (le) (rate(tidaldb_retrieve_latency_us_bucket[$__rate_interval])))",
      "legendFormat": "p50"
    },
    {
      "expr": "histogram_quantile(0.95, sum by (le) (rate(tidaldb_retrieve_latency_us_bucket[$__rate_interval])))",
      "legendFormat": "p95"
    },
    {
      "expr": "histogram_quantile(0.99, sum by (le) (rate(tidaldb_retrieve_latency_us_bucket[$__rate_interval])))",
      "legendFormat": "p99"
    }
  ],
  "fieldConfig": {
    "defaults": {
      "unit": "µs",
      "color": { "mode": "palette-classic" },
      "custom": {
        "thresholdsStyle": { "mode": "line" }
      },
      "thresholds": {
        "mode": "absolute",
        "steps": [
          { "color": "green", "value": 0 },
          { "color": "yellow", "value": 500000 }
        ]
      }
    }
  }
},
|
|
{
  "id": 15,
  "type": "timeseries",
  "title": "Search Latency Percentiles (µs)",
  "description": "p50/p95/p99 of the tidaldb_search_latency_us histogram, with bucket rates summed by le across all scraped series before the quantile is taken. The threshold line marks 1000000 µs.",
  "gridPos": { "x": 12, "y": 12, "w": 12, "h": 6 },
  "targets": [
    {
      "expr": "histogram_quantile(0.50, sum by (le) (rate(tidaldb_search_latency_us_bucket[$__rate_interval])))",
      "legendFormat": "p50"
    },
    {
      "expr": "histogram_quantile(0.95, sum by (le) (rate(tidaldb_search_latency_us_bucket[$__rate_interval])))",
      "legendFormat": "p95"
    },
    {
      "expr": "histogram_quantile(0.99, sum by (le) (rate(tidaldb_search_latency_us_bucket[$__rate_interval])))",
      "legendFormat": "p99"
    }
  ],
  "fieldConfig": {
    "defaults": {
      "unit": "µs",
      "color": { "mode": "palette-classic" },
      "custom": {
        "thresholdsStyle": { "mode": "line" }
      },
      "thresholds": {
        "mode": "absolute",
        "steps": [
          { "color": "green", "value": 0 },
          { "color": "yellow", "value": 1000000 }
        ]
      }
    }
  }
},
|
|
|
|
{
|
|
"id": 20,
|
|
"type": "row",
|
|
"title": "Durability",
|
|
"gridPos": { "x": 0, "y": 19, "w": 24, "h": 1 },
|
|
"collapsed": false
|
|
},
|
|
{
|
|
"id": 21,
|
|
"type": "timeseries",
|
|
"title": "Checkpoint Age (seconds)",
|
|
"gridPos": { "x": 0, "y": 20, "w": 6, "h": 6 },
|
|
"targets": [
|
|
{
|
|
"expr": "tidaldb_checkpoint_age_seconds",
|
|
"legendFormat": "checkpoint_age_seconds"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "s",
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{ "color": "green", "value": 0 },
|
|
{ "color": "red", "value": 300 }
|
|
]
|
|
},
|
|
"color": { "mode": "thresholds" }
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"id": 22,
|
|
"type": "stat",
|
|
"title": "Checkpoint Failures",
|
|
"gridPos": { "x": 6, "y": 20, "w": 6, "h": 6 },
|
|
"targets": [
|
|
{
|
|
"expr": "tidaldb_checkpoint_failures_total",
|
|
"legendFormat": "checkpoint_failures_total"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{ "color": "green", "value": 0 },
|
|
{ "color": "red", "value": 1 }
|
|
]
|
|
},
|
|
"color": { "mode": "thresholds" }
|
|
}
|
|
},
|
|
"options": { "reduceOptions": { "calcs": ["lastNotNull"] }, "colorMode": "background" }
|
|
},
|
|
{
  "id": 23,
  "type": "timeseries",
  "title": "WAL Lag (bytes)",
  "description": "Current value of tidaldb_wal_lag_bytes. Threshold lines mark 500000000 bytes (yellow) and 1000000000 bytes (red).",
  "gridPos": { "x": 12, "y": 20, "w": 6, "h": 6 },
  "targets": [
    {
      "expr": "tidaldb_wal_lag_bytes",
      "legendFormat": "wal_lag_bytes"
    }
  ],
  "fieldConfig": {
    "defaults": {
      "unit": "bytes",
      "custom": {
        "thresholdsStyle": { "mode": "line" }
      },
      "thresholds": {
        "mode": "absolute",
        "steps": [
          { "color": "green", "value": 0 },
          { "color": "yellow", "value": 500000000 },
          { "color": "red", "value": 1000000000 }
        ]
      },
      "color": { "mode": "palette-classic" }
    }
  }
},
|
|
{
|
|
"id": 24,
|
|
"type": "timeseries",
|
|
"title": "WAL Compacted Segments (rate)",
|
|
"gridPos": { "x": 18, "y": 20, "w": 6, "h": 6 },
|
|
"targets": [
|
|
{
|
|
"expr": "rate(tidaldb_wal_compacted_segments_total[5m])",
|
|
"legendFormat": "compacted/s"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "cps",
|
|
"color": { "mode": "palette-classic" }
|
|
}
|
|
}
|
|
},
|
|
|
|
{
|
|
"id": 30,
|
|
"type": "row",
|
|
"title": "Index Health",
|
|
"gridPos": { "x": 0, "y": 26, "w": 24, "h": 1 },
|
|
"collapsed": false
|
|
},
|
|
{
|
|
"id": 31,
|
|
"type": "stat",
|
|
"title": "Tantivy Indexed Docs",
|
|
"gridPos": { "x": 0, "y": 27, "w": 4, "h": 4 },
|
|
"targets": [
|
|
{ "expr": "tidaldb_tantivy_indexed_docs", "legendFormat": "indexed_docs" }
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "short",
|
|
"color": { "mode": "fixed", "fixedColor": "blue" }
|
|
}
|
|
},
|
|
"options": { "reduceOptions": { "calcs": ["lastNotNull"] }, "colorMode": "value" }
|
|
},
|
|
{
|
|
"id": 32,
|
|
"type": "gauge",
|
|
"title": "Tantivy Segment Count",
|
|
"gridPos": { "x": 4, "y": 27, "w": 4, "h": 4 },
|
|
"targets": [
|
|
{ "expr": "tidaldb_tantivy_segment_count", "legendFormat": "segment_count" }
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"min": 0,
|
|
"max": 50,
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{ "color": "green", "value": 0 },
|
|
{ "color": "yellow", "value": 20 },
|
|
{ "color": "red", "value": 30 }
|
|
]
|
|
},
|
|
"color": { "mode": "thresholds" }
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"id": 33,
|
|
"type": "stat",
|
|
"title": "uSearch Vector Count",
|
|
"gridPos": { "x": 8, "y": 27, "w": 4, "h": 4 },
|
|
"targets": [
|
|
{ "expr": "tidaldb_usearch_vector_count", "legendFormat": "vector_count" }
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "short",
|
|
"color": { "mode": "fixed", "fixedColor": "blue" }
|
|
}
|
|
},
|
|
"options": { "reduceOptions": { "calcs": ["lastNotNull"] }, "colorMode": "value" }
|
|
},
|
|
{
|
|
"id": 34,
|
|
"type": "stat",
|
|
"title": "uSearch Index Size",
|
|
"gridPos": { "x": 12, "y": 27, "w": 4, "h": 4 },
|
|
"targets": [
|
|
{ "expr": "tidaldb_usearch_index_size_bytes", "legendFormat": "index_size_bytes" }
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "bytes",
|
|
"color": { "mode": "fixed", "fixedColor": "blue" }
|
|
}
|
|
},
|
|
"options": { "reduceOptions": { "calcs": ["lastNotNull"] }, "colorMode": "value" }
|
|
},
|
|
{
|
|
"id": 35,
|
|
"type": "stat",
|
|
"title": "Bitmap Index Cardinality",
|
|
"gridPos": { "x": 16, "y": 27, "w": 4, "h": 4 },
|
|
"targets": [
|
|
{ "expr": "tidaldb_bitmap_index_cardinality", "legendFormat": "bitmap_cardinality" }
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "short",
|
|
"color": { "mode": "fixed", "fixedColor": "blue" }
|
|
}
|
|
},
|
|
"options": { "reduceOptions": { "calcs": ["lastNotNull"] }, "colorMode": "value" }
|
|
},
|
|
|
|
{
|
|
"id": 40,
|
|
"type": "row",
|
|
"title": "Sessions",
|
|
"gridPos": { "x": 0, "y": 31, "w": 24, "h": 1 },
|
|
"collapsed": false
|
|
},
|
|
{
|
|
"id": 41,
|
|
"type": "timeseries",
|
|
"title": "Active Sessions",
|
|
"gridPos": { "x": 0, "y": 32, "w": 6, "h": 6 },
|
|
"targets": [
|
|
{ "expr": "tidaldb_active_sessions", "legendFormat": "active_sessions" }
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "short",
|
|
"color": { "mode": "palette-classic" }
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"id": 42,
|
|
"type": "timeseries",
|
|
"title": "Session Close Rate (per second)",
|
|
"gridPos": { "x": 6, "y": 32, "w": 6, "h": 6 },
|
|
"targets": [
|
|
{ "expr": "rate(tidaldb_closed_sessions_total[5m])", "legendFormat": "closes/s" }
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "reqps",
|
|
"color": { "mode": "palette-classic" }
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"id": 43,
|
|
"type": "stat",
|
|
"title": "Auto-Closed Sessions",
|
|
"gridPos": { "x": 12, "y": 32, "w": 4, "h": 6 },
|
|
"targets": [
|
|
{ "expr": "tidaldb_session_auto_closed_total", "legendFormat": "auto_closed_total" }
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "short",
|
|
"color": { "mode": "fixed", "fixedColor": "yellow" }
|
|
}
|
|
},
|
|
"options": { "reduceOptions": { "calcs": ["lastNotNull"] }, "colorMode": "value" }
|
|
},
|
|
{
  "id": 44,
  "type": "timeseries",
  "title": "Rate Limited (per second)",
  "description": "Per-second rate of tidaldb_rate_limited_total over a 5m window. The threshold line marks 100 per second.",
  "gridPos": { "x": 16, "y": 32, "w": 8, "h": 6 },
  "targets": [
    { "expr": "rate(tidaldb_rate_limited_total[5m])", "legendFormat": "rate_limited/s" }
  ],
  "fieldConfig": {
    "defaults": {
      "unit": "reqps",
      "custom": {
        "thresholdsStyle": { "mode": "line" }
      },
      "thresholds": {
        "mode": "absolute",
        "steps": [
          { "color": "green", "value": 0 },
          { "color": "red", "value": 100 }
        ]
      },
      "color": { "mode": "palette-classic" }
    }
  }
}
|
|
]
|
|
}
|