{
  "dashboard": {
    "title": "StemeDB - Storage Health",
    "tags": ["stemedb", "storage", "wal"],
    "timezone": "browser",
    "panels": [
      {
        "id": 1,
        "title": "WAL Fsync Latency (p50, p95, p99)",
        "type": "graph",
        "targets": [
          {
            "expr": "histogram_quantile(0.50, rate(stemedb_wal_fsync_latency_seconds_bucket[5m]))",
            "legendFormat": "p50"
          },
          {
            "expr": "histogram_quantile(0.95, rate(stemedb_wal_fsync_latency_seconds_bucket[5m]))",
            "legendFormat": "p95"
          },
          {
            "expr": "histogram_quantile(0.99, rate(stemedb_wal_fsync_latency_seconds_bucket[5m]))",
            "legendFormat": "p99"
          }
        ],
        "yaxes": [
          {"format": "s", "label": "Latency"},
          {"format": "short"}
        ],
        "gridPos": {"h": 8, "w": 12, "x": 0, "y": 0}
      },
      {
        "id": 2,
        "title": "WAL Disk Usage",
        "type": "gauge",
        "targets": [
          {
            "expr": "stemedb_wal_disk_usage_bytes / (1024*1024*1024)",
            "legendFormat": "Disk Usage (GiB)"
          }
        ],
        "fieldConfig": {
          "defaults": {
            "unit": "gbytes",
            "min": 0,
            "max": 100,
            "thresholds": {
              "mode": "percentage",
              "steps": [
                {"value": 0, "color": "green"},
                {"value": 70, "color": "yellow"},
                {"value": 90, "color": "red"}
              ]
            }
          }
        },
        "gridPos": {"h": 8, "w": 6, "x": 12, "y": 0}
      },
      {
        "id": 3,
        "title": "WAL Write Rate",
        "type": "graph",
        "targets": [
          {
            "expr": "rate(stemedb_wal_writes_total[5m])",
            "legendFormat": "Writes/sec"
          },
          {
            "expr": "rate(stemedb_wal_bytes_written_total[5m]) / (1024*1024)",
            "legendFormat": "MiB/sec"
          }
        ],
        "seriesOverrides": [
          {"alias": "MiB/sec", "yaxis": 2}
        ],
        "yaxes": [
          {"format": "ops", "label": "Rate"},
          {"format": "short", "label": "MiB/sec"}
        ],
        "gridPos": {"h": 8, "w": 6, "x": 18, "y": 0}
      },
      {
        "id": 4,
        "title": "WAL Error Rate",
        "type": "graph",
        "targets": [
          {
            "refId": "A",
            "expr": "rate(stemedb_wal_write_errors_total[5m])",
            "legendFormat": "{{error}}"
          }
        ],
        "yaxes": [
          {"format": "ops", "label": "Errors/sec"},
          {"format": "short"}
        ],
        "gridPos": {"h": 8, "w": 12, "x": 0, "y": 8},
        "alert": {
          "conditions": [
            {
              "type": "query",
              "evaluator": {"params": [0.01], "type": "gt"},
              "operator": {"type": "and"},
              "query": {"params": ["A", "5m", "now"]},
              "reducer": {"type": "avg"}
            }
          ],
          "frequency": "1m",
          "name": "High WAL Error Rate"
        }
      },
      {
        "id": 5,
        "title": "Storage Operation Latency (by operation)",
        "type": "graph",
        "targets": [
          {
            "expr": "histogram_quantile(0.99, rate(stemedb_storage_operation_duration_seconds_bucket[5m]))",
            "legendFormat": "{{operation}} ({{backend}})"
          }
        ],
        "yaxes": [
          {"format": "s", "label": "Latency (p99)"},
          {"format": "short"}
        ],
        "gridPos": {"h": 8, "w": 12, "x": 12, "y": 8}
      },
      {
        "id": 6,
        "title": "Index Lookup Latency",
        "type": "graph",
        "targets": [
          {
            "expr": "histogram_quantile(0.95, rate(stemedb_index_lookup_duration_seconds_bucket[5m]))",
            "legendFormat": "{{index}} (p95)"
          }
        ],
        "yaxes": [
          {"format": "s", "label": "Latency"},
          {"format": "short"}
        ],
        "gridPos": {"h": 8, "w": 12, "x": 0, "y": 16}
      },
      {
        "id": 7,
        "title": "Storage Operations/sec",
        "type": "graph",
        "targets": [
          {
            "expr": "rate(stemedb_storage_operations_total[5m])",
            "legendFormat": "{{operation}} ({{backend}})"
          }
        ],
        "yaxes": [
          {"format": "ops", "label": "Operations/sec"},
          {"format": "short"}
        ],
        "gridPos": {"h": 8, "w": 12, "x": 12, "y": 16}
      }
    ],
    "refresh": "30s",
    "schemaVersion": 30,
    "version": 1
  }
}