{
  "dashboard": {
    "title": "StemeDB - SLI & Availability",
    "tags": ["stemedb", "sli", "availability"],
    "timezone": "browser",
    "panels": [
      {
        "id": 1,
        "title": "Request Rate (by endpoint)",
        "type": "graph",
        "targets": [
          {
            "expr": "rate(stemedb_http_requests_total[5m])",
            "legendFormat": "{{method}} {{path}}"
          }
        ],
        "yaxes": [
          {"format": "reqps", "label": "Requests/sec"},
          {"format": "short"}
        ],
        "gridPos": {"h": 8, "w": 12, "x": 0, "y": 0}
      },
      {
        "id": 2,
        "title": "Request Latency p99 (by endpoint)",
        "description": "99th-percentile request duration per method and path, computed from 5-minute bucket rates summed over all other labels.",
        "type": "heatmap",
        "targets": [
          {
            "expr": "histogram_quantile(0.99, sum by (le, method, path) (rate(stemedb_http_request_duration_seconds_bucket[5m])))",
            "legendFormat": "{{method}} {{path}}"
          }
        ],
        "yaxis": {"format": "s"},
        "gridPos": {"h": 8, "w": 12, "x": 12, "y": 0}
      },
      {
        "id": 3,
        "title": "Error Rate (by type)",
        "type": "graph",
        "targets": [
          {
            "refId": "A",
            "expr": "rate(stemedb_errors_total[5m])",
            "legendFormat": "{{type}} ({{layer}})"
          }
        ],
        "yaxes": [
          {"format": "ops", "label": "Errors/sec"},
          {"format": "short"}
        ],
        "gridPos": {"h": 8, "w": 12, "x": 0, "y": 8},
        "alert": {
          "conditions": [
            {
              "evaluator": {"params": [0.01], "type": "gt"},
              "operator": {"type": "and"},
              "query": {"params": ["A", "5m", "now"]},
              "reducer": {"type": "avg"}
            }
          ],
          "name": "High Error Rate"
        }
      },
      {
        "id": 4,
        "title": "Availability (Success Rate)",
        "description": "Fraction of requests with a 2xx status over the last 5 minutes. Thresholds are absolute values on the 0-1 scale: yellow from 0.95, green from 0.99.",
        "type": "gauge",
        "targets": [
          {
            "expr": "sum(rate(stemedb_http_request_duration_seconds_count{status=~\"2..\"}[5m])) / sum(rate(stemedb_http_request_duration_seconds_count[5m]))",
            "legendFormat": "Availability %"
          }
        ],
        "fieldConfig": {
          "defaults": {
            "unit": "percentunit",
            "min": 0,
            "max": 1,
            "thresholds": {
              "mode": "absolute",
              "steps": [
                {"value": 0, "color": "red"},
                {"value": 0.95, "color": "yellow"},
                {"value": 0.99, "color": "green"}
              ]
            }
          }
        },
        "gridPos": {"h": 8, "w": 6, "x": 12, "y": 8}
      },
      {
        "id": 5,
        "title": "Request Status Distribution",
        "type": "piechart",
        "targets": [
          {
            "expr": "sum by (status) (rate(stemedb_http_request_duration_seconds_count[5m]))",
            "legendFormat": "{{status}}"
          }
        ],
        "gridPos": {"h": 8, "w": 6, "x": 18, "y": 8}
      },
      {
        "id": 6,
        "title": "Latency Distribution (all endpoints)",
        "description": "p50, p95 and p99 request duration computed from 5-minute bucket rates summed across all series (sum by le), giving one line per quantile.",
        "type": "graph",
        "targets": [
          {
            "expr": "histogram_quantile(0.50, sum by (le) (rate(stemedb_http_request_duration_seconds_bucket[5m])))",
            "legendFormat": "p50"
          },
          {
            "expr": "histogram_quantile(0.95, sum by (le) (rate(stemedb_http_request_duration_seconds_bucket[5m])))",
            "legendFormat": "p95"
          },
          {
            "expr": "histogram_quantile(0.99, sum by (le) (rate(stemedb_http_request_duration_seconds_bucket[5m])))",
            "legendFormat": "p99"
          }
        ],
        "yaxes": [
          {"format": "s", "label": "Latency"},
          {"format": "short"}
        ],
        "gridPos": {"h": 8, "w": 12, "x": 0, "y": 16}
      },
      {
        "id": 7,
        "title": "Circuit Breaker Status",
        "type": "stat",
        "targets": [
          {
            "expr": "stemedb_circuit_breakers_open",
            "legendFormat": "Open"
          },
          {
            "expr": "stemedb_circuit_breakers_half_open",
            "legendFormat": "Half-Open"
          }
        ],
        "fieldConfig": {
          "defaults": {
            "unit": "short",
            "thresholds": {
              "mode": "absolute",
              "steps": [
                {"value": 0, "color": "green"},
                {"value": 1, "color": "yellow"},
                {"value": 3, "color": "red"}
              ]
            }
          }
        },
        "gridPos": {"h": 8, "w": 12, "x": 12, "y": 16}
      }
    ],
    "refresh": "15s",
    "schemaVersion": 30,
    "version": 1
  }
}