rdev/internal/service/diagnostics_service.go
jordan d69da6d627 feat: add structured logging infrastructure and SDLC extensions
Major changes:
- Add internal/logging package with field constants, context propagation,
  sensitive data auto-redaction, and per-component log levels
- Add worker timeout constants (TimeoutQuickOp, TimeoutHealthCheck, etc.)
- Extend SDLC with callback handlers, generate endpoints, and executor
- Add new cookbook trees for aeries and slackpath progression
- Add skeleton templates for queue, realtime, and microservices
- Add worker component template with async job processing
- Refactor services and handlers to use new logging infrastructure
- Split component.go into component_infra.go and component_listing.go

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-04 22:56:04 -07:00

290 lines
7.8 KiB
Go

// Package service provides business logic services.
package service
import (
"context"
"fmt"
"time"
"github.com/orchard9/rdev/internal/domain"
"github.com/orchard9/rdev/internal/logging"
"github.com/orchard9/rdev/internal/port"
)
// DiagnosticsServiceConfig configures the diagnostics service.
//
// Non-positive limits are replaced with built-in defaults by
// NewDiagnosticsService, so the zero value is a usable configuration.
type DiagnosticsServiceConfig struct {
// DefaultGitOwner is the git organization for CI lookups.
// When empty, the CI collector substitutes a hard-coded fallback owner.
DefaultGitOwner string
// MaxRecentOperations is how many operations to include.
// Values <= 0 select the default of 10.
MaxRecentOperations int
// MaxRecentPipelines is how many pipelines to include.
// Values <= 0 select the default of 5.
MaxRecentPipelines int
}
// DiagnosticsService aggregates project health information from multiple sources.
//
// A report combines recent operations, registry health, and CI pipeline
// status. Instances are created with NewDiagnosticsService.
type DiagnosticsService struct {
// operationRepo lists the recent operations of a project.
operationRepo port.OperationRepository
// registryChecker reports registry health; when nil the registry check is skipped.
registryChecker port.RegistryChecker
// ciProvider lists pipelines and their steps; when nil CI is reported as unavailable.
ciProvider port.CIProvider
// defaultGitOwner is the owner passed to the CI provider for pipeline lookups.
defaultGitOwner string
// maxRecentOperations is the limit used when listing operations.
maxRecentOperations int
// maxRecentPipelines caps how many pipelines are summarized in a report.
maxRecentPipelines int
}
// NewDiagnosticsService wires a DiagnosticsService from its dependencies.
//
// Limits in cfg that are zero or negative are replaced with defaults:
// 10 recent operations and 5 recent pipelines.
func NewDiagnosticsService(
	operationRepo port.OperationRepository,
	registryChecker port.RegistryChecker,
	ciProvider port.CIProvider,
	cfg DiagnosticsServiceConfig,
) *DiagnosticsService {
	svc := &DiagnosticsService{
		operationRepo:       operationRepo,
		registryChecker:     registryChecker,
		ciProvider:          ciProvider,
		defaultGitOwner:     cfg.DefaultGitOwner,
		maxRecentOperations: cfg.MaxRecentOperations,
		maxRecentPipelines:  cfg.MaxRecentPipelines,
	}

	// Fall back to defaults for unset (or nonsensical) limits.
	if svc.maxRecentOperations < 1 {
		svc.maxRecentOperations = 10
	}
	if svc.maxRecentPipelines < 1 {
		svc.maxRecentPipelines = 5
	}

	return svc
}
// GetDiagnostics builds a health report for the given project.
//
// Operations, registry health, and CI status are collected one after the
// other; a source that cannot be queried is reflected in the report rather
// than aborting it, so the returned error is always nil.
func (s *DiagnosticsService) GetDiagnostics(ctx context.Context, projectID string) (*domain.ProjectDiagnostics, error) {
	report := new(domain.ProjectDiagnostics)
	report.ProjectID = projectID
	report.GeneratedAt = time.Now().UTC()
	report.Summary = domain.DiagnosticsSummaryHealthy
	// Start from an empty, non-nil issue list.
	report.Issues = make([]domain.DiagnosticIssue, 0)

	// Each collector appends its findings (and any issues) to the report.
	s.collectOperations(ctx, projectID, report)
	s.collectRegistryHealth(ctx, report)
	s.collectCIStatus(ctx, projectID, report)

	// Derive the overall status from the issues gathered above.
	s.calculateSummary(report)

	return report, nil
}
// collectOperations loads the project's recent operations into diag and
// records an error issue for every failed one.
//
// At most s.maxRecentOperations entries are requested from the repository.
// If no operation repository is configured the step is skipped, in line with
// how the registry and CI collectors treat a missing dependency. A repository
// error is logged and surfaced as a warning issue instead of aborting the
// report.
func (s *DiagnosticsService) collectOperations(ctx context.Context, projectID string, diag *domain.ProjectDiagnostics) {
	// Without this guard a service built without a repository would panic.
	if s.operationRepo == nil {
		return
	}

	filter := domain.OperationFilters{
		ProjectID: projectID,
		Limit:     s.maxRecentOperations,
	}

	ops, err := s.operationRepo.List(ctx, filter)
	if err != nil {
		log := logging.FromContext(ctx).WithService("diagnostics")
		log.Warn("failed to fetch operations for diagnostics",
			logging.FieldError, err,
			logging.FieldProjectID, projectID,
		)
		diag.Issues = append(diag.Issues, domain.DiagnosticIssue{
			Severity: domain.DiagnosticSeverityWarning,
			Source:   domain.DiagnosticSourceOperation,
			Message:  "Unable to fetch operation history",
			Details:  err.Error(),
		})
		return
	}

	for _, op := range ops {
		// Every operation is summarized, regardless of its status.
		diag.RecentOperations = append(diag.RecentOperations, domain.OperationSummary{
			ID:          op.ID,
			Type:        op.Type,
			Status:      op.Status,
			StartedAt:   op.StartedAt,
			DurationMs:  op.DurationMs,
			Error:       op.Error,
			ExternalRef: op.ExternalRef,
		})

		// Only failed operations produce an issue.
		if op.Status != domain.OperationStatusFailed {
			continue
		}

		issue := domain.DiagnosticIssue{
			Severity:  domain.DiagnosticSeverityError,
			Source:    domain.DiagnosticSourceOperation,
			Message:   fmt.Sprintf("%s operation failed", op.Type),
			Details:   op.Error, // empty when the operation recorded no error text
			Timestamp: op.StartedAt,
		}
		// Append the external reference, when present, to the message.
		if op.ExternalRef != "" {
			issue.Message += fmt.Sprintf(" (%s)", op.ExternalRef)
		}
		diag.Issues = append(diag.Issues, issue)
	}
}
// collectRegistryHealth stores the registry check result on diag and adds an
// error issue when the registry reports itself unhealthy.
//
// The step is skipped entirely when no registry checker is configured.
func (s *DiagnosticsService) collectRegistryHealth(ctx context.Context, diag *domain.ProjectDiagnostics) {
	checker := s.registryChecker
	if checker == nil {
		return
	}

	registry := checker.Check(ctx)
	diag.Registry = &registry

	// Nothing more to report for a healthy registry.
	if registry.Healthy {
		return
	}

	unhealthy := domain.DiagnosticIssue{
		Severity:  domain.DiagnosticSeverityError,
		Source:    domain.DiagnosticSourceRegistry,
		Message:   "Container registry unhealthy",
		Details:   registry.Error,
		Timestamp: registry.LastChecked,
	}
	diag.Issues = append(diag.Issues, unhealthy)
}
// collectCIStatus summarizes recent CI pipelines for the project and reports
// the first failed build it encounters as an error issue.
//
// Behavior:
//   - With no CI provider configured, diag.CI is set with Available=false.
//   - If listing pipelines fails, the error is logged and diag.CI is set with
//     Available=false; no issue is added.
//   - Otherwise up to s.maxRecentPipelines pipelines are summarized, and the
//     first one whose status is "failure" or "error" is expanded with
//     step-level details via getFailureDetails.
//
// The project ID is used as the repository name for the CI lookup.
func (s *DiagnosticsService) collectCIStatus(ctx context.Context, projectID string, diag *domain.ProjectDiagnostics) {
	if s.ciProvider == nil {
		diag.CI = &domain.CIDiagnostics{Available: false}
		return
	}

	owner := s.defaultGitOwner
	if owner == "" {
		owner = "jordan" // fallback
	}

	ciDiag := &domain.CIDiagnostics{Available: true}

	pipelines, err := s.ciProvider.ListPipelines(ctx, owner, projectID)
	if err != nil {
		log := logging.FromContext(ctx).WithService("diagnostics")
		log.Warn("failed to fetch pipelines for diagnostics",
			logging.FieldError, err,
			logging.FieldProjectID, projectID,
		)
		ciDiag.Available = false
		diag.CI = ciDiag
		return
	}

	// Convert to summaries and remember the first failed pipeline.
	// NOTE(review): "first" equals "most recent" only if ListPipelines
	// returns pipelines newest-first; confirm against the provider.
	var lastFailure *domain.CIPipeline
	for i, p := range pipelines {
		if i >= s.maxRecentPipelines {
			break
		}

		summary := domain.CIPipelineSummary{
			Number:    p.Number,
			Status:    p.Status,
			Branch:    p.Branch,
			Commit:    p.Commit,
			StartedAt: p.Started,
		}
		// Only report a duration for pipelines that have a finish time
		// later than their start time.
		if p.Finished.After(p.Started) {
			summary.Duration = p.Finished.Sub(p.Started).Round(time.Second).String()
		}
		ciDiag.RecentPipelines = append(ciDiag.RecentPipelines, summary)

		// A pipeline that ended in "error" is a failed build just like one
		// that ended in "failure"; getFailureDetails applies the same rule
		// to individual steps.
		if lastFailure == nil && (p.Status == "failure" || p.Status == "error") {
			lastFailure = p
		}
	}

	if lastFailure != nil {
		failure := s.getFailureDetails(ctx, owner, projectID, lastFailure)
		ciDiag.LastFailure = failure

		issue := domain.DiagnosticIssue{
			Severity:  domain.DiagnosticSeverityError,
			Source:    domain.DiagnosticSourceCI,
			Message:   fmt.Sprintf("CI build #%d failed", lastFailure.Number),
			Timestamp: lastFailure.Finished,
		}
		// Step name and error text are only attached when a failed step
		// was identified.
		if failure != nil && failure.FailedStep != "" {
			issue.Message += fmt.Sprintf(" at step '%s'", failure.FailedStep)
			if failure.Error != "" {
				issue.Details = failure.Error
			}
		}
		diag.Issues = append(diag.Issues, issue)
	}

	diag.CI = ciDiag
}
// getFailureDetails describes a failed pipeline, enriched with step-level
// information when the CI provider can supply it.
//
// The result always carries the pipeline number and finish time. If the
// steps cannot be fetched, the error is logged and that minimal record is
// returned. Otherwise the URL is filled in, along with the name, error, and
// log of the first step whose status is "failure" or "error".
func (s *DiagnosticsService) getFailureDetails(ctx context.Context, owner, repo string, pipeline *domain.CIPipeline) *domain.CIPipelineFailure {
	details := &domain.CIPipelineFailure{
		Number:    pipeline.Number,
		Timestamp: pipeline.Finished,
	}

	result, err := s.ciProvider.GetPipelineSteps(ctx, owner, repo, pipeline.Number)
	if err != nil {
		logger := logging.FromContext(ctx).WithService("diagnostics")
		logger.Warn("failed to fetch pipeline steps",
			logging.FieldError, err,
			"pipeline", pipeline.Number,
		)
		return details
	}

	details.URL = result.URL

	for _, st := range result.Steps {
		// Skip steps that did not fail.
		if st.Status != "failure" && st.Status != "error" {
			continue
		}

		// Only the first failed step is reported.
		details.FailedStep = st.Name
		details.Error = st.Error
		if st.Log != "" {
			details.LogTail = st.Log
		}
		return details
	}

	return details
}
// calculateSummary sets diag.Summary from the severities of the collected
// issues: unhealthy if any issue is an error, degraded if there are only
// warnings, healthy otherwise.
func (s *DiagnosticsService) calculateSummary(diag *domain.ProjectDiagnostics) {
	var sawError, sawWarning bool
	for i := range diag.Issues {
		switch diag.Issues[i].Severity {
		case domain.DiagnosticSeverityError:
			sawError = true
		case domain.DiagnosticSeverityWarning:
			sawWarning = true
		}
	}

	// Errors take precedence over warnings.
	switch {
	case sawError:
		diag.Summary = domain.DiagnosticsSummaryUnhealthy
	case sawWarning:
		diag.Summary = domain.DiagnosticsSummaryDegraded
	default:
		diag.Summary = domain.DiagnosticsSummaryHealthy
	}
}