#!/bin/bash
# Checkpoint utilities for tree-based cookbook execution
#
# Usage:
#   source "$(dirname "${BASH_SOURCE[0]}")/checkpoint.sh"
#
# Requires:
#   jq (every function that reads or modifies checkpoint JSON shells out to it)
#
# Environment:
#   CHECKPOINT_DIR - directory holding one "<tree_name>.json" file per tree.
#                    Defaults to "../../.checkpoints" relative to this file.
#
# Provides:
#   - checkpoint_init()            - Initialize a new checkpoint
#   - checkpoint_load()            - Load existing checkpoint
#   - checkpoint_save()            - Persist checkpoint to disk
#   - checkpoint_step_start()      - Mark step as started
#   - checkpoint_step_complete()   - Mark step as completed with output
#   - checkpoint_step_fail()       - Mark step as failed with error
#   - checkpoint_get_output()      - Get output from a completed step
#   - checkpoint_get_var()         - Get a variable stored in the checkpoint
#   - checkpoint_status()          - Get overall checkpoint status
#   - checkpoint_status_detail()   - Print a human-readable status report
#   - checkpoint_completed_steps() - List names of completed steps
#   - checkpoint_failed_steps()    - List names of failed steps
#   - checkpoint_list()            - List all checkpoints
#   - checkpoint_delete()          - Delete a checkpoint
#   - checkpoint_mark_completed()  - Mark the whole tree as completed
#   - checkpoint_update_vars()     - Merge new variables into the checkpoint
#
# This file is meant to be sourced, so it deliberately does not change shell
# options (no `set -e` etc.) on behalf of the caller.

# Checkpoint directory
CHECKPOINT_DIR="${CHECKPOINT_DIR:-$(dirname "${BASH_SOURCE[0]}")/../../.checkpoints}"

# Ensure checkpoint directory exists
_checkpoint_ensure_dir() {
  mkdir -p -- "$CHECKPOINT_DIR"
}

# Get checkpoint file path for a tree
# Arguments: tree_name
# Returns: path on stdout
_checkpoint_path() {
  local tree_name="$1"
  printf '%s\n' "$CHECKPOINT_DIR/${tree_name}.json"
}

# Current UTC time as an ISO-8601 timestamp (e.g. 2024-01-31T12:00:00Z)
_checkpoint_now() {
  date -u +"%Y-%m-%dT%H:%M:%SZ"
}

# Load a checkpoint, transform it with a jq filter and save the result.
# Arguments: tree_name jq_filter [extra jq options, e.g. --arg name value ...]
# Returns: 0 on success; 1 if the checkpoint is missing, jq fails, or the
#          result cannot be saved. On any failure the file on disk is left
#          untouched - the result is only written after jq succeeded.
_checkpoint_update() {
  local tree_name="$1"
  local filter="$2"
  shift 2

  local current
  current=$(checkpoint_load "$tree_name") || return 1

  local updated
  updated=$(jq "$@" "$filter" <<<"$current") || return 1

  checkpoint_save "$tree_name" "$updated"
}

# Print the names of all steps whose status equals the given value.
# Arguments: tree_name status
# Returns: newline-separated step names; exit 1 if no checkpoint exists
_checkpoint_steps_with_status() {
  local tree_name="$1"
  local wanted="$2"

  local checkpoint
  checkpoint=$(checkpoint_load "$tree_name") || return 1

  jq -r --arg status "$wanted" \
    '.steps | to_entries | .[] | select(.value.status == $status) | .key' \
    <<<"$checkpoint"
}

# Initialize a new checkpoint
# Arguments: tree_name [vars_json]   (vars_json defaults to {})
# Returns: run_id on stdout; exit 1 if vars_json is not valid JSON or the
#          checkpoint cannot be written (nothing is printed in that case)
# Example: run_id=$(checkpoint_init "landing-page" '{"project_name": "test"}')
checkpoint_init() {
  local tree_name="$1"
  local vars_json="${2:-}"
  [[ -n "$vars_json" ]] || vars_json='{}'

  local run_id now checkpoint
  run_id="${tree_name}-$(date +%s)"
  now=$(_checkpoint_now)

  # jq rejects malformed vars_json here; bail out before touching the disk.
  checkpoint=$(jq -n \
    --arg tree "$tree_name" \
    --arg run_id "$run_id" \
    --arg started "$now" \
    --argjson vars "$vars_json" \
    '{
      tree: $tree,
      run_id: $run_id,
      status: "pending",
      vars: $vars,
      steps: {},
      started_at: $started,
      last_completed_step: null
    }') || return 1

  checkpoint_save "$tree_name" "$checkpoint" || return 1
  printf '%s\n' "$run_id"
}

# Load existing checkpoint
# Arguments: tree_name
# Returns: checkpoint JSON on stdout, exit 1 if not found
# Example: checkpoint=$(checkpoint_load "landing-page")
checkpoint_load() {
  local tree_name="$1"
  local path
  path="$(_checkpoint_path "$tree_name")"

  [[ -f "$path" ]] || return 1
  cat -- "$path"
}

# Save checkpoint to disk
# Arguments: tree_name checkpoint_json
# Returns: 0 on success; 1 if checkpoint_json is empty or the write fails.
#          Empty content is refused so that a failed upstream transformation
#          can never truncate an existing checkpoint.
# Example: checkpoint_save "landing-page" "$checkpoint"
checkpoint_save() {
  local tree_name="$1"
  local checkpoint="${2:-}"

  [[ -n "$checkpoint" ]] || return 1
  _checkpoint_ensure_dir || return 1

  printf '%s\n' "$checkpoint" > "$(_checkpoint_path "$tree_name")"
}

# Mark step as started
# Replaces any previous record of the step and sets the tree status to
# "partial".
# Arguments: tree_name step_name
# Returns: exit 1 if no checkpoint exists or the update fails
# Example: checkpoint_step_start "landing-page" "create-project"
checkpoint_step_start() {
  local tree_name="$1"
  local step_name="$2"

  _checkpoint_update "$tree_name" \
    '.steps[$step] = {status: "running", started_at: $started}
     | .status = "partial"' \
    --arg step "$step_name" \
    --arg started "$(_checkpoint_now)"
}

# Mark step as completed with output
# Arguments: tree_name step_name [output_json]   (output_json defaults to {})
# Returns: exit 1 if no checkpoint exists or output_json is not valid JSON;
#          the stored checkpoint is left unchanged in that case
# Example: checkpoint_step_complete "landing-page" "create-project" '{"project_id": "test"}'
checkpoint_step_complete() {
  local tree_name="$1"
  local step_name="$2"
  local output_json="${3:-}"
  [[ -n "$output_json" ]] || output_json='{}'

  _checkpoint_update "$tree_name" \
    '.steps[$step].status = "completed"
     | .steps[$step].completed_at = $completed
     | .steps[$step].output = $output
     | .last_completed_step = $step' \
    --arg step "$step_name" \
    --arg completed "$(_checkpoint_now)" \
    --argjson output "$output_json"
}

# Mark step as failed with error
# Also sets the tree status to "failed".
# Arguments: tree_name step_name error_message
# Returns: exit 1 if no checkpoint exists or the update fails
# Example: checkpoint_step_fail "landing-page" "wait-pipeline" "Pipeline failed"
checkpoint_step_fail() {
  local tree_name="$1"
  local step_name="$2"
  local error_msg="${3:-}"

  _checkpoint_update "$tree_name" \
    '.steps[$step].status = "failed"
     | .steps[$step].completed_at = $completed
     | .steps[$step].error = $error
     | .status = "failed"' \
    --arg step "$step_name" \
    --arg completed "$(_checkpoint_now)" \
    --arg error "$error_msg"
}

# Get output from a completed step
# Arguments: tree_name step_name [output_key]
# Returns: with output_key, the raw value of that key (nothing is printed if
#          the value is missing, null or false - jq's `//` treats those
#          alike); without it, the step's full output JSON ({} if absent).
#          Exit 1 if no checkpoint exists.
# Example: project_id=$(checkpoint_get_output "landing-page" "create-project" "project_id")
checkpoint_get_output() {
  local tree_name="$1"
  local step_name="$2"
  local output_key="${3:-}"

  local checkpoint
  checkpoint=$(checkpoint_load "$tree_name") || return 1

  # Names are passed with --arg rather than spliced into the filter text, so
  # quotes or backslashes in a step/key name cannot alter the jq program.
  if [[ -n "$output_key" ]]; then
    jq -r --arg step "$step_name" --arg key "$output_key" \
      '.steps[$step].output[$key] // empty' <<<"$checkpoint"
  else
    jq --arg step "$step_name" \
      '.steps[$step].output // {}' <<<"$checkpoint"
  fi
}

# Get variable from checkpoint
# Arguments: tree_name var_name
# Returns: variable value (nothing if missing, null or false);
#          exit 1 if no checkpoint exists
# Example: project_name=$(checkpoint_get_var "landing-page" "project_name")
checkpoint_get_var() {
  local tree_name="$1"
  local var_name="$2"

  local checkpoint
  checkpoint=$(checkpoint_load "$tree_name") || return 1

  # --arg keeps var_name out of the jq program text (see checkpoint_get_output).
  jq -r --arg name "$var_name" '.vars[$name] // empty' <<<"$checkpoint"
}

# Get overall checkpoint status
# Arguments: tree_name
# Returns: the stored status string (this file writes pending, partial,
#          completed and failed), "unknown" if the checkpoint has no status,
#          or "none" when no checkpoint exists. Exits 0 in the "none" case.
# Example: status=$(checkpoint_status "landing-page")
checkpoint_status() {
  local tree_name="$1"

  local checkpoint
  if ! checkpoint=$(checkpoint_load "$tree_name"); then
    echo "none"
    return 0
  fi

  jq -r '.status // "unknown"' <<<"$checkpoint"
}

# Get detailed checkpoint status (for display)
# Arguments: tree_name
# Returns: formatted, ANSI-coloured status report on stdout; exit 1 (with a
#          message on stdout) if no checkpoint exists
checkpoint_status_detail() {
  local tree_name="$1"

  local checkpoint
  if ! checkpoint=$(checkpoint_load "$tree_name"); then
    echo "No checkpoint found for tree: $tree_name"
    return 1
  fi

  # Steps are listed in start order; steps without started_at sort first.
  jq -r '
    "Tree: \(.tree)",
    "Run ID: \(.run_id)",
    "Status: \(.status)",
    "Started: \(.started_at)",
    "",
    "Variables:",
    (.vars | to_entries | .[] | " \(.key): \(.value)"),
    "",
    "Steps:",
    (.steps | to_entries | sort_by(.value.started_at // "") | .[] |
      if .value.status == "completed" then
        " \u001b[32m\u2713\u001b[0m \(.key): completed"
      elif .value.status == "failed" then
        " \u001b[31m\u2717\u001b[0m \(.key): failed - \(.value.error)"
      elif .value.status == "running" then
        " \u001b[33m\u25d0\u001b[0m \(.key): running..."
      else
        " \u25cb \(.key): \(.value.status)"
      end
    ),
    "",
    "Last completed: \(.last_completed_step // "none")"
  ' <<<"$checkpoint"
}

# Get list of completed steps
# Arguments: tree_name
# Returns: newline-separated list of completed step names;
#          exit 1 if no checkpoint exists
checkpoint_completed_steps() {
  _checkpoint_steps_with_status "$1" "completed"
}

# Get list of failed steps
# Arguments: tree_name
# Returns: newline-separated list of failed step names;
#          exit 1 if no checkpoint exists
checkpoint_failed_steps() {
  _checkpoint_steps_with_status "$1" "failed"
}

# List all checkpoints
# Creates CHECKPOINT_DIR if it does not exist yet.
# Returns: newline-separated list of tree names with checkpoints
# Example: trees=$(checkpoint_list)
checkpoint_list() {
  _checkpoint_ensure_dir

  local f # keep the loop variable out of the caller's scope
  for f in "$CHECKPOINT_DIR"/*.json; do
    # Without nullglob an unmatched pattern stays literal; skip it.
    [[ -e "$f" ]] || continue
    basename -- "$f" .json
  done
}

# Delete a checkpoint
# Arguments: tree_name
# Returns: 0 if the checkpoint was removed, 1 if it did not exist or could
#          not be removed
# Example: checkpoint_delete "landing-page"
checkpoint_delete() {
  local tree_name="$1"
  local path
  path="$(_checkpoint_path "$tree_name")"

  [[ -f "$path" ]] || return 1
  rm -- "$path" || return 1
}

# Mark entire tree as completed
# Sets status to "completed" and records completed_at.
# Arguments: tree_name
# Returns: exit 1 if no checkpoint exists or the update fails
checkpoint_mark_completed() {
  local tree_name="$1"

  _checkpoint_update "$tree_name" \
    '.status = "completed" | .completed_at = $completed' \
    --arg completed "$(_checkpoint_now)"
}

# Update vars in checkpoint (for resume with different vars)
# New keys are merged over the stored vars; keys not mentioned are kept.
# Arguments: tree_name vars_json
# Returns: exit 1 if no checkpoint exists or vars_json is not valid JSON;
#          the stored checkpoint is left unchanged in that case
checkpoint_update_vars() {
  local tree_name="$1"
  local vars_json="${2:-}"

  _checkpoint_update "$tree_name" \
    '.vars = (.vars + $new_vars)' \
    --argjson new_vars "$vars_json"
}