Phase 1 delivers the complete durability and storage layer:

- WAL with crash recovery: Append-only journal with BLAKE3 checksums,
  fsync guarantees, and proper seek-to-EOF on reopen
- Storage engine: sled-backed KVStore with scan_prefix for range queries
- Content-addressed storage: H:{hash}, V:{hash}, E:{hash} key patterns
- Ingestor: Background worker tailing WAL, writing to KV with 8-byte
  aligned record headers for rkyv zero-copy deserialization
- Comprehensive tests: 31 tests covering crash recovery, round-trips,
  and multi-cycle durability

New crates: stemedb-wal, stemedb-storage, stemedb-ingest

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
579 lines
15 KiB
YAML
579 lines
15 KiB
YAML
# Scenario metadata: identifier, display title/subtitle, and version string.
meta:
  id: agile-agent-team
  title: "Agile AI Agent Team"
  subtitle: "Knowledge Coordination with Episteme"
  version: "1.0.0"

# Participants. Each key is the name that steps reference in their
# `from` / `to` fields; `id` is a short code, `color` a hex display color.
actors:
  research_agent:
    id: RA
    label: "Research Agent"
    description: "Ingests external sources, stores with uncertainty"
    color: "#3B82F6"

  lead_orchestrator:
    id: LO
    label: "Lead Orchestrator"
    description: "Coordinates team, routes work based on knowledge"
    color: "#8B5CF6"

  implementation_agent:
    id: IA
    label: "Implementation Agent"
    description: "Writes code against current patterns"
    color: "#10B981"

  deploy_agent:
    id: DA
    label: "Deploy Agent"
    description: "Deploys configurations to production"
    color: "#F59E0B"

  episteme:
    id: E
    label: "Episteme"
    description: "Probabilistic knowledge graph"
    color: "#FBBF24"

  human:
    id: H
    label: "Human Supervisor"
    description: "Reviews decisions, corrects errors"
    color: "#EC4899"

  gardener:
    id: G
    label: "Gardener"
    description: "Background worker for TrustRank propagation"
    color: "#14B8A6"

  production:
    id: P
    label: "Production"
    description: "Live system"
    color: "#EF4444"

# Ordered list of scenarios. Each scenario has an id, display titles, a
# description, and a list of steps (messages between actors).
# NOTE(review): nesting was reconstructed from a flattened copy; step-level
# keys (danger/success/warning, note, callout) are assumed to sit beside
# `data`, not inside it — confirm against the consumer's schema.
sequences:
# ============================================
# SEQUENCE 1: THE CATASTROPHE
# ============================================
- id: catastrophe
  title: "The Catastrophe"
  subtitle: "When Proposals Look Like Decisions"
  description: |
    A 47-minute production outage because an AI agent couldn't
    distinguish a proposal from an approved decision.

  steps:
  - id: cat-1
    from: research_agent
    to: episteme
    action: assert
    label: "Store RFC finding"
    data:
      subject: "auth/jwt"
      predicate: "signing_algorithm"
      object: "ES256"
      lifecycle: "proposed"
      confidence: 0.75
      source: "security-rfc-2024.md"
    note: "RFC proposes ES256. Stored as PROPOSED."

  - id: cat-2
    from: lead_orchestrator
    to: episteme
    action: query
    label: "What's the JWT algorithm?"
    # The query carries no `lifecycle` key; compare with cor-2.
    data:
      subject: "auth/jwt"
      predicate: "signing_algorithm"
      lens: "recency"
    danger: true
    note: "No lifecycle filter. This is the bug."
    callout: "lifecycle: ???"

  - id: cat-3
    from: episteme
    to: lead_orchestrator
    action: response
    label: "ES256 (conf: 0.87)"
    data:
      value: "ES256"
      confidence: 0.87
      lifecycle: "proposed"
    danger: true
    note: "Returns the proposal. Most recent wins."

  - id: cat-4
    from: lead_orchestrator
    to: implementation_agent
    action: delegate
    label: "Use ES256 for JWT"
    data:
      algorithm: "ES256"
      confidence: 0.87
    danger: true
    note: "Orchestrator passes 'truth' downstream."

  - id: cat-5
    from: implementation_agent
    to: deploy_agent
    action: handoff
    label: "Config ready"
    data:
      jwt_algorithm: "ES256"
    danger: true
    note: "Code written against ES256."

  - id: cat-6
    from: deploy_agent
    to: production
    action: deploy
    label: "Deploy JWT config"
    data:
      config:
        algorithm: "ES256"
    danger: true
    note: "Deployed with confidence. Tests passed."

  - id: cat-7
    from: production
    to: production
    action: error
    label: "401 Unauthorized"
    data:
      error: "JWT signature validation failed"
      expected: "RS256"
      received: "ES256"
    danger: true
    note: "Auth service expects RS256. Every token fails."
    callout: "3:00 AM - Pager fires"

# ============================================
# SEQUENCE 2: THE CORRECT PATH
# ============================================
# Same JWT scenario, with the query restricted to lifecycle "approved".
# NOTE(review): nesting reconstructed from a flattened copy; `success`,
# `note`, and `callout` are assumed to be step-level keys — confirm.
- id: correct_path
  title: "The Correct Path"
  subtitle: "With Lifecycle Filtering"
  description: |
    The same scenario, but with Episteme's lifecycle filtering.
    Proposals stay proposals. Approved stays approved.

  steps:
  - id: cor-1
    from: research_agent
    to: episteme
    action: assert
    label: "Store RFC finding"
    data:
      subject: "auth/jwt"
      predicate: "signing_algorithm"
      object: "ES256"
      lifecycle: "proposed"
      confidence: 0.75
    note: "Same RFC. Still stored as PROPOSED."

  - id: cor-2
    from: lead_orchestrator
    to: episteme
    action: query
    label: "What's the APPROVED JWT algorithm?"
    data:
      subject: "auth/jwt"
      predicate: "signing_algorithm"
      lens: "authority"
      lifecycle: "approved"
    success: true
    note: "Lifecycle filter: approved only."
    callout: "lifecycle: approved"

  - id: cor-3
    from: episteme
    to: lead_orchestrator
    action: response
    label: "RS256 (conf: 0.92)"
    data:
      value: "RS256"
      confidence: 0.92
      lifecycle: "approved"
      source: "production-config.yaml"
    success: true
    note: "Returns the approved decision. Proposal excluded."

  - id: cor-4
    from: lead_orchestrator
    to: implementation_agent
    action: delegate
    label: "Use RS256 for JWT"
    data:
      algorithm: "RS256"
      confidence: 0.92
    success: true
    note: "Correct algorithm propagates."

  - id: cor-5
    from: implementation_agent
    to: episteme
    action: query
    label: "Pre-flight constraint check"
    data:
      context: "auth_jwt"
      lens: "constraints"
    success: true
    note: "Check for forbidden patterns before coding."

  - id: cor-6
    from: episteme
    to: implementation_agent
    action: response
    label: "No violations"
    data:
      constraints: []
      clear_to_proceed: true
    success: true
    note: "No negative constraints for RS256."

  - id: cor-7
    from: implementation_agent
    to: deploy_agent
    action: handoff
    label: "Config ready"
    data:
      jwt_algorithm: "RS256"
    success: true
    note: "Code written against RS256."

  - id: cor-8
    from: deploy_agent
    to: production
    action: deploy
    label: "Deploy JWT config"
    data:
      config:
        algorithm: "RS256"
    success: true
    note: "Deployed. Matches production expectation."

  - id: cor-9
    from: production
    to: production
    action: success
    label: "200 OK"
    data:
      status: "healthy"
      tokens_validated: true
    success: true
    note: "Auth works. No pager. Sleep continues."

# ============================================
# SEQUENCE 3: THE CORRECTION LOOP
# ============================================
# Post-incident flow: audit query, supersede the assertion, then two
# gardener updates (TrustRank delta and a stored constraint).
# NOTE(review): nesting reconstructed from a flattened copy; `danger`,
# `warning`, `success`, `note`, and `callout` are assumed step-level — confirm.
- id: correction_loop
  title: "The Correction Loop"
  subtitle: "Tracing, Fixing, Learning"
  description: |
    Post-incident: Human traces the bug, corrects the record,
    and the Gardener ensures agents learn from the mistake.

  steps:
  - id: fix-1
    from: human
    to: episteme
    action: query
    label: "Trace deploy agent queries"
    data:
      type: "audit"
      # NOTE(review): the audit filters on deploy_agent, but fix-5 below
      # penalizes lead_orchestrator for the query — confirm intended agent.
      agent_id: "deploy_agent"
      time_range: "-6h"
      subject: "auth/*"
    note: "SRE investigates: what did the agent believe?"

  - id: fix-2
    from: episteme
    to: human
    action: response
    label: "Query audit trail"
    data:
      query_id: "q_7f3a2b"
      timestamp: "2024-01-15T21:03:47Z"
      subject: "auth/jwt"
      predicate: "signing_algorithm"
      lens: "recency"
      # Explicit null: the audited query had no lifecycle filter.
      lifecycle_filter: null
      result: "ES256"
      contributing:
      - hash: "rfc_2024_001"
        lifecycle: "proposed"
        weight: 0.9
    danger: true
    note: "Found it: no lifecycle filter. Proposal returned."
    callout: "lifecycle_filter: null"

  - id: fix-3
    from: human
    to: episteme
    action: supersede
    label: "Mark assertion incorrect"
    data:
      hash: "rfc_2024_001"
      reason: "Proposal treated as approved decision"
      type: "RequiresReview"
    note: "Supersede the problematic assertion."

  - id: fix-4
    from: episteme
    to: gardener
    action: trigger
    label: "Correction event"
    data:
      superseded_hash: "rfc_2024_001"
      superseding_agent: "human_supervisor"
      affected_queries: ["q_7f3a2b"]
    note: "Gardener wakes up."

  - id: fix-5
    from: gardener
    to: episteme
    action: update
    label: "TrustRank back-propagation"
    data:
      agent_id: "lead_orchestrator"
      topic: "auth/jwt"
      delta: -0.15
      reason: "Query returned proposal as decision"
    warning: true
    note: "Lead Orchestrator's reputation on auth topics drops."

  - id: fix-6
    from: gardener
    to: episteme
    action: update
    label: "Store negative constraint"
    data:
      subject: "auth/jwt"
      predicate: "query_pattern"
      must_use: "lifecycle=approved"
      forbidden: "lifecycle=null"
      reason: "Proposals must not be treated as decisions"
    success: true
    note: "Future queries will see this constraint."

# ============================================
# SEQUENCE 4: PERSISTENT LEARNING
# ============================================
# A correction stored on "Day 1" is returned by a constraint query in a
# fresh session on "Day 30".
# NOTE(review): nesting reconstructed from a flattened copy; `success`,
# `warning`, `note`, and `callout` are assumed step-level — confirm.
- id: persistent_learning
  title: "Persistent Learning"
  subtitle: "Fixing the Optimization Conflict"
  description: |
    1 month later. New session. Empty context window.
    But the lesson persists.

  steps:
  - id: learn-1
    from: human
    to: episteme
    action: assert
    label: "Correct the agent"
    data:
      subject: "Project_X_Http_Client"
      predicate: "must_use_library"
      object: "axios"
      meta:
        forbidden_alternative: "requests"
        reason: "requests library deprecated for this project"
      # NOTE(review): confidence/lifecycle placed beside `meta` (as in other
      # assert steps); the flattened source is ambiguous — confirm.
      confidence: 1.0
      lifecycle: "approved"
    note: "Human stores correction with forbidden alternative."
    callout: "Day 1"

  - id: learn-2
    from: episteme
    to: gardener
    action: trigger
    label: "Negative constraint stored"
    data:
      type: "correction"
      agent_corrected: "implementation_agent"
    note: "Gardener sees the correction event."

  - id: learn-3
    from: gardener
    to: episteme
    action: update
    label: "TrustRank penalty"
    data:
      agent_id: "implementation_agent"
      topic: "http_libraries"
      delta: -0.20
    warning: true
    note: "Implementation Agent's confidence on HTTP libs drops."
    callout: "Learns from mistake"

  - id: learn-4
    from: implementation_agent
    to: implementation_agent
    action: start
    label: "New session begins"
    data:
      context_window: "empty"
      system_prompt: "default"
    note: "30 days later. Fresh context. No memory of correction."
    callout: "Day 30 - New Session"

  - id: learn-5
    from: implementation_agent
    to: episteme
    action: query
    label: "Pre-flight constraint check"
    data:
      # NOTE(review): context is "python_http" while the constraint returned
      # in learn-6 mandates "axios" — confirm the context key is intended.
      context: "python_http"
      lens: "constraints"
    success: true
    note: "Before writing code, check constraints."
    callout: "Automatic pre-flight"

  - id: learn-6
    from: episteme
    to: implementation_agent
    action: response
    label: "Constraint found"
    data:
      constraints:
      - subject: "Project_X_Http_Client"
        must_use: "axios"
        forbidden: "requests"
        reason: "requests library deprecated for this project"
        confidence: 1.0
    success: true
    note: "The correction from Day 1 is still there."
    callout: "Survived context window!"

  - id: learn-7
    from: implementation_agent
    to: implementation_agent
    action: generate
    label: "Write code with axios"
    data:
      import: "axios"
      avoided: "requests"
    success: true
    note: "Agent uses axios. Constraint honored."

  - id: learn-8
    from: episteme
    to: episteme
    action: resurrect
    label: "Resurrection"
    data:
      constraint_hash: "axios_constraint"
      last_verified: "now"
      confidence: 1.0
    success: true
    note: "Constraint used successfully. Stays fresh forever."
    callout: "Resurrection"

# ============================================
# SEQUENCE 5: TIME TRAVEL DEBUGGING
# ============================================
# Contrasts a current-state query with an `as_of` query and a diff query.
# NOTE(review): nesting reconstructed from a flattened copy; `danger`,
# `success`, `note`, and `callout` are assumed step-level — confirm.
- id: time_travel
  title: "Time Travel Debugging"
  subtitle: "What Did We Believe Then?"
  description: |
    3:00 AM incident investigation. The SRE needs to know
    what the system believed 6 hours ago, not now.

  steps:
  - id: tt-1
    from: human
    to: episteme
    action: query
    label: "What's the current JWT algorithm?"
    data:
      subject: "auth/jwt"
      predicate: "signing_algorithm"
      lifecycle: "approved"
    note: "Current state shows RS256 (post-fix)."

  - id: tt-2
    from: episteme
    to: human
    action: response
    label: "RS256 (current)"
    data:
      value: "RS256"
      confidence: 0.95
    note: "This is useless for debugging. We need history."

  - id: tt-3
    from: human
    to: episteme
    action: query
    label: "What did we believe at 9pm?"
    data:
      subject: "auth/jwt"
      predicate: "signing_algorithm"
      # Quoted so the timestamp stays a string rather than a parsed date.
      as_of: "2024-01-15T21:00:00Z"
    note: "Time-travel query."
    callout: "as_of: 9pm"

  - id: tt-4
    from: episteme
    to: human
    action: response
    label: "ES256 (at 9pm)"
    data:
      value: "ES256"
      confidence: 0.87
      lifecycle: "proposed"
      as_of: "2024-01-15T21:00:00Z"
    danger: true
    note: "At 9pm, the system believed ES256 was correct."
    callout: "Found the state at incident time"

  - id: tt-5
    from: human
    to: episteme
    action: query
    label: "What changed in last 24h?"
    data:
      type: "diff"
      subject: "auth/jwt"
      # Here `from` is a relative time bound inside `data`, not an actor.
      from: "-24h"
    note: "Diff view for change analysis."

  - id: tt-6
    from: episteme
    to: human
    action: response
    label: "Diff result"
    data:
      added:
      - hash: "rfc_2024_001"
        value: "ES256"
        lifecycle: "proposed"
        added_at: "2024-01-15T14:30:00Z"
      unchanged:
      - hash: "prod_config_v2"
        value: "RS256"
        lifecycle: "approved"
    success: true
    note: "Clear view: RFC added at 2:30pm caused the issue."

# Display styling per annotation kind. The keys danger / warning / success
# match the boolean flags set on individual steps; `info` is defined here
# but no step in this file sets it (presumably a default — confirm).
annotations:
  danger:
    color: "#EF4444"
    label: "Problem"
    icon: "alert-triangle"
  warning:
    color: "#F59E0B"
    label: "Warning"
    icon: "alert-circle"
  success:
    color: "#10B981"
    label: "Success"
    icon: "check-circle"
  info:
    color: "#3B82F6"
    label: "Info"
    icon: "info"