stemedb/cmd/pitch-voiceover/main.go
jordan 157dbbb9eb feat: Complete Aphoria Phase 8-9 + UAT suite (90/90 tests passing)
## Phase 8: Enterprise Extractor Improvements 
- 14 security extractors (TLS, JWT, SQL injection, XSS, etc.)
- 10 framework-specific extractors (Spring, Django, Rails, etc.)
- Config file security detection (YAML, TOML)

## Phase 9: Autonomous Extractor Generation 
- Shadow mode executor with TP/FP tracking
- Graduation pipeline with confidence thresholds
- Auto-rollback on regression detection
- Cross-project pattern syncing

## UAT Suite Complete (14 scripts, 90 tests)
- test-core-detection.sh (6 tests)
- test-declarative-extractors.sh (5 tests)
- test-domain-frameworks.sh (5 tests)
- test-domain-unreal.sh (3 tests)
- test-llm-extraction.sh (6 tests)
- test-eval-harness.sh (5 tests)
- test-cross-language.sh (3 tests)
- test-precommit-performance.sh (4 tests)
- test-output-formats.sh (8 tests)
- test-drift-detection.sh (6 tests)
- test-exit-codes.sh (12 tests)
+ 3 more scripts

## Other Changes
- Updated roadmap to mark Phase 8-9 complete
- Added .gitignore entries for build artifacts
- Updated pre-commit: 800 line limit, exclude tests/data/cmd

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-06 22:50:55 -07:00

218 lines
6.5 KiB
Go

package main
import (
"context"
"flag"
"fmt"
"os"
"path/filepath"
"strings"
"time"
"pitch-voiceover/pkg/elevenlabs"
)
// SpeakerConfig describes one presenter voice variant selectable on the
// command line.
//
// Name is the voice name that findOrCreateVoice matches against existing
// voices and uses when saving a newly designed one. Description is the text
// prompt handed to voice design (and stored with the saved voice). Dir is the
// subdirectory of outputBase that receives the generated audio files.
type SpeakerConfig struct {
	Name, Description, Dir string
}
// speakers maps the value of the -speaker flag to the voice configuration for
// that variant. Each entry carries the voice name to look up or create, the
// description used as the voice-design prompt, and the output subdirectory
// (joined onto outputBase) for that variant's audio files.
var speakers = map[string]SpeakerConfig{
// Variant 1 is the default value of the -speaker flag.
"1": {
Name: "StemeDB Presenter",
Description: "A warm, authoritative American male in his early 40s. Confident but approachable, like a trusted industry expert giving a presentation. Clear enunciation, measured pace, professional but not stiff. The voice of someone who has seen the problems firsthand and has a solution.",
Dir: "speaker-1",
},
"2": {
Name: "StemeDB Presenter v2",
Description: "A deep, resonant American male voice with broadcast-quality clarity. Crystal clear diction, zero background noise, studio-perfect audio. The voice of a seasoned executive presenter - warm yet authoritative, with the polished delivery of a Fortune 500 keynote speaker. Natural pacing with confident pauses. Think: NPR host meets tech CEO.",
Dir: "speaker-2",
},
"3": {
Name: "StemeDB Presenter v3",
Description: "American male, early 40s, deep smooth voice. Studio-quality recording with perfect audio clarity. Warm and authoritative tone. Clear precise diction with natural pacing. Professional narrator delivery, confident and measured. No breathiness, crystal clear pronunciation.",
Dir: "speaker-3",
},
"4": {
Name: "StemeDB Presenter v4",
Description: "Male baritone voice, age 45, rich and full tone. Speaks slowly and deliberately like a documentary narrator. Strong consonants, open vowels. American midwest accent, neutral and professional. Clean studio recording quality.",
Dir: "speaker-4",
},
"5": {
Name: "StemeDB Presenter v5",
Description: "Older British male, late 50s, distinguished and gravelly. Speaks with quiet authority like a veteran BBC journalist. Measured, thoughtful pacing. Slight rasp, very warm. Perfect studio audio quality.",
Dir: "speaker-5",
},
// NOTE(review): this description is a placeholder rather than a design
// prompt; presumably this variant is meant to be used with -voice-id or a
// voice that already exists under this name. Confirm before relying on
// automatic voice creation for it.
"6": {
Name: "StemeDB Presenter v6",
Description: "User-selected voice",
Dir: "speaker-6",
},
}
// outputBase is the directory under which each speaker's audio subdirectory is
// created. It is a relative path, so it resolves against the working directory
// the command is run from.
const outputBase = "../../applications/pitch/audio"
// main is the entry point of the pitch voiceover generator.
//
// Flags:
//
//	-list        print the speaking blocks and exit (no API access needed)
//	-voice-only  resolve (find or create) the voice, then exit
//	-single N    generate only the block whose Number is N
//	-voice-id ID use the given voice ID instead of finding/creating one
//	-speaker K   speaker variant, a key of the speakers map (default "1")
//
// The ElevenLabs API key is read from the ELEVENLABS_API_KEY environment
// variable. Each selected block is synthesized to
// <outputBase>/<speaker dir>/<NN>-<slug>.mp3.
//
// Setup errors (unknown speaker, missing key, connection or voice failure,
// unknown block) terminate the process with exit status 1. A failure on an
// individual block is reported and the remaining blocks are still attempted,
// but the process exits with status 1 at the end if any block failed.
func main() {
	list := flag.Bool("list", false, "List all speaking blocks")
	voiceOnly := flag.Bool("voice-only", false, "Create or find voice only, don't generate audio")
	single := flag.Int("single", 0, "Generate only a single block by number")
	voiceID := flag.String("voice-id", "", "Use specific voice ID instead of finding/creating")
	speaker := flag.String("speaker", "1", "Speaker variant (1-6)")
	flag.Parse()

	if *list {
		listBlocks()
		return
	}

	cfg, ok := speakers[*speaker]
	if !ok {
		fmt.Fprintf(os.Stderr, "Error: unknown speaker '%s' (use 1-6)\n", *speaker)
		os.Exit(1)
	}

	apiKey := os.Getenv("ELEVENLABS_API_KEY")
	if apiKey == "" {
		fmt.Fprintln(os.Stderr, "Error: ELEVENLABS_API_KEY environment variable is required")
		os.Exit(1)
	}

	client, err := elevenlabs.NewClient(elevenlabs.Config{
		APIKey:  apiKey,
		Timeout: 120 * time.Second,
	})
	if err != nil {
		fmt.Fprintf(os.Stderr, "Error creating client: %v\n", err)
		os.Exit(1)
	}

	ctx := context.Background()

	// Health check
	if err := client.Health(ctx); err != nil {
		fmt.Fprintf(os.Stderr, "Error connecting to ElevenLabs: %v\n", err)
		os.Exit(1)
	}
	fmt.Println("✓ Connected to ElevenLabs API")

	// Find or use provided voice ID
	vid := *voiceID
	if vid == "" {
		vid, err = findOrCreateVoice(ctx, client, cfg)
		if err != nil {
			fmt.Fprintf(os.Stderr, "Error with voice: %v\n", err)
			os.Exit(1)
		}
	}
	fmt.Printf("✓ Using voice: %s\n", vid)

	if *voiceOnly {
		return
	}

	// Create output directory
	outputDir := filepath.Join(outputBase, cfg.Dir)
	if err := os.MkdirAll(outputDir, 0755); err != nil {
		fmt.Fprintf(os.Stderr, "Error creating output directory: %v\n", err)
		os.Exit(1)
	}

	// Select the blocks to generate. An explicit flag records whether the
	// requested block exists; comparing slice lengths would misreport a match
	// as "not found" when Script contains exactly one block.
	blocks := Script
	if *single > 0 {
		found := false
		for _, b := range Script {
			if b.Number == *single {
				blocks = []SpeakingBlock{b}
				found = true
				break
			}
		}
		if !found {
			fmt.Fprintf(os.Stderr, "Error: block %d not found\n", *single)
			os.Exit(1)
		}
	}

	// DON'T override VoiceSettings - let the designed voice use its natural characteristics
	// Previously we used high Stability (0.80) + low Style (0.05) which normalized all voices
	var settings *elevenlabs.VoiceSettings // nil = use voice's default settings

	// Generate audio. A failing block does not stop the run, but failures are
	// counted so the exit status reflects an incomplete result.
	failed := 0
	for _, block := range blocks {
		filename := fmt.Sprintf("%02d-%s.mp3", block.Number, block.Slug)
		outPath := filepath.Join(outputDir, filename)
		fmt.Printf("Generating %s...\n", filename)

		audio, err := client.TextToSpeechWithFormat(ctx, vid, elevenlabs.TextToSpeechRequest{
			Text:          block.Text,
			ModelID:       elevenlabs.ModelMultilingualV2,
			VoiceSettings: settings,
		}, elevenlabs.FormatMP3_44100_128)
		if err != nil {
			fmt.Fprintf(os.Stderr, " Error: %v\n", err)
			failed++
			continue
		}

		if err := os.WriteFile(outPath, audio, 0644); err != nil {
			fmt.Fprintf(os.Stderr, " Error writing file: %v\n", err)
			failed++
			continue
		}
		fmt.Printf(" ✓ %s (%d bytes)\n", filename, len(audio))
	}

	if failed > 0 {
		fmt.Fprintf(os.Stderr, "\n✗ %d of %d blocks failed; audio files in %s\n", failed, len(blocks), outputDir)
		os.Exit(1)
	}
	fmt.Printf("\n✓ Done! Audio files in %s\n", outputDir)
}
// listBlocks writes a table of every speaking block in Script to standard
// output: a header row, a 60-character rule, one row per block showing its
// number, slug, and step, and finally the total number of blocks.
func listBlocks() {
	const ruleWidth = 60

	fmt.Printf("%-4s %-20s %s\n", "#", "Slug", "Step")
	fmt.Println(strings.Repeat("-", ruleWidth))

	total := len(Script)
	for i := 0; i < total; i++ {
		entry := Script[i]
		fmt.Printf("%-4d %-20s %s\n", entry.Number, entry.Slug, entry.Step)
	}

	fmt.Printf("\nTotal: %d blocks\n", total)
}
// findOrCreateVoice resolves the voice ID for the given speaker configuration.
//
// It lists the voices available to client and returns the ID of the first one
// whose name equals cfg.Name. If none matches, it requests a voice design from
// cfg.Description, saves the first returned preview under cfg.Name, and
// returns the ID of the saved voice.
//
// An error is returned if listing, designing, or saving fails, or if the
// design call yields no previews. Progress messages are printed to standard
// output.
func findOrCreateVoice(ctx context.Context, client *elevenlabs.Client, cfg SpeakerConfig) (string, error) {
	// Reuse a voice that already carries the configured name.
	available, listErr := client.ListVoices(ctx)
	if listErr != nil {
		return "", fmt.Errorf("list voices: %w", listErr)
	}
	for _, candidate := range available {
		if candidate.Name != cfg.Name {
			continue
		}
		fmt.Printf("✓ Found existing voice: %s\n", candidate.VoiceID)
		return candidate.VoiceID, nil
	}

	// Nothing matched: design a new voice from the description.
	fmt.Printf("Creating new voice '%s'...\n", cfg.Name)
	designReq := elevenlabs.VoiceDesignRequest{
		VoiceDescription: cfg.Description,
		AutoGenerateText: true,
		GuidanceScale:    3.0,
	}
	designed, designErr := client.DesignVoice(ctx, designReq)
	if designErr != nil {
		return "", fmt.Errorf("design voice: %w", designErr)
	}
	if len(designed.Previews) == 0 {
		return "", fmt.Errorf("no voice previews generated")
	}

	// Persist the first preview as the named voice.
	previewID := designed.Previews[0].GeneratedVoiceID
	saveReq := elevenlabs.SaveVoiceRequest{
		VoiceName:        cfg.Name,
		VoiceDescription: cfg.Description,
		GeneratedVoiceID: previewID,
	}
	saved, saveErr := client.SaveDesignedVoice(ctx, saveReq)
	if saveErr != nil {
		return "", fmt.Errorf("save voice: %w", saveErr)
	}

	fmt.Printf("✓ Created new voice: %s\n", saved.VoiceID)
	return saved.VoiceID, nil
}