## Phase 8: Enterprise Extractor Improvements ✅

- 14 security extractors (TLS, JWT, SQL injection, XSS, etc.)
- 10 framework-specific extractors (Spring, Django, Rails, etc.)
- Config file security detection (YAML, TOML)

## Phase 9: Autonomous Extractor Generation ✅

- Shadow mode executor with TP/FP tracking
- Graduation pipeline with confidence thresholds
- Auto-rollback on regression detection
- Cross-project pattern syncing

## UAT Suite Complete (14 scripts, 90 tests)

- test-core-detection.sh (6 tests)
- test-declarative-extractors.sh (5 tests)
- test-domain-frameworks.sh (5 tests)
- test-domain-unreal.sh (3 tests)
- test-llm-extraction.sh (6 tests)
- test-eval-harness.sh (5 tests)
- test-cross-language.sh (3 tests)
- test-precommit-performance.sh (4 tests)
- test-output-formats.sh (8 tests)
- test-drift-detection.sh (6 tests)
- test-exit-codes.sh (12 tests)

+ 3 more scripts

## Other Changes

- Updated roadmap to mark Phase 8-9 complete
- Added .gitignore entries for build artifacts
- Updated pre-commit: 800 line limit, exclude tests/data/cmd

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
218 lines
6.5 KiB
Go
218 lines
6.5 KiB
Go
package main
|
|
|
|
import (
	"context"
	"flag"
	"fmt"
	"os"
	"path/filepath"
	"sort"
	"strings"
	"time"

	"pitch-voiceover/pkg/elevenlabs"
)
|
|
|
|
// SpeakerConfig describes one selectable speaker variant.
type SpeakerConfig struct {
	// Name is the voice name that is looked up in, or saved to, the
	// ElevenLabs account.
	Name string
	// Description is the text prompt sent when a new voice has to be designed.
	Description string
	// Dir is the sub-directory of outputBase that receives this speaker's
	// generated audio files.
	Dir string
}
|
|
|
|
var speakers = map[string]SpeakerConfig{
|
|
"1": {
|
|
Name: "StemeDB Presenter",
|
|
Description: "A warm, authoritative American male in his early 40s. Confident but approachable, like a trusted industry expert giving a presentation. Clear enunciation, measured pace, professional but not stiff. The voice of someone who has seen the problems firsthand and has a solution.",
|
|
Dir: "speaker-1",
|
|
},
|
|
"2": {
|
|
Name: "StemeDB Presenter v2",
|
|
Description: "A deep, resonant American male voice with broadcast-quality clarity. Crystal clear diction, zero background noise, studio-perfect audio. The voice of a seasoned executive presenter - warm yet authoritative, with the polished delivery of a Fortune 500 keynote speaker. Natural pacing with confident pauses. Think: NPR host meets tech CEO.",
|
|
Dir: "speaker-2",
|
|
},
|
|
"3": {
|
|
Name: "StemeDB Presenter v3",
|
|
Description: "American male, early 40s, deep smooth voice. Studio-quality recording with perfect audio clarity. Warm and authoritative tone. Clear precise diction with natural pacing. Professional narrator delivery, confident and measured. No breathiness, crystal clear pronunciation.",
|
|
Dir: "speaker-3",
|
|
},
|
|
"4": {
|
|
Name: "StemeDB Presenter v4",
|
|
Description: "Male baritone voice, age 45, rich and full tone. Speaks slowly and deliberately like a documentary narrator. Strong consonants, open vowels. American midwest accent, neutral and professional. Clean studio recording quality.",
|
|
Dir: "speaker-4",
|
|
},
|
|
"5": {
|
|
Name: "StemeDB Presenter v5",
|
|
Description: "Older British male, late 50s, distinguished and gravelly. Speaks with quiet authority like a veteran BBC journalist. Measured, thoughtful pacing. Slight rasp, very warm. Perfect studio audio quality.",
|
|
Dir: "speaker-5",
|
|
},
|
|
"6": {
|
|
Name: "StemeDB Presenter v6",
|
|
Description: "User-selected voice",
|
|
Dir: "speaker-6",
|
|
},
|
|
}
|
|
|
|
// outputBase is the directory under which each speaker's audio directory is
// created. It is a relative path, so it is resolved against the working
// directory the tool is started from.
const outputBase = "../../applications/pitch/audio"
|
|
|
|
func main() {
|
|
list := flag.Bool("list", false, "List all speaking blocks")
|
|
voiceOnly := flag.Bool("voice-only", false, "Create or find voice only, don't generate audio")
|
|
single := flag.Int("single", 0, "Generate only a single block by number")
|
|
voiceID := flag.String("voice-id", "", "Use specific voice ID instead of finding/creating")
|
|
speaker := flag.String("speaker", "1", "Speaker variant (1, 2, 3, or 4)")
|
|
flag.Parse()
|
|
|
|
if *list {
|
|
listBlocks()
|
|
return
|
|
}
|
|
|
|
cfg, ok := speakers[*speaker]
|
|
if !ok {
|
|
fmt.Fprintf(os.Stderr, "Error: unknown speaker '%s' (use 1, 2, 3, or 4)\n", *speaker)
|
|
os.Exit(1)
|
|
}
|
|
|
|
apiKey := os.Getenv("ELEVENLABS_API_KEY")
|
|
if apiKey == "" {
|
|
fmt.Fprintln(os.Stderr, "Error: ELEVENLABS_API_KEY environment variable is required")
|
|
os.Exit(1)
|
|
}
|
|
|
|
client, err := elevenlabs.NewClient(elevenlabs.Config{
|
|
APIKey: apiKey,
|
|
Timeout: 120 * time.Second,
|
|
})
|
|
if err != nil {
|
|
fmt.Fprintf(os.Stderr, "Error creating client: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
|
|
ctx := context.Background()
|
|
|
|
// Health check
|
|
if err := client.Health(ctx); err != nil {
|
|
fmt.Fprintf(os.Stderr, "Error connecting to ElevenLabs: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
fmt.Println("✓ Connected to ElevenLabs API")
|
|
|
|
// Find or use provided voice ID
|
|
vid := *voiceID
|
|
if vid == "" {
|
|
vid, err = findOrCreateVoice(ctx, client, cfg)
|
|
if err != nil {
|
|
fmt.Fprintf(os.Stderr, "Error with voice: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
}
|
|
fmt.Printf("✓ Using voice: %s\n", vid)
|
|
|
|
if *voiceOnly {
|
|
return
|
|
}
|
|
|
|
// Create output directory
|
|
outputDir := filepath.Join(outputBase, cfg.Dir)
|
|
if err := os.MkdirAll(outputDir, 0755); err != nil {
|
|
fmt.Fprintf(os.Stderr, "Error creating output directory: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
|
|
// Generate audio
|
|
blocks := Script
|
|
if *single > 0 {
|
|
for _, b := range Script {
|
|
if b.Number == *single {
|
|
blocks = []SpeakingBlock{b}
|
|
break
|
|
}
|
|
}
|
|
if len(blocks) == len(Script) {
|
|
fmt.Fprintf(os.Stderr, "Error: block %d not found\n", *single)
|
|
os.Exit(1)
|
|
}
|
|
}
|
|
|
|
// DON'T override VoiceSettings - let the designed voice use its natural characteristics
|
|
// Previously we used high Stability (0.80) + low Style (0.05) which normalized all voices
|
|
var settings *elevenlabs.VoiceSettings // nil = use voice's default settings
|
|
|
|
for _, block := range blocks {
|
|
filename := fmt.Sprintf("%02d-%s.mp3", block.Number, block.Slug)
|
|
outPath := filepath.Join(outputDir, filename)
|
|
|
|
fmt.Printf("Generating %s...\n", filename)
|
|
|
|
audio, err := client.TextToSpeechWithFormat(ctx, vid, elevenlabs.TextToSpeechRequest{
|
|
Text: block.Text,
|
|
ModelID: elevenlabs.ModelMultilingualV2,
|
|
VoiceSettings: settings,
|
|
}, elevenlabs.FormatMP3_44100_128)
|
|
if err != nil {
|
|
fmt.Fprintf(os.Stderr, " Error: %v\n", err)
|
|
continue
|
|
}
|
|
|
|
if err := os.WriteFile(outPath, audio, 0644); err != nil {
|
|
fmt.Fprintf(os.Stderr, " Error writing file: %v\n", err)
|
|
continue
|
|
}
|
|
|
|
fmt.Printf(" ✓ %s (%d bytes)\n", filename, len(audio))
|
|
}
|
|
|
|
fmt.Printf("\n✓ Done! Audio files in %s\n", outputDir)
|
|
}
|
|
|
|
func listBlocks() {
|
|
fmt.Printf("%-4s %-20s %s\n", "#", "Slug", "Step")
|
|
fmt.Println(strings.Repeat("-", 60))
|
|
for _, b := range Script {
|
|
fmt.Printf("%-4d %-20s %s\n", b.Number, b.Slug, b.Step)
|
|
}
|
|
fmt.Printf("\nTotal: %d blocks\n", len(Script))
|
|
}
|
|
|
|
func findOrCreateVoice(ctx context.Context, client *elevenlabs.Client, cfg SpeakerConfig) (string, error) {
|
|
// Check for existing voice
|
|
voices, err := client.ListVoices(ctx)
|
|
if err != nil {
|
|
return "", fmt.Errorf("list voices: %w", err)
|
|
}
|
|
|
|
for _, v := range voices {
|
|
if v.Name == cfg.Name {
|
|
fmt.Printf("✓ Found existing voice: %s\n", v.VoiceID)
|
|
return v.VoiceID, nil
|
|
}
|
|
}
|
|
|
|
// Design new voice
|
|
fmt.Printf("Creating new voice '%s'...\n", cfg.Name)
|
|
designResp, err := client.DesignVoice(ctx, elevenlabs.VoiceDesignRequest{
|
|
VoiceDescription: cfg.Description,
|
|
AutoGenerateText: true,
|
|
GuidanceScale: 3.0,
|
|
})
|
|
if err != nil {
|
|
return "", fmt.Errorf("design voice: %w", err)
|
|
}
|
|
|
|
if len(designResp.Previews) == 0 {
|
|
return "", fmt.Errorf("no voice previews generated")
|
|
}
|
|
|
|
// Save the first preview
|
|
saveResp, err := client.SaveDesignedVoice(ctx, elevenlabs.SaveVoiceRequest{
|
|
VoiceName: cfg.Name,
|
|
VoiceDescription: cfg.Description,
|
|
GeneratedVoiceID: designResp.Previews[0].GeneratedVoiceID,
|
|
})
|
|
if err != nil {
|
|
return "", fmt.Errorf("save voice: %w", err)
|
|
}
|
|
|
|
fmt.Printf("✓ Created new voice: %s\n", saveResp.VoiceID)
|
|
return saveResp.VoiceID, nil
|
|
}
|