// This program generates MP3 voiceover files for the speaking blocks in
// Script using the ElevenLabs API. It can also list the blocks, or only
// find/create the designed voice for a speaker variant.
package main

import (
	"context"
	"flag"
	"fmt"
	"os"
	"path/filepath"
	"sort"
	"strings"
	"time"

	"pitch-voiceover/pkg/elevenlabs"
)

// SpeakerConfig describes one speaker variant.
type SpeakerConfig struct {
	// Name is the voice name looked up in the account and used when a new
	// voice is saved.
	Name string
	// Description is the prompt sent to voice design when no voice with
	// Name exists yet.
	Description string
	// Dir is the subdirectory of outputBase that receives this speaker's
	// audio files.
	Dir string
}

// speakers maps the value of the -speaker flag to its configuration.
var speakers = map[string]SpeakerConfig{
	"1": {
		Name:        "StemeDB Presenter",
		Description: "A warm, authoritative American male in his early 40s. Confident but approachable, like a trusted industry expert giving a presentation. Clear enunciation, measured pace, professional but not stiff. The voice of someone who has seen the problems firsthand and has a solution.",
		Dir:         "speaker-1",
	},
	"2": {
		Name:        "StemeDB Presenter v2",
		Description: "A deep, resonant American male voice with broadcast-quality clarity. Crystal clear diction, zero background noise, studio-perfect audio. The voice of a seasoned executive presenter - warm yet authoritative, with the polished delivery of a Fortune 500 keynote speaker. Natural pacing with confident pauses. Think: NPR host meets tech CEO.",
		Dir:         "speaker-2",
	},
	"3": {
		Name:        "StemeDB Presenter v3",
		Description: "American male, early 40s, deep smooth voice. Studio-quality recording with perfect audio clarity. Warm and authoritative tone. Clear precise diction with natural pacing. Professional narrator delivery, confident and measured. No breathiness, crystal clear pronunciation.",
		Dir:         "speaker-3",
	},
	"4": {
		Name:        "StemeDB Presenter v4",
		Description: "Male baritone voice, age 45, rich and full tone. Speaks slowly and deliberately like a documentary narrator. Strong consonants, open vowels. American midwest accent, neutral and professional. Clean studio recording quality.",
		Dir:         "speaker-4",
	},
	"5": {
		Name:        "StemeDB Presenter v5",
		Description: "Older British male, late 50s, distinguished and gravelly. Speaks with quiet authority like a veteran BBC journalist. Measured, thoughtful pacing. Slight rasp, very warm. Perfect studio audio quality.",
		Dir:         "speaker-5",
	},
	"6": {
		Name:        "StemeDB Presenter v6",
		Description: "User-selected voice",
		Dir:         "speaker-6",
	},
}

// outputBase is the directory under which each speaker's Dir is created.
// It is a relative path, so it resolves against the working directory.
const outputBase = "../../applications/pitch/audio"

// speakerKeys returns the keys of speakers in a stable order: shorter keys
// first, then lexicographic, which is numeric order for the plain decimal
// keys used here. Map iteration order is random, so sorting keeps the help
// and error text deterministic.
func speakerKeys() []string {
	keys := make([]string, 0, len(speakers))
	for k := range speakers {
		keys = append(keys, k)
	}
	sort.Slice(keys, func(i, j int) bool {
		if len(keys[i]) != len(keys[j]) {
			return len(keys[i]) < len(keys[j])
		}
		return keys[i] < keys[j]
	})
	return keys
}

// selectBlocks returns the blocks to generate. A single value of zero or
// less selects every block in script; otherwise only the block whose Number
// equals single is returned, and an error is returned if there is none.
func selectBlocks(script []SpeakingBlock, single int) ([]SpeakingBlock, error) {
	if single <= 0 {
		return script, nil
	}
	for _, b := range script {
		if b.Number == single {
			return []SpeakingBlock{b}, nil
		}
	}
	return nil, fmt.Errorf("block %d not found", single)
}

// main parses the flags, resolves the voice to use, and writes one MP3 file
// per selected block into outputBase/<speaker dir>. Failures on individual
// blocks do not stop the run, but make the process exit with status 1.
func main() {
	validSpeakers := strings.Join(speakerKeys(), ", ")

	list := flag.Bool("list", false, "List all speaking blocks")
	voiceOnly := flag.Bool("voice-only", false, "Create or find voice only, don't generate audio")
	single := flag.Int("single", 0, "Generate only a single block by number")
	voiceID := flag.String("voice-id", "", "Use specific voice ID instead of finding/creating")
	speaker := flag.String("speaker", "1", fmt.Sprintf("Speaker variant (one of: %s)", validSpeakers))
	flag.Parse()

	if *list {
		listBlocks()
		return
	}

	cfg, ok := speakers[*speaker]
	if !ok {
		fmt.Fprintf(os.Stderr, "Error: unknown speaker '%s' (use one of: %s)\n", *speaker, validSpeakers)
		os.Exit(1)
	}

	apiKey := os.Getenv("ELEVENLABS_API_KEY")
	if apiKey == "" {
		fmt.Fprintln(os.Stderr, "Error: ELEVENLABS_API_KEY environment variable is required")
		os.Exit(1)
	}

	client, err := elevenlabs.NewClient(elevenlabs.Config{
		APIKey:  apiKey,
		Timeout: 120 * time.Second,
	})
	if err != nil {
		fmt.Fprintf(os.Stderr, "Error creating client: %v\n", err)
		os.Exit(1)
	}

	ctx := context.Background()

	// Health check
	if err := client.Health(ctx); err != nil {
		fmt.Fprintf(os.Stderr, "Error connecting to ElevenLabs: %v\n", err)
		os.Exit(1)
	}
	fmt.Println("✓ Connected to ElevenLabs API")

	// Find or use provided voice ID
	vid := *voiceID
	if vid == "" {
		vid, err = findOrCreateVoice(ctx, client, cfg)
		if err != nil {
			fmt.Fprintf(os.Stderr, "Error with voice: %v\n", err)
			os.Exit(1)
		}
	}
	fmt.Printf("✓ Using voice: %s\n", vid)

	if *voiceOnly {
		return
	}

	// Create output directory
	outputDir := filepath.Join(outputBase, cfg.Dir)
	if err := os.MkdirAll(outputDir, 0755); err != nil {
		fmt.Fprintf(os.Stderr, "Error creating output directory: %v\n", err)
		os.Exit(1)
	}

	// Pick the blocks to generate. The lookup reports "not found" explicitly
	// rather than comparing slice lengths, which misfires when Script holds
	// exactly one block.
	blocks, err := selectBlocks(Script, *single)
	if err != nil {
		fmt.Fprintf(os.Stderr, "Error: %v\n", err)
		os.Exit(1)
	}

	// DON'T override VoiceSettings - let the designed voice use its natural characteristics
	// Previously we used high Stability (0.80) + low Style (0.05) which normalized all voices
	var settings *elevenlabs.VoiceSettings // nil = use voice's default settings

	// Keep going after a failed block so one bad request does not abort the
	// whole batch, but remember that something failed.
	failed := 0
	for _, block := range blocks {
		filename := fmt.Sprintf("%02d-%s.mp3", block.Number, block.Slug)
		outPath := filepath.Join(outputDir, filename)

		fmt.Printf("Generating %s...\n", filename)

		audio, err := client.TextToSpeechWithFormat(ctx, vid, elevenlabs.TextToSpeechRequest{
			Text:          block.Text,
			ModelID:       elevenlabs.ModelMultilingualV2,
			VoiceSettings: settings,
		}, elevenlabs.FormatMP3_44100_128)
		if err != nil {
			fmt.Fprintf(os.Stderr, "  Error: %v\n", err)
			failed++
			continue
		}

		if err := os.WriteFile(outPath, audio, 0644); err != nil {
			fmt.Fprintf(os.Stderr, "  Error writing file: %v\n", err)
			failed++
			continue
		}

		fmt.Printf("  ✓ %s (%d bytes)\n", filename, len(audio))
	}

	if failed > 0 {
		fmt.Fprintf(os.Stderr, "\n✗ %d of %d blocks failed; audio files in %s\n", failed, len(blocks), outputDir)
		os.Exit(1)
	}
	fmt.Printf("\n✓ Done! Audio files in %s\n", outputDir)
}

// listBlocks prints a table with the number, slug and step of every block
// in Script, followed by the total block count.
func listBlocks() {
	fmt.Printf("%-4s %-20s %s\n", "#", "Slug", "Step")
	fmt.Println(strings.Repeat("-", 60))
	for _, b := range Script {
		fmt.Printf("%-4d %-20s %s\n", b.Number, b.Slug, b.Step)
	}
	fmt.Printf("\nTotal: %d blocks\n", len(Script))
}

// findOrCreateVoice returns the ID of the voice named cfg.Name. If the
// account's voice list already contains a voice with that name, its ID is
// returned. Otherwise a voice is designed from cfg.Description and the first
// generated preview is saved under cfg.Name.
//
// Errors from listing, designing or saving are wrapped and returned; an
// error is also returned when the design call yields no previews.
func findOrCreateVoice(ctx context.Context, client *elevenlabs.Client, cfg SpeakerConfig) (string, error) {
	// Check for existing voice
	voices, err := client.ListVoices(ctx)
	if err != nil {
		return "", fmt.Errorf("list voices: %w", err)
	}

	for _, v := range voices {
		if v.Name == cfg.Name {
			fmt.Printf("✓ Found existing voice: %s\n", v.VoiceID)
			return v.VoiceID, nil
		}
	}

	// Design new voice
	fmt.Printf("Creating new voice '%s'...\n", cfg.Name)

	designResp, err := client.DesignVoice(ctx, elevenlabs.VoiceDesignRequest{
		VoiceDescription: cfg.Description,
		AutoGenerateText: true,
		GuidanceScale:    3.0,
	})
	if err != nil {
		return "", fmt.Errorf("design voice: %w", err)
	}

	if len(designResp.Previews) == 0 {
		return "", fmt.Errorf("no voice previews generated")
	}

	// Save the first preview
	saveResp, err := client.SaveDesignedVoice(ctx, elevenlabs.SaveVoiceRequest{
		VoiceName:        cfg.Name,
		VoiceDescription: cfg.Description,
		GeneratedVoiceID: designResp.Previews[0].GeneratedVoiceID,
	})
	if err != nil {
		return "", fmt.Errorf("save voice: %w", err)
	}

	fmt.Printf("✓ Created new voice: %s\n", saveResp.VoiceID)
	return saveResp.VoiceID, nil
}