persona-community-2/pkg/personagen/videogen.go
jordan cb3d4d5786
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
ci/woodpecker/manual/woodpecker Pipeline was successful
Initialize project from skeleton template
2026-02-23 10:53:55 +00:00

269 lines
8.4 KiB
Go

package personagen
import (
"context"
"fmt"
"log/slog"
"strings"
"git.threesix.ai/jordan/persona-community-2/pkg/mediagen"
"git.threesix.ai/jordan/persona-community-2/pkg/persona"
)
// generateVideo builds a Veo prompt for the given motion type and asks the
// mediagen provider to render it.
//
// The anchor bytes are sent as the single reference image (declared as
// image/png) so the provider can keep the persona's identity consistent; an
// empty anchor is rejected up front instead of being forwarded.
//
// The VideoSpec is taken from spec.Videos when an entry with the requested
// motion type exists; that entry is then updated in place (Prompt and Status).
// Otherwise a spec from persona.DefaultVideoMatrix() is used, and if neither
// source knows the motion type an error is returned.
//
// On success it returns the VideoSpec and the raw bytes of the first video in
// the provider response (to be uploaded by the caller). On failure both are
// nil and the error describes what went wrong. A nil logger falls back to
// slog.Default().
func generateVideo(
	ctx context.Context,
	mg *mediagen.Manager,
	spec *persona.PersonaSpec,
	motionType persona.MotionType,
	anchor []byte,
	logger *slog.Logger,
) (*persona.VideoSpec, []byte, error) {
	if mg == nil {
		return nil, nil, fmt.Errorf("mediagen not configured")
	}
	if spec == nil {
		return nil, nil, fmt.Errorf("persona spec is nil")
	}
	if len(anchor) == 0 {
		return nil, nil, fmt.Errorf("anchor image is required for video generation")
	}
	if logger == nil {
		logger = slog.Default()
	}

	// Prefer the entry already present in the persona's own video list so that
	// prompt and status updates are visible through spec.
	var videoSpec *persona.VideoSpec
	for i := range spec.Videos {
		if spec.Videos[i].MotionType == motionType {
			videoSpec = &spec.Videos[i]
			break
		}
	}
	if videoSpec == nil {
		// Not part of this persona's list: fall back to an ephemeral spec
		// taken from the default matrix.
		defaults := persona.DefaultVideoMatrix()
		for i := range defaults {
			if defaults[i].MotionType == motionType {
				videoSpec = &defaults[i]
				break
			}
		}
	}
	if videoSpec == nil {
		return nil, nil, fmt.Errorf("unsupported motion type: %s", motionType)
	}

	prompt := buildVeoPrompt(spec, motionType)
	videoSpec.Prompt = prompt
	logger.Info("generating video", "motion_type", motionType, "duration", videoSpec.Duration)

	resp, err := mg.GenerateVideo(ctx, mediagen.VideoRequest{
		Prompt:      prompt,
		AspectRatio: videoSpec.AspectRatio,
		Duration:    videoSpec.Duration,
		ReferenceImages: []mediagen.Image{{
			Data:     anchor,
			MimeType: "image/png",
		}},
	})
	if err != nil {
		videoSpec.Status = persona.VideoStatusFailed
		return nil, nil, fmt.Errorf("video provider error: %w", err)
	}
	if len(resp.Videos) == 0 {
		videoSpec.Status = persona.VideoStatusFailed
		return nil, nil, fmt.Errorf("no videos returned from provider for motion type %s", motionType)
	}

	videoSpec.Status = persona.VideoStatusComplete
	return videoSpec, resp.Videos[0].Data, nil
}
// pronounSet bundles the pronoun forms used when writing prompt sentences
// about a persona. Field order matters: values are built positionally.
type pronounSet struct {
	subject string // subject form, e.g. "She"
	object  string // object form, e.g. "her"
}
// genderPronouns selects the pronoun pair for a persona from the gender stored
// in its DNA. Personas without DNA, and any gender other than man or
// non-binary, get "She"/"her".
func genderPronouns(spec *persona.PersonaSpec) pronounSet {
	fallback := pronounSet{subject: "She", object: "her"}
	if spec.DNA == nil {
		return fallback
	}

	gender := spec.DNA.Identity.Gender
	if gender == persona.GenderMan {
		return pronounSet{subject: "He", object: "him"}
	}
	if gender == persona.GenderNonBinary {
		return pronounSet{subject: "They", object: "them"}
	}
	return fallback
}
// buildVeoPrompt assembles the Veo generation prompt for one motion type.
// The identity line, audio descriptor and pronouns are derived from spec once
// and handed to the builder dedicated to that motion type; a motion type with
// no dedicated builder gets a generic candid-moment prompt.
func buildVeoPrompt(spec *persona.PersonaSpec, motionType persona.MotionType) string {
	var (
		who   = buildIdentityLine(spec)
		sound = buildAudioDescriptor(spec)
		pron  = genderPronouns(spec)
	)

	switch motionType {
	case persona.MotionSmileReveal:
		return buildSmileRevealPrompt(who, sound, pron)
	case persona.MotionPersonality:
		return buildPersonalityPrompt(spec, who, sound, pron)
	case persona.MotionLifestyle:
		return buildLifestylePrompt(spec, who, sound, pron)
	case persona.MotionInvitation:
		return buildInvitationPrompt(spec, who, sound, pron)
	}

	// No dedicated builder for this motion type.
	return fmt.Sprintf("%s Natural, candid moment, warm natural lighting. %s", who, sound)
}
// buildSmileRevealPrompt creates a warm, genuine smile reveal video prompt.
//
// identity is placed first and audio last; p.subject opens the action
// sentence. The verbs agree with the pronoun: a "They" subject (any casing)
// yields "They look ... then turn", every other subject keeps the singular
// "looks ... turns".
func buildSmileRevealPrompt(identity, audio string, p pronounSet) string {
	looks, turns := "looks", "turns"
	if strings.EqualFold(p.subject, "they") {
		looks, turns = "look", "turn"
	}
	return fmt.Sprintf(
		"%s %s %s slightly away, then %s directly to camera with a warm, genuine smile — "+
			"eyes lighting up, expression full of warmth and personality. "+
			"Soft natural lighting, close-up framing, shallow depth of field. "+
			"Slow motion for the smile reveal moment. %s",
		identity, p.subject, looks, turns, audio,
	)
}
// buildPersonalityPrompt creates an expressive personality showcase video prompt.
//
// The energy wording defaults to "warm and natural" and is raised when the
// persona's DNA marks the voice as expressive or animated. The lower-cased
// subject pronoun is followed by "are" for a "they" subject and "is"
// otherwise, so the sentence stays grammatical for every pronoun set.
func buildPersonalityPrompt(spec *persona.PersonaSpec, identity, audio string, p pronounSet) string {
	expressStyle := "warm and natural"
	if spec.DNA != nil {
		// Use voice expressiveness as a proxy for personality energy.
		switch spec.DNA.Voice.Expressiveness {
		case persona.ExpressivenessAnimated:
			expressStyle = "highly expressive and animated"
		case persona.ExpressivenessExpressive:
			expressStyle = "expressive and engaging"
		}
	}

	be := "is"
	if strings.EqualFold(p.subject, "they") {
		be = "are"
	}

	return fmt.Sprintf(
		"%s A candid personality moment — %s %s %s, laughing or reacting naturally, "+
			"full of charisma. Dynamic handheld camera movement. "+
			"Golden hour or warm studio lighting. "+
			"Cut between close-up and mid-shot for rhythm. %s",
		identity, strings.ToLower(p.subject), be, expressStyle, audio,
	)
}
// buildLifestylePrompt creates a contextual lifestyle video prompt.
//
// The scene and a default activity come from the persona's primary vacation
// style; unrecognised or empty styles keep the urban defaults. When the
// persona lists active interests, the first non-empty one replaces the
// activity (the scene is kept). The lower-cased subject pronoun is followed by
// "are" for a "they" subject and "is" otherwise.
func buildLifestylePrompt(spec *persona.PersonaSpec, identity, audio string, p pronounSet) string {
	scene := "stylish urban environment"
	activity := "going about their day"

	// An empty or unknown style matches no case and leaves the defaults alone.
	switch spec.Lifestyle.VacationStyle.Primary {
	case "beach", "coastal":
		scene = "sunny beachside setting"
		activity = "walking along the shoreline"
	case "city":
		scene = "vibrant city street"
		activity = "exploring the city"
	case "adventure":
		scene = "scenic outdoor landscape"
		activity = "enjoying the outdoors"
	case "luxury":
		scene = "luxurious upscale setting"
		activity = "enjoying a refined moment"
	case "cultural":
		scene = "culturally rich environment"
		activity = "immersed in their surroundings"
	}

	// Skip a blank interest so the sentence never loses its activity.
	if active := spec.Lifestyle.Interests.Active; len(active) > 0 && active[0] != "" {
		activity = active[0]
	}

	be := "is"
	if strings.EqualFold(p.subject, "they") {
		be = "are"
	}

	return fmt.Sprintf(
		"%s A natural lifestyle moment — %s %s %s in a %s. "+
			"Wide establishing shot transitioning to mid-shot. "+
			"Cinematic 16:9 composition, natural movement, vibrant color grading. %s",
		identity, strings.ToLower(p.subject), be, activity, scene, audio,
	)
}
// buildInvitationPrompt creates a direct-address invitation video prompt.
//
// The first sentence uses the subject pronoun and the second the persona's
// first name. Verbs agree with the pronoun ("They look" vs "She looks"). If
// the first name is empty, the second sentence falls back to the subject
// pronoun so it still has a subject.
func buildInvitationPrompt(spec *persona.PersonaSpec, identity, audio string, p pronounSet) string {
	plural := strings.EqualFold(p.subject, "they")

	looks := "looks"
	if plural {
		looks = "look"
	}

	// A name always takes the singular verb; only the pronoun fallback can be plural.
	actor, gestures := spec.Name.First, "gestures"
	if actor == "" {
		actor = p.subject
		if plural {
			gestures = "gesture"
		}
	}

	return fmt.Sprintf(
		"%s %s %s directly into the camera with a warm, confident expression. "+
			"%s %s naturally as if personally inviting the viewer, "+
			"making direct eye contact, with a knowing smile. "+
			"Close-up to mid-shot. Clean, aspirational background. "+
			"Cinematic vertical 9:16 framing. %s",
		identity, p.subject, looks, actor, gestures, audio,
	)
}
// buildIdentityLine creates a one-line identity description for video prompts.
//
// With DNA present the line reads
// "<first name>, a <age>-year-old <ethnicity> <gender> with <colour> <texture> hair, <build> build, <tone> skin."
// Without DNA only the first name is available; it is returned with a
// trailing period so that, like the full form, it can be followed directly by
// the next prompt sentence. An empty first name without DNA yields "".
func buildIdentityLine(spec *persona.PersonaSpec) string {
	name := spec.Name.First
	if spec.DNA == nil {
		if name == "" {
			return ""
		}
		return name + "."
	}

	id := spec.DNA.Identity
	return fmt.Sprintf(
		"%s, a %d-year-old %s %s with %s %s hair, %s build, %s skin.",
		name,
		id.Age,
		ethnicitToAdj(id.Ethnicity),
		strings.ToLower(string(id.Gender)),
		string(spec.DNA.Face.HairColor),
		string(spec.DNA.Face.HairTexture),
		string(spec.DNA.Body.Build),
		string(spec.DNA.Face.SkinTone),
	)
}
// buildAudioDescriptor turns the persona's voice DNA into the audio sentence
// appended to Veo prompts. Pitch, cadence and expressiveness go through their
// describing helpers; timbre is used verbatim. Without DNA a generic ambient
// audio sentence is returned.
func buildAudioDescriptor(spec *persona.PersonaSpec) string {
	dna := spec.DNA
	if dna == nil {
		return "Natural ambient audio."
	}

	return fmt.Sprintf(
		"Audio: %s %s voice, %s delivery, %s.",
		voicePitchDesc(dna.Voice.Pitch),
		string(dna.Voice.Timbre),
		voiceCadenceDesc(dna.Voice.Cadence),
		voiceExpressivenessDesc(dna.Voice.Expressiveness),
	)
}
// voicePitchDesc maps a pitch category to a prompt adjective: the two high
// categories share "higher-pitched", the two low ones "lower-pitched", and
// everything else is "medium-pitched".
func voicePitchDesc(p persona.PitchCategory) string {
	desc := "medium-pitched"
	switch p {
	case persona.PitchVeryHigh, persona.PitchHigh:
		desc = "higher-pitched"
	case persona.PitchLow, persona.PitchVeryLow:
		desc = "lower-pitched"
	}
	return desc
}
// voiceCadenceDesc maps a cadence category to a delivery phrase: fast and
// very fast read as "upbeat and quick", slow and very slow as "measured and
// deliberate", and anything else as "natural and conversational".
func voiceCadenceDesc(c persona.CadenceCategory) string {
	if c == persona.CadenceFast || c == persona.CadenceVeryFast {
		return "upbeat and quick"
	}
	if c == persona.CadenceSlow || c == persona.CadenceVerySlow {
		return "measured and deliberate"
	}
	return "natural and conversational"
}
// voiceExpressivenessDesc maps an expressiveness category to the phrase that
// closes the audio descriptor. Categories without a dedicated phrase read as
// "naturally expressive".
func voiceExpressivenessDesc(e persona.ExpressivenessCategory) string {
	switch {
	case e == persona.ExpressivenessAnimated:
		return "highly animated with emotional range"
	case e == persona.ExpressivenessExpressive:
		return "warm and expressive"
	case e == persona.ExpressivenessMonotone:
		return "calm and even-toned"
	}
	return "naturally expressive"
}