269 lines
8.4 KiB
Go
269 lines
8.4 KiB
Go
package personagen
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"log/slog"
|
|
"strings"
|
|
|
|
"git.threesix.ai/jordan/persona-community-2/pkg/mediagen"
|
|
"git.threesix.ai/jordan/persona-community-2/pkg/persona"
|
|
)
|
|
|
|
// generateVideo builds a Veo prompt for the given motion type and calls the mediagen provider.
|
|
// Requires anchor bytes (position 1 image) as the reference frame for identity consistency.
|
|
// Returns the VideoSpec and the raw video bytes (to be uploaded by the caller).
|
|
func generateVideo(
|
|
ctx context.Context,
|
|
mg *mediagen.Manager,
|
|
spec *persona.PersonaSpec,
|
|
motionType persona.MotionType,
|
|
anchor []byte,
|
|
logger *slog.Logger,
|
|
) (*persona.VideoSpec, []byte, error) {
|
|
if mg == nil {
|
|
return nil, nil, fmt.Errorf("mediagen not configured")
|
|
}
|
|
|
|
// Find the matching VideoSpec in the spec's Videos slice.
|
|
var videoSpec *persona.VideoSpec
|
|
for i := range spec.Videos {
|
|
if spec.Videos[i].MotionType == motionType {
|
|
videoSpec = &spec.Videos[i]
|
|
break
|
|
}
|
|
}
|
|
if videoSpec == nil {
|
|
// Motion type not found in matrix; create an ephemeral spec.
|
|
vs := persona.DefaultVideoMatrix()
|
|
for i := range vs {
|
|
if vs[i].MotionType == motionType {
|
|
videoSpec = &vs[i]
|
|
break
|
|
}
|
|
}
|
|
}
|
|
if videoSpec == nil {
|
|
return nil, nil, fmt.Errorf("unsupported motion type: %s", motionType)
|
|
}
|
|
|
|
prompt := buildVeoPrompt(spec, motionType)
|
|
videoSpec.Prompt = prompt
|
|
|
|
logger.Info("generating video", "motion_type", motionType, "duration", videoSpec.Duration)
|
|
|
|
resp, err := mg.GenerateVideo(ctx, mediagen.VideoRequest{
|
|
Prompt: prompt,
|
|
AspectRatio: videoSpec.AspectRatio,
|
|
Duration: videoSpec.Duration,
|
|
ReferenceImages: []mediagen.Image{{
|
|
Data: anchor,
|
|
MimeType: "image/png",
|
|
}},
|
|
})
|
|
if err != nil {
|
|
videoSpec.Status = persona.VideoStatusFailed
|
|
return nil, nil, fmt.Errorf("video provider error: %w", err)
|
|
}
|
|
|
|
if len(resp.Videos) == 0 {
|
|
videoSpec.Status = persona.VideoStatusFailed
|
|
return nil, nil, fmt.Errorf("no videos returned from provider for motion type %s", motionType)
|
|
}
|
|
|
|
videoSpec.Status = persona.VideoStatusComplete
|
|
return videoSpec, resp.Videos[0].Data, nil
|
|
}
|
|
|
|
// pronounSet groups the pronoun forms used when writing prompt text about a
// persona.
type pronounSet struct {
	subject string // subject pronoun, e.g. "He"
	object  string // object pronoun, e.g. "him"
}
|
|
|
|
// genderPronouns returns appropriate pronouns based on the persona's gender identity.
|
|
func genderPronouns(spec *persona.PersonaSpec) pronounSet {
|
|
if spec.DNA != nil {
|
|
switch spec.DNA.Identity.Gender {
|
|
case persona.GenderMan:
|
|
return pronounSet{"He", "him"}
|
|
case persona.GenderNonBinary:
|
|
return pronounSet{"They", "them"}
|
|
}
|
|
}
|
|
return pronounSet{"She", "her"}
|
|
}
|
|
|
|
// buildVeoPrompt constructs a Veo video generation prompt for the given motion type.
|
|
// Each motion type produces a distinct narrative and action sequence.
|
|
func buildVeoPrompt(spec *persona.PersonaSpec, motionType persona.MotionType) string {
|
|
identity := buildIdentityLine(spec)
|
|
audio := buildAudioDescriptor(spec)
|
|
pronouns := genderPronouns(spec)
|
|
|
|
switch motionType {
|
|
case persona.MotionSmileReveal:
|
|
return buildSmileRevealPrompt(identity, audio, pronouns)
|
|
case persona.MotionPersonality:
|
|
return buildPersonalityPrompt(spec, identity, audio, pronouns)
|
|
case persona.MotionLifestyle:
|
|
return buildLifestylePrompt(spec, identity, audio, pronouns)
|
|
case persona.MotionInvitation:
|
|
return buildInvitationPrompt(spec, identity, audio, pronouns)
|
|
default:
|
|
return fmt.Sprintf("%s Natural, candid moment, warm natural lighting. %s", identity, audio)
|
|
}
|
|
}
|
|
|
|
// buildSmileRevealPrompt creates a warm, genuine smile reveal video prompt.
|
|
func buildSmileRevealPrompt(identity, audio string, p pronounSet) string {
|
|
return fmt.Sprintf(
|
|
"%s %s looks slightly away, then turns directly to camera with a warm, genuine smile — "+
|
|
"eyes lighting up, expression full of warmth and personality. "+
|
|
"Soft natural lighting, close-up framing, shallow depth of field. "+
|
|
"Slow motion for the smile reveal moment. %s",
|
|
identity, p.subject, audio,
|
|
)
|
|
}
|
|
|
|
// buildPersonalityPrompt creates an expressive personality showcase video prompt.
|
|
func buildPersonalityPrompt(spec *persona.PersonaSpec, identity, audio string, p pronounSet) string {
|
|
expressStyle := "warm and natural"
|
|
if spec.DNA != nil {
|
|
// Use voice expressiveness as a proxy for personality energy.
|
|
switch spec.DNA.Voice.Expressiveness {
|
|
case persona.ExpressivenessAnimated:
|
|
expressStyle = "highly expressive and animated"
|
|
case persona.ExpressivenessExpressive:
|
|
expressStyle = "expressive and engaging"
|
|
}
|
|
}
|
|
|
|
return fmt.Sprintf(
|
|
"%s A candid personality moment — %s is %s, laughing or reacting naturally, "+
|
|
"full of charisma. Dynamic handheld camera movement. "+
|
|
"Golden hour or warm studio lighting. "+
|
|
"Cut between close-up and mid-shot for rhythm. %s",
|
|
identity, strings.ToLower(p.subject), expressStyle, audio,
|
|
)
|
|
}
|
|
|
|
// buildLifestylePrompt creates a contextual lifestyle video prompt.
|
|
func buildLifestylePrompt(spec *persona.PersonaSpec, identity, audio string, p pronounSet) string {
|
|
scene := "stylish urban environment"
|
|
activity := "going about their day"
|
|
|
|
if spec.Lifestyle.VacationStyle.Primary != "" {
|
|
switch spec.Lifestyle.VacationStyle.Primary {
|
|
case "beach", "coastal":
|
|
scene = "sunny beachside setting"
|
|
activity = "walking along the shoreline"
|
|
case "city":
|
|
scene = "vibrant city street"
|
|
activity = "exploring the city"
|
|
case "adventure":
|
|
scene = "scenic outdoor landscape"
|
|
activity = "enjoying the outdoors"
|
|
case "luxury":
|
|
scene = "luxurious upscale setting"
|
|
activity = "enjoying a refined moment"
|
|
case "cultural":
|
|
scene = "culturally rich environment"
|
|
activity = "immersed in their surroundings"
|
|
}
|
|
}
|
|
|
|
if len(spec.Lifestyle.Interests.Active) > 0 {
|
|
activity = spec.Lifestyle.Interests.Active[0]
|
|
}
|
|
|
|
return fmt.Sprintf(
|
|
"%s A natural lifestyle moment — %s is %s in a %s. "+
|
|
"Wide establishing shot transitioning to mid-shot. "+
|
|
"Cinematic 16:9 composition, natural movement, vibrant color grading. %s",
|
|
identity, strings.ToLower(p.subject), activity, scene, audio,
|
|
)
|
|
}
|
|
|
|
// buildInvitationPrompt creates a direct-address invitation video prompt.
|
|
func buildInvitationPrompt(spec *persona.PersonaSpec, identity, audio string, p pronounSet) string {
|
|
name := spec.Name.First
|
|
|
|
return fmt.Sprintf(
|
|
"%s %s looks directly into the camera with a warm, confident expression. "+
|
|
"%s gestures naturally as if personally inviting the viewer, "+
|
|
"making direct eye contact, with a knowing smile. "+
|
|
"Close-up to mid-shot. Clean, aspirational background. "+
|
|
"Cinematic vertical 9:16 framing. %s",
|
|
identity, p.subject, name, audio,
|
|
)
|
|
}
|
|
|
|
// buildIdentityLine creates a one-line identity description for video prompts.
|
|
func buildIdentityLine(spec *persona.PersonaSpec) string {
|
|
if spec.DNA == nil {
|
|
return spec.Name.First
|
|
}
|
|
id := spec.DNA.Identity
|
|
body := spec.DNA.Body
|
|
return fmt.Sprintf(
|
|
"%s, a %d-year-old %s %s with %s %s hair,",
|
|
spec.Name.First,
|
|
id.Age,
|
|
ethnicitToAdj(id.Ethnicity),
|
|
strings.ToLower(string(id.Gender)),
|
|
string(spec.DNA.Face.HairColor),
|
|
string(spec.DNA.Face.HairTexture),
|
|
) + fmt.Sprintf(" %s build, %s skin.", string(body.Build), string(spec.DNA.Face.SkinTone))
|
|
}
|
|
|
|
// buildAudioDescriptor maps VoiceDNA fields to Veo audio generation descriptors.
|
|
func buildAudioDescriptor(spec *persona.PersonaSpec) string {
|
|
if spec.DNA == nil {
|
|
return "Natural ambient audio."
|
|
}
|
|
voice := spec.DNA.Voice
|
|
|
|
pitchDesc := voicePitchDesc(voice.Pitch)
|
|
timbreDesc := string(voice.Timbre)
|
|
cadenceDesc := voiceCadenceDesc(voice.Cadence)
|
|
expressDesc := voiceExpressivenessDesc(voice.Expressiveness)
|
|
|
|
return fmt.Sprintf(
|
|
"Audio: %s %s voice, %s delivery, %s.",
|
|
pitchDesc, timbreDesc, cadenceDesc, expressDesc,
|
|
)
|
|
}
|
|
|
|
func voicePitchDesc(p persona.PitchCategory) string {
|
|
switch p {
|
|
case persona.PitchVeryHigh, persona.PitchHigh:
|
|
return "higher-pitched"
|
|
case persona.PitchLow, persona.PitchVeryLow:
|
|
return "lower-pitched"
|
|
default:
|
|
return "medium-pitched"
|
|
}
|
|
}
|
|
|
|
func voiceCadenceDesc(c persona.CadenceCategory) string {
|
|
switch c {
|
|
case persona.CadenceFast, persona.CadenceVeryFast:
|
|
return "upbeat and quick"
|
|
case persona.CadenceSlow, persona.CadenceVerySlow:
|
|
return "measured and deliberate"
|
|
default:
|
|
return "natural and conversational"
|
|
}
|
|
}
|
|
|
|
func voiceExpressivenessDesc(e persona.ExpressivenessCategory) string {
|
|
switch e {
|
|
case persona.ExpressivenessAnimated:
|
|
return "highly animated with emotional range"
|
|
case persona.ExpressivenessExpressive:
|
|
return "warm and expressive"
|
|
case persona.ExpressivenessMonotone:
|
|
return "calm and even-toned"
|
|
default:
|
|
return "naturally expressive"
|
|
}
|
|
}
|