// persona-community-2/pkg/personagen/videogen.go

package personagen

import (
	"context"
	"fmt"
	"log/slog"
	"strings"

	"git.threesix.ai/jordan/persona-community-2/pkg/mediagen"
	"git.threesix.ai/jordan/persona-community-2/pkg/persona"
)

// generateVideo produces one video of the requested motion type for a persona.
//
// The VideoSpec for motionType is taken from spec.Videos when present;
// otherwise an entry from persona.DefaultVideoMatrix() is used, so a spec
// resolved that way is not an element of spec.Videos. The Veo prompt is built
// with buildVeoPrompt, stored on the resolved spec, and sent to the mediagen
// manager together with anchor as the single reference image.
//
// Parameters:
//   - ctx: passed through to mg.GenerateVideo.
//   - mg: media generation manager; nil yields a "mediagen not configured" error.
//   - spec: persona to render; must not be nil.
//   - motionType: which video of the matrix to generate.
//   - anchor: reference-frame image bytes (position 1 image); must not be empty.
//   - logger: receives the progress log line; nil falls back to slog.Default().
//
// Returns the resolved VideoSpec (Prompt set, Status complete) and the raw
// bytes of the first returned video, to be uploaded by the caller. On failure
// both are nil and the error describes the cause; once a VideoSpec has been
// resolved, failures also mark it VideoStatusFailed.
func generateVideo(
	ctx context.Context,
	mg *mediagen.Manager,
	spec *persona.PersonaSpec,
	motionType persona.MotionType,
	anchor []byte,
	logger *slog.Logger,
) (*persona.VideoSpec, []byte, error) {
	if mg == nil {
		return nil, nil, fmt.Errorf("mediagen not configured")
	}
	if spec == nil {
		return nil, nil, fmt.Errorf("persona spec is required")
	}
	if logger == nil {
		// Logging is incidental here; a missing logger must not crash generation.
		logger = slog.Default()
	}

	// Prefer the persona's own entry so status and prompt updates are recorded
	// on spec.Videos.
	var videoSpec *persona.VideoSpec
	for i := range spec.Videos {
		if spec.Videos[i].MotionType == motionType {
			videoSpec = &spec.Videos[i]
			break
		}
	}
	if videoSpec == nil {
		// Motion type not found in the persona's matrix; use an ephemeral spec
		// taken from the default matrix.
		defaults := persona.DefaultVideoMatrix()
		for i := range defaults {
			if defaults[i].MotionType == motionType {
				videoSpec = &defaults[i]
				break
			}
		}
	}
	if videoSpec == nil {
		return nil, nil, fmt.Errorf("unsupported motion type: %s", motionType)
	}

	prompt := buildVeoPrompt(spec, motionType)
	videoSpec.Prompt = prompt

	if len(anchor) == 0 {
		// The anchor is the identity reference frame; fail before calling the
		// provider with an empty reference image.
		videoSpec.Status = persona.VideoStatusFailed
		return nil, nil, fmt.Errorf("anchor image is required for motion type %s", motionType)
	}

	logger.Info("generating video", "motion_type", motionType, "duration", videoSpec.Duration)

	resp, err := mg.GenerateVideo(ctx, mediagen.VideoRequest{
		Prompt:      prompt,
		AspectRatio: videoSpec.AspectRatio,
		Duration:    videoSpec.Duration,
		// The anchor is declared to the provider as PNG.
		// NOTE(review): confirm callers always supply PNG-encoded bytes.
		ReferenceImages: []mediagen.Image{{
			Data:     anchor,
			MimeType: "image/png",
		}},
	})
	if err != nil {
		videoSpec.Status = persona.VideoStatusFailed
		return nil, nil, fmt.Errorf("video provider error: %w", err)
	}

	if len(resp.Videos) == 0 {
		videoSpec.Status = persona.VideoStatusFailed
		return nil, nil, fmt.Errorf("no videos returned from provider for motion type %s", motionType)
	}

	// Only the first returned video is used.
	videoSpec.Status = persona.VideoStatusComplete
	return videoSpec, resp.Videos[0].Data, nil
}

// pronounSet groups the pronoun forms used when writing prompt sentences
// about a persona.
type pronounSet struct {
	// subject is the subject-case pronoun, e.g. "She".
	subject string
	// object is the object-case pronoun, e.g. "her".
	object string
}

// genderPronouns picks the pronoun set matching the persona's gender.
// "She"/"her" is the fallback when the persona has no DNA or when the gender
// is neither persona.GenderMan nor persona.GenderNonBinary.
func genderPronouns(spec *persona.PersonaSpec) pronounSet {
	fallback := pronounSet{subject: "She", object: "her"}
	if spec.DNA == nil {
		return fallback
	}

	gender := spec.DNA.Identity.Gender
	if gender == persona.GenderMan {
		return pronounSet{subject: "He", object: "him"}
	}
	if gender == persona.GenderNonBinary {
		return pronounSet{subject: "They", object: "them"}
	}
	return fallback
}

// buildVeoPrompt assembles the Veo prompt for one motion type.
// The identity line, audio descriptor and pronouns are derived from spec once
// and handed to the builder for that motion type; any motion type without a
// dedicated builder gets a generic candid-moment prompt.
func buildVeoPrompt(spec *persona.PersonaSpec, motionType persona.MotionType) string {
	var (
		who   = buildIdentityLine(spec)
		sound = buildAudioDescriptor(spec)
		pro   = genderPronouns(spec)
	)

	var prompt string
	switch motionType {
	case persona.MotionSmileReveal:
		prompt = buildSmileRevealPrompt(who, sound, pro)
	case persona.MotionPersonality:
		prompt = buildPersonalityPrompt(spec, who, sound, pro)
	case persona.MotionLifestyle:
		prompt = buildLifestylePrompt(spec, who, sound, pro)
	case persona.MotionInvitation:
		prompt = buildInvitationPrompt(spec, who, sound, pro)
	default:
		prompt = fmt.Sprintf("%s Natural, candid moment, warm natural lighting. %s", who, sound)
	}
	return prompt
}

// buildSmileRevealPrompt creates a warm, genuine smile reveal video prompt.
//
// identity and audio are inserted verbatim at the start and end of the prompt.
// p.subject opens the action sentence as written, so it is expected to be
// capitalised ("She", "He", "They"). The verbs are conjugated to agree with
// the pronoun: "They look ... turn" rather than "They looks ... turns".
func buildSmileRevealPrompt(identity, audio string, p pronounSet) string {
	look, turn := "looks", "turns"
	if strings.EqualFold(p.subject, "they") {
		// Singular "they" still takes plural verb forms.
		look, turn = "look", "turn"
	}

	return fmt.Sprintf(
		"%s %s %s slightly away, then %s directly to camera with a warm, genuine smile — "+
			"eyes lighting up, expression full of warmth and personality. "+
			"Soft natural lighting, close-up framing, shallow depth of field. "+
			"Slow motion for the smile reveal moment. %s",
		identity, p.subject, look, turn, audio,
	)
}

// buildPersonalityPrompt creates an expressive personality showcase video prompt.
//
// The energy of the description follows the persona's voice expressiveness
// when DNA is present and defaults to "warm and natural" otherwise. identity
// and audio are inserted verbatim at the start and end. The subject pronoun is
// lower-cased because it appears mid-sentence, and the verb agrees with it
// ("they are", "she is", "he is").
func buildPersonalityPrompt(spec *persona.PersonaSpec, identity, audio string, p pronounSet) string {
	expressStyle := "warm and natural"
	if spec.DNA != nil {
		// Use voice expressiveness as a proxy for personality energy.
		switch spec.DNA.Voice.Expressiveness {
		case persona.ExpressivenessAnimated:
			expressStyle = "highly expressive and animated"
		case persona.ExpressivenessExpressive:
			expressStyle = "expressive and engaging"
		}
	}

	subject := strings.ToLower(p.subject)
	verb := "is"
	if subject == "they" {
		// Singular "they" still takes the plural verb form.
		verb = "are"
	}

	return fmt.Sprintf(
		"%s A candid personality moment — %s %s %s, laughing or reacting naturally, "+
			"full of charisma. Dynamic handheld camera movement. "+
			"Golden hour or warm studio lighting. "+
			"Cut between close-up and mid-shot for rhythm. %s",
		identity, subject, verb, expressStyle, audio,
	)
}

// buildLifestylePrompt creates a contextual lifestyle video prompt.
//
// The scene and a default activity are chosen from the persona's primary
// vacation style; an empty or unrecognised style keeps the urban defaults.
// When the persona lists active interests, the first one replaces the
// activity. identity and audio are inserted verbatim at the start and end.
// The subject pronoun is lower-cased because it appears mid-sentence, and the
// verb agrees with it ("they are", "she is", "he is").
func buildLifestylePrompt(spec *persona.PersonaSpec, identity, audio string, p pronounSet) string {
	scene := "stylish urban environment"
	activity := "going about their day"

	// An empty or unrecognised style matches no case and leaves the defaults.
	switch spec.Lifestyle.VacationStyle.Primary {
	case "beach", "coastal":
		scene = "sunny beachside setting"
		activity = "walking along the shoreline"
	case "city":
		scene = "vibrant city street"
		activity = "exploring the city"
	case "adventure":
		scene = "scenic outdoor landscape"
		activity = "enjoying the outdoors"
	case "luxury":
		scene = "luxurious upscale setting"
		activity = "enjoying a refined moment"
	case "cultural":
		scene = "culturally rich environment"
		activity = "immersed in their surroundings"
	}

	if len(spec.Lifestyle.Interests.Active) > 0 {
		// Inserted verbatim after the verb.
		// NOTE(review): assumes interests are phrased as activities
		// (e.g. "surfing") so the sentence reads naturally — confirm.
		activity = spec.Lifestyle.Interests.Active[0]
	}

	subject := strings.ToLower(p.subject)
	verb := "is"
	if subject == "they" {
		// Singular "they" still takes the plural verb form.
		verb = "are"
	}

	return fmt.Sprintf(
		"%s A natural lifestyle moment — %s %s %s in a %s. "+
			"Wide establishing shot transitioning to mid-shot. "+
			"Cinematic 16:9 composition, natural movement, vibrant color grading. %s",
		identity, subject, verb, activity, scene, audio,
	)
}

// buildInvitationPrompt creates a direct-address invitation video prompt.
//
// identity and audio are inserted verbatim at the start and end. The first
// sentence uses the subject pronoun as written (expected capitalised) with a
// verb that agrees with it ("They look", "She looks"); the second sentence
// refers to the persona by first name.
func buildInvitationPrompt(spec *persona.PersonaSpec, identity, audio string, p pronounSet) string {
	name := spec.Name.First

	look := "looks"
	if strings.EqualFold(p.subject, "they") {
		// Singular "they" still takes the plural verb form.
		look = "look"
	}

	return fmt.Sprintf(
		"%s %s %s directly into the camera with a warm, confident expression. "+
			"%s gestures naturally as if personally inviting the viewer, "+
			"making direct eye contact, with a knowing smile. "+
			"Close-up to mid-shot. Clean, aspirational background. "+
			"Cinematic vertical 9:16 framing. %s",
		identity, p.subject, look, name, audio,
	)
}

// buildIdentityLine creates a one-line identity description for video prompts.
//
// With DNA present the line reads
// "<first name>, a(n) <age>-year-old <ethnicity> <gender> with <hair colour>
// <hair texture> hair, <build> build, <skin tone> skin." and the article is
// chosen to match how the age is spoken ("an 18-year-old", "a 25-year-old").
// Without DNA only the first name is known; it is returned with a trailing
// period so the sentence a caller appends still starts cleanly, matching the
// period that ends the full line. An empty name without DNA yields "".
func buildIdentityLine(spec *persona.PersonaSpec) string {
	name := spec.Name.First
	if spec.DNA == nil {
		if name == "" {
			return ""
		}
		return name + "."
	}

	id := spec.DNA.Identity
	body := spec.DNA.Body

	// Ages whose spoken form starts with a vowel sound take "an":
	// eight, eleven, eighteen and the eighties.
	article := "a"
	switch {
	case id.Age == 8, id.Age == 11, id.Age == 18, id.Age >= 80 && id.Age <= 89:
		article = "an"
	}

	return fmt.Sprintf(
		"%s, %s %d-year-old %s %s with %s %s hair, %s build, %s skin.",
		name,
		article,
		id.Age,
		ethnicitToAdj(id.Ethnicity),
		strings.ToLower(string(id.Gender)),
		string(spec.DNA.Face.HairColor),
		string(spec.DNA.Face.HairTexture),
		string(body.Build),
		string(spec.DNA.Face.SkinTone),
	)
}

// buildAudioDescriptor renders the persona's voice DNA as the audio sentence
// appended to Veo prompts. Pitch, cadence and expressiveness go through their
// descriptor helpers; timbre is used as its raw string value. A persona
// without DNA gets a generic ambient-audio sentence.
func buildAudioDescriptor(spec *persona.PersonaSpec) string {
	dna := spec.DNA
	if dna == nil {
		return "Natural ambient audio."
	}

	v := dna.Voice
	return fmt.Sprintf(
		"Audio: %s %s voice, %s delivery, %s.",
		voicePitchDesc(v.Pitch),
		string(v.Timbre),
		voiceCadenceDesc(v.Cadence),
		voiceExpressivenessDesc(v.Expressiveness),
	)
}

// voicePitchDesc collapses the pitch category into one of three prompt
// adjectives: higher-, lower- or medium-pitched. Any category that is not
// explicitly high or low is described as medium.
func voicePitchDesc(p persona.PitchCategory) string {
	if p == persona.PitchVeryHigh || p == persona.PitchHigh {
		return "higher-pitched"
	}
	if p == persona.PitchLow || p == persona.PitchVeryLow {
		return "lower-pitched"
	}
	return "medium-pitched"
}

// voiceCadenceDesc turns the cadence category into a delivery phrase for the
// audio descriptor. Fast and slow categories get dedicated phrases; everything
// else is described as natural and conversational.
func voiceCadenceDesc(c persona.CadenceCategory) string {
	if c == persona.CadenceFast || c == persona.CadenceVeryFast {
		return "upbeat and quick"
	}
	if c == persona.CadenceSlow || c == persona.CadenceVerySlow {
		return "measured and deliberate"
	}
	return "natural and conversational"
}

// voiceExpressivenessDesc turns the expressiveness category into the closing
// phrase of the audio descriptor. Categories without a dedicated phrase are
// described as naturally expressive.
func voiceExpressivenessDesc(e persona.ExpressivenessCategory) string {
	desc := "naturally expressive"
	switch e {
	case persona.ExpressivenessAnimated:
		desc = "highly animated with emotional range"
	case persona.ExpressivenessExpressive:
		desc = "warm and expressive"
	case persona.ExpressivenessMonotone:
		desc = "calm and even-toned"
	}
	return desc
}