Adds anchor-based image album generation across the docs, the skeleton, and the
rendered full monorepo. An album is one subject description, one anchor image, and
N directed shots; it covers personas, products, characters, and brand assets out of the box.
## What ships
**Skeleton packages:**
- pkg/album/types.go — Album, Shot, ShotStatus, ShotTemplate, AlbumUpdater
- pkg/album/templates.go — PortraitSession, ProductShoot, CharacterSheet built-ins
- pkg/album/handler.go — AnchorHandler + ShotHandler queue job handlers
- packages/realtime/src/useAlbumGeneration.ts — SSE hook owning all album state
- packages/ui/src/components/AlbumGrid.tsx — responsive shot grid with shimmer
- packages/ui/src/components/ShotCard.tsx — pending/generating/complete/failed states
- packages/ui/src/components/AnchorPreview.tsx — anchor CTA + image with controls
**Component service template:**
- internal/port/album.go — AlbumRepository interface
- internal/adapter/memory/album.go — in-memory repo for standalone dev
- internal/service/album.go — create, list, get, generateAnchor, generateAllShots
- internal/api/handlers/album.go — HTTP handlers (CRUD + 202 generation endpoints)
- Routes: GET/POST /albums, GET/DELETE /albums/{id}, POST /albums/{id}/anchor,
POST/DELETE /albums/{id}/shots, POST /albums/{id}/shots/{index}
**Documentation:**
- .claude/guides/album.md — full guide with API, SSE events, frontend usage
**Key architecture decisions:**
- Anchor bytes never stored in queue payload — workers fetch AnchorURL at runtime
- Generation order enforced: POST /shots returns 422 if no anchor exists
- All album SSE events on existing user:<userId> channel (no new channel)
- AlbumUpdater interface lets job handlers update repo from inside queue workers
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
254 lines
7.7 KiB
Cheetah
254 lines
7.7 KiB
Cheetah
package personagen
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"log/slog"
|
|
"strings"
|
|
|
|
"{{GO_MODULE}}/pkg/mediagen"
|
|
"{{GO_MODULE}}/pkg/persona"
|
|
)
|
|
|
|
// generateVideo builds a Veo prompt for the given motion type and calls the mediagen provider.
|
|
// Requires anchor bytes (position 1 image) as the reference frame for identity consistency.
|
|
func generateVideo(
|
|
ctx context.Context,
|
|
mg *mediagen.Manager,
|
|
spec *persona.PersonaSpec,
|
|
motionType persona.MotionType,
|
|
anchor []byte,
|
|
logger *slog.Logger,
|
|
) (*persona.VideoSpec, error) {
|
|
if mg == nil {
|
|
return nil, fmt.Errorf("mediagen not configured")
|
|
}
|
|
|
|
// Find the matching VideoSpec in the spec's Videos slice.
|
|
var videoSpec *persona.VideoSpec
|
|
for i := range spec.Videos {
|
|
if spec.Videos[i].MotionType == motionType {
|
|
videoSpec = &spec.Videos[i]
|
|
break
|
|
}
|
|
}
|
|
if videoSpec == nil {
|
|
// Motion type not found in matrix; create an ephemeral spec.
|
|
vs := persona.DefaultVideoMatrix()
|
|
for i := range vs {
|
|
if vs[i].MotionType == motionType {
|
|
videoSpec = &vs[i]
|
|
break
|
|
}
|
|
}
|
|
}
|
|
if videoSpec == nil {
|
|
return nil, fmt.Errorf("unsupported motion type: %s", motionType)
|
|
}
|
|
|
|
prompt := buildVeoPrompt(spec, motionType)
|
|
videoSpec.Prompt = prompt
|
|
|
|
logger.Info("generating video", "motion_type", motionType, "duration", videoSpec.Duration)
|
|
|
|
resp, err := mg.GenerateVideo(ctx, mediagen.VideoRequest{
|
|
Prompt: prompt,
|
|
AspectRatio: videoSpec.AspectRatio,
|
|
Duration: videoSpec.Duration,
|
|
ReferenceImages: []mediagen.Image{{
|
|
Data: anchor,
|
|
MimeType: "image/png",
|
|
}},
|
|
})
|
|
if err != nil {
|
|
videoSpec.Status = persona.VideoStatusFailed
|
|
return nil, fmt.Errorf("video provider error: %w", err)
|
|
}
|
|
|
|
if len(resp.Videos) == 0 {
|
|
videoSpec.Status = persona.VideoStatusFailed
|
|
return nil, fmt.Errorf("no videos returned from provider for motion type %s", motionType)
|
|
}
|
|
|
|
// URL will be set by the caller after uploading to storage.
|
|
videoSpec.Status = persona.VideoStatusComplete
|
|
return videoSpec, nil
|
|
}
|
|
|
|
// buildVeoPrompt constructs a Veo video generation prompt for the given motion type.
|
|
// Each motion type produces a distinct narrative and action sequence.
|
|
func buildVeoPrompt(spec *persona.PersonaSpec, motionType persona.MotionType) string {
|
|
identity := buildIdentityLine(spec)
|
|
audio := buildAudioDescriptor(spec)
|
|
|
|
switch motionType {
|
|
case persona.MotionSmileReveal:
|
|
return buildSmileRevealPrompt(identity, audio)
|
|
case persona.MotionPersonality:
|
|
return buildPersonalityPrompt(spec, identity, audio)
|
|
case persona.MotionLifestyle:
|
|
return buildLifestylePrompt(spec, identity, audio)
|
|
case persona.MotionInvitation:
|
|
return buildInvitationPrompt(spec, identity, audio)
|
|
default:
|
|
return fmt.Sprintf("%s Natural, candid moment, warm natural lighting. %s", identity, audio)
|
|
}
|
|
}
|
|
|
|
// buildSmileRevealPrompt returns the smile-reveal prompt: the identity line,
// a fixed close-up direction (look away, turn to camera, slow-motion smile),
// then the audio descriptor, separated by single spaces.
func buildSmileRevealPrompt(identity, audio string) string {
	const direction = "She looks slightly away, then turns directly to camera with a warm, genuine smile — " +
		"eyes lighting up, expression full of warmth and personality. " +
		"Soft natural lighting, close-up framing, shallow depth of field. " +
		"Slow motion for the smile reveal moment."

	return fmt.Sprintf("%s %s %s", identity, direction, audio)
}
|
|
|
|
// buildPersonalityPrompt creates an expressive personality showcase video prompt.
|
|
func buildPersonalityPrompt(spec *persona.PersonaSpec, identity, audio string) string {
|
|
extraversion := "moderate"
|
|
if spec.DNA != nil {
|
|
// We don't have HEXACO in DNA; use voice expressiveness as a proxy.
|
|
switch spec.DNA.Voice.Expressiveness {
|
|
case persona.ExpressivenessAnimated:
|
|
extraversion = "highly expressive and animated"
|
|
case persona.ExpressivenessExpressive:
|
|
extraversion = "expressive and engaging"
|
|
default:
|
|
extraversion = "warm and natural"
|
|
}
|
|
}
|
|
|
|
return fmt.Sprintf(
|
|
"%s A candid personality moment — she is %s, laughing or reacting naturally, "+
|
|
"full of charisma. Dynamic handheld camera movement. "+
|
|
"Golden hour or warm studio lighting. "+
|
|
"Cut between close-up and mid-shot for rhythm. %s",
|
|
identity, extraversion, audio,
|
|
)
|
|
}
|
|
|
|
// buildLifestylePrompt creates a contextual lifestyle video prompt.
|
|
func buildLifestylePrompt(spec *persona.PersonaSpec, identity, audio string) string {
|
|
scene := "stylish urban environment"
|
|
activity := "going about her day"
|
|
|
|
if spec.Lifestyle.VacationStyle.Primary != "" {
|
|
switch spec.Lifestyle.VacationStyle.Primary {
|
|
case "beach", "coastal":
|
|
scene = "sunny beachside setting"
|
|
activity = "walking along the shoreline"
|
|
case "city":
|
|
scene = "vibrant city street"
|
|
activity = "exploring the city"
|
|
case "adventure":
|
|
scene = "scenic outdoor landscape"
|
|
activity = "enjoying the outdoors"
|
|
case "luxury":
|
|
scene = "luxurious upscale setting"
|
|
activity = "enjoying a refined moment"
|
|
case "cultural":
|
|
scene = "culturally rich environment"
|
|
activity = "immersed in her surroundings"
|
|
}
|
|
}
|
|
|
|
if len(spec.Lifestyle.Interests.Active) > 0 {
|
|
activity = spec.Lifestyle.Interests.Active[0]
|
|
}
|
|
|
|
return fmt.Sprintf(
|
|
"%s A natural lifestyle moment — she is %s in a %s. "+
|
|
"Wide establishing shot transitioning to mid-shot. "+
|
|
"Cinematic 16:9 composition, natural movement, vibrant color grading. %s",
|
|
identity, activity, scene, audio,
|
|
)
|
|
}
|
|
|
|
// buildInvitationPrompt creates a direct-address invitation video prompt.
|
|
func buildInvitationPrompt(spec *persona.PersonaSpec, identity, audio string) string {
|
|
name := spec.Name.First
|
|
|
|
return fmt.Sprintf(
|
|
"%s She looks directly into the camera with a warm, confident expression. "+
|
|
"%s gestures naturally as if personally inviting the viewer, "+
|
|
"making direct eye contact, with a knowing smile. "+
|
|
"Close-up to mid-shot. Clean, aspirational background. "+
|
|
"Cinematic vertical 9:16 framing. %s",
|
|
identity, name, audio,
|
|
)
|
|
}
|
|
|
|
// buildIdentityLine creates a one-line identity description for video prompts.
|
|
func buildIdentityLine(spec *persona.PersonaSpec) string {
|
|
if spec.DNA == nil {
|
|
return spec.Name.First
|
|
}
|
|
id := spec.DNA.Identity
|
|
body := spec.DNA.Body
|
|
return fmt.Sprintf(
|
|
"%s, a %d-year-old %s %s with %s %s hair,",
|
|
spec.Name.First,
|
|
id.Age,
|
|
ethnicitToAdj(id.Ethnicity),
|
|
strings.ToLower(string(id.Gender)),
|
|
string(spec.DNA.Face.HairColor),
|
|
string(spec.DNA.Face.HairTexture),
|
|
) + fmt.Sprintf(" %s build, %s skin.", string(body.Build), string(spec.DNA.Face.SkinTone))
|
|
}
|
|
|
|
// buildAudioDescriptor maps VoiceDNA fields to Veo audio generation descriptors.
|
|
func buildAudioDescriptor(spec *persona.PersonaSpec) string {
|
|
if spec.DNA == nil {
|
|
return "Natural ambient audio."
|
|
}
|
|
voice := spec.DNA.Voice
|
|
|
|
pitchDesc := voicePitchDesc(voice.Pitch)
|
|
timbreDesc := string(voice.Timbre)
|
|
cadenceDesc := voiceCadenceDesc(voice.Cadence)
|
|
expressDesc := voiceExpressivenessDesc(voice.Expressiveness)
|
|
|
|
return fmt.Sprintf(
|
|
"Audio: %s %s voice, %s delivery, %s.",
|
|
pitchDesc, timbreDesc, cadenceDesc, expressDesc,
|
|
)
|
|
}
|
|
|
|
func voicePitchDesc(p persona.PitchCategory) string {
|
|
switch p {
|
|
case persona.PitchVeryHigh, persona.PitchHigh:
|
|
return "higher-pitched"
|
|
case persona.PitchLow, persona.PitchVeryLow:
|
|
return "lower-pitched"
|
|
default:
|
|
return "medium-pitched"
|
|
}
|
|
}
|
|
|
|
func voiceCadenceDesc(c persona.CadenceCategory) string {
|
|
switch c {
|
|
case persona.CadenceFast, persona.CadenceVeryFast:
|
|
return "upbeat and quick"
|
|
case persona.CadenceSlow, persona.CadenceVerySlow:
|
|
return "measured and deliberate"
|
|
default:
|
|
return "natural and conversational"
|
|
}
|
|
}
|
|
|
|
func voiceExpressivenessDesc(e persona.ExpressivenessCategory) string {
|
|
switch e {
|
|
case persona.ExpressivenessAnimated:
|
|
return "highly animated with emotional range"
|
|
case persona.ExpressivenessExpressive:
|
|
return "warm and expressive"
|
|
case persona.ExpressivenessMonotone:
|
|
return "calm and even-toned"
|
|
default:
|
|
return "naturally expressive"
|
|
}
|
|
}
|