persona-community-5/pkg/laozhang/video.go
jordan bd2f591b98
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
ci/woodpecker/manual/woodpecker Pipeline was successful
Initialize project from skeleton template
2026-02-24 07:39:46 +00:00

224 lines
6.9 KiB
Go

package laozhang
import (
"context"
"encoding/json"
"fmt"
"regexp"
"strings"
)
// Defaults that GenerateVideo applies when the corresponding VideoRequest
// field is left at its zero value.
const (
	defaultVideoModel = "veo-3.1" // substituted when VideoRequest.Model is empty
	defaultVideoCount = 1 // substituted when VideoRequest.N is 0
)
// VideoRequest describes a video generation request accepted by GenerateVideo.
//
// GenerateVideo does not send this struct as-is: it validates the fields,
// fills in defaults, and converts the request into a chat completion payload
// (one text part for Prompt plus one image_url part per reference image).
type VideoRequest struct {
	Model string `json:"model"` // Model to use; GenerateVideo substitutes defaultVideoModel ("veo-3.1") when empty
	Prompt string `json:"prompt"` // Text description of the desired video; GenerateVideo rejects an empty prompt
	N int `json:"n,omitempty"` // Number of videos to generate (1-4); 0 is replaced by defaultVideoCount (1)
	ReferenceImages []string `json:"reference_images,omitempty"` // Optional: base64 or URLs for image-to-video
}
// VideoResponse is the result returned by GenerateVideo.
//
// ID and Created are copied from the underlying chat completion response;
// Data holds one entry per video URL extracted from the response choices.
type VideoResponse struct {
	ID string `json:"id"` // Response ID
	Created int64 `json:"created"` // Unix timestamp of when the video was created
	Data []VideoData `json:"data"` // List of generated videos
}
// VideoData represents a single generated video.
type VideoData struct {
	URL string `json:"url"` // Video URL as produced by extractVideoURL from the response content
}
// videoChatMessage is the request-side chat message used for video generation.
// Its content is always an array of parts; the response side uses
// videoChatResponseMessage because the content shape returned there varies.
type videoChatMessage struct {
	Role string `json:"role"` // Message role; GenerateVideo sends a single "user" message
	Content []videoChatContentPart `json:"content"` // Prompt text part followed by any reference image parts
}
// videoChatResponseMessage is the response-side chat message. Content is kept
// as raw JSON so that GenerateVideo can try decoding it first as a string and
// then as an array of content parts.
type videoChatResponseMessage struct {
	Role string `json:"role"` // Message role as reported in the response
	Content json.RawMessage `json:"content"` // Can be string or []videoChatContentPart
}
// videoChatContentPart represents one part of a message's content: either a
// text part or an image part. Text and ImageURL are omitted from the JSON
// encoding when unset.
type videoChatContentPart struct {
	Type string `json:"type"` // "text" or "image_url"
	Text string `json:"text,omitempty"` // Text content (set when Type is "text")
	ImageURL *videoChatImageURL `json:"image_url,omitempty"` // Image URL content (set when Type is "image_url")
}
// videoChatImageURL wraps the image reference carried by an "image_url"
// content part.
type videoChatImageURL struct {
	URL string `json:"url"` // Base64 data URL or HTTP(S) URL
}
// videoChatRequest is the chat completion request body that GenerateVideo
// sends for video generation.
type videoChatRequest struct {
	Model string `json:"model"` // Model name taken from VideoRequest.Model after defaulting
	Messages []videoChatMessage `json:"messages"` // Conversation; GenerateVideo sends exactly one user message
	Stream bool `json:"stream"` // GenerateVideo always sets this to false
	N int `json:"n"` // Number of videos requested, taken from VideoRequest.N after defaulting
}
// videoChatResponse is the chat completion response body decoded by
// GenerateVideo. Only the fields GenerateVideo reads are declared here.
type videoChatResponse struct {
	ID string `json:"id"` // Response ID, copied into VideoResponse.ID
	Created int64 `json:"created"` // Creation timestamp, copied into VideoResponse.Created
	Choices []videoChatChoice `json:"choices"` // Choices scanned for video URLs
}
// videoChatChoice represents a single choice in the chat response.
type videoChatChoice struct {
	Message videoChatResponseMessage `json:"message"` // Message whose content carries the video URL
}
// GenerateVideo submits a video generation job to the Veo 3.1 models through
// the chat completions API format and returns the URLs of the generated videos.
//
// Text-to-video needs only the Prompt field. Image-to-video (first/last frame
// interpolation) uses a model whose name ends in "-fl" together with
// ReferenceImages.
//
// Supported models:
//   - veo-3.1 (standard, $0.25/gen)
//   - veo-3.1-fast ($0.15/gen)
//   - veo-3.1-fl (first/last frame interpolation)
//   - veo-3.1-fast-fl (fast with interpolation)
//   - Add "-landscape" suffix for landscape variants (e.g., "veo-3.1-landscape")
//
// An empty Model falls back to defaultVideoModel and a zero N falls back to
// defaultVideoCount. An empty Prompt, or an N outside 1-4 after defaulting,
// yields an error wrapping ErrInvalidConfig. Errors from the request itself
// are returned unchanged; an undecodable body or a response from which no
// video URL could be extracted yields a descriptive error.
func (c *Client) GenerateVideo(ctx context.Context, req VideoRequest) (*VideoResponse, error) {
	// The prompt is mandatory regardless of the model variant.
	if req.Prompt == "" {
		return nil, fmt.Errorf("%w: prompt is required", ErrInvalidConfig)
	}

	// Fill in defaults before range-checking so that a zero N is accepted.
	if req.Model == "" {
		req.Model = defaultVideoModel
	}
	if req.N == 0 {
		req.N = defaultVideoCount
	}
	if n := req.N; n < 1 || n > 4 {
		return nil, fmt.Errorf("%w: n must be between 1 and 4, got %d", ErrInvalidConfig, n)
	}

	// The user message is the prompt text followed by one image part per
	// reference image (image-to-video).
	parts := make([]videoChatContentPart, 0, 1+len(req.ReferenceImages))
	parts = append(parts, videoChatContentPart{Type: "text", Text: req.Prompt})
	for i := range req.ReferenceImages {
		parts = append(parts, videoChatContentPart{
			Type:     "image_url",
			ImageURL: &videoChatImageURL{URL: req.ReferenceImages[i]},
		})
	}

	payload := videoChatRequest{
		Model:    req.Model,
		Messages: []videoChatMessage{{Role: "user", Content: parts}},
		Stream:   false,
		N:        req.N,
	}

	// Make request with video client (5m timeout) - video generation takes 2-5 minutes
	body, err := c.doRequestVideo(ctx, "POST", "/chat/completions", payload)
	if err != nil {
		return nil, err
	}

	var decoded videoChatResponse
	if err := json.Unmarshal(body, &decoded); err != nil {
		return nil, fmt.Errorf("unmarshal response: %w", err)
	}

	result := &VideoResponse{
		ID:      decoded.ID,
		Created: decoded.Created,
		Data:    make([]VideoData, 0, len(decoded.Choices)),
	}

	// collect records the URL found in text, if any.
	collect := func(text string) {
		if u := extractVideoURL(text); u != "" {
			result.Data = append(result.Data, VideoData{URL: u})
		}
	}

	// A choice's content is either a JSON string (plain URL or markdown
	// link) or an array of content parts whose "text" parts carry the URL.
	for i := range decoded.Choices {
		rawContent := decoded.Choices[i].Message.Content

		var text string
		if json.Unmarshal(rawContent, &text) == nil {
			collect(text)
			continue
		}

		var segments []videoChatContentPart
		if json.Unmarshal(rawContent, &segments) != nil {
			// Neither shape matched; this choice contributes nothing.
			continue
		}
		for _, seg := range segments {
			if seg.Type != "text" || seg.Text == "" {
				continue
			}
			collect(seg.Text)
		}
	}

	if len(result.Data) == 0 {
		return nil, fmt.Errorf("no video URLs in response (id=%s, choices=%d)", decoded.ID, len(decoded.Choices))
	}
	return result, nil
}
// Patterns used by extractVideoURL. They are compiled once at package
// initialization rather than on every call.
var (
	// videoMarkdownLinkRe matches a markdown link "[text](url)" and captures
	// the http(s) URL inside the parentheses.
	videoMarkdownLinkRe = regexp.MustCompile(`\[.*?\]\((https?://[^\s\)]+)\)`)
	// videoBareURLRe matches the first http(s) URL, running up to the next
	// whitespace character.
	videoBareURLRe = regexp.MustCompile(`https?://[^\s]+`)
)

// videoURLTrailingPunct lists characters that commonly follow a URL embedded
// in prose or markup (sentence punctuation, closing brackets, quotes,
// markdown emphasis) and are therefore stripped from the end of a bare URL.
const videoURLTrailingPunct = ".,;:!?)]}>\"'`*"

// extractVideoURL extracts a clean URL from various response formats.
// LaoZhang sometimes returns URLs wrapped in markdown: [download video](https://...)
//
// Resolution order, applied to raw after trimming surrounding whitespace:
//  1. The URL of the first markdown link "[text](url)".
//  2. The first bare http(s) URL, with trailing punctuation removed so that
//     text such as "see https://host/v.mp4." does not yield a broken URL.
//  3. The trimmed input itself when no URL is present.
//
// NOTE(review): case 3 means non-URL text (for example a refusal message) is
// returned unchanged, so callers testing only for a non-empty result will
// treat it as a URL. Kept for backward compatibility; confirm whether callers
// rely on it.
func extractVideoURL(raw string) string {
	raw = strings.TrimSpace(raw)

	if m := videoMarkdownLinkRe.FindStringSubmatch(raw); len(m) > 1 {
		return m[1]
	}

	if u := videoBareURLRe.FindString(raw); u != "" {
		// The pattern stops only at whitespace, so drop punctuation that
		// belongs to the surrounding text rather than to the URL.
		return strings.TrimRight(u, videoURLTrailingPunct)
	}

	return raw
}