package laozhang import ( "context" "encoding/json" "fmt" "regexp" "strings" ) const ( defaultVideoModel = "veo-3.1" defaultVideoCount = 1 ) // VideoRequest represents a video generation request type VideoRequest struct { Model string `json:"model"` // Model to use (default: "veo-3.1") Prompt string `json:"prompt"` // Required for text-to-video: text description of the desired video N int `json:"n,omitempty"` // Number of videos to generate (1-4, default: 1) ReferenceImages []string `json:"reference_images,omitempty"` // Optional: base64 or URLs for image-to-video } // VideoResponse represents a video generation response type VideoResponse struct { ID string `json:"id"` // Response ID Created int64 `json:"created"` // Unix timestamp of when the video was created Data []VideoData `json:"data"` // List of generated videos } // VideoData represents a single generated video type VideoData struct { URL string `json:"url"` // Video URL } // videoChatMessage represents the internal chat message format for video generation (request) type videoChatMessage struct { Role string `json:"role"` Content []videoChatContentPart `json:"content"` } // videoChatResponseMessage represents the response message format (content can be string or array) type videoChatResponseMessage struct { Role string `json:"role"` Content json.RawMessage `json:"content"` // Can be string or []videoChatContentPart } // videoChatContentPart represents a part of the message content (text or image) type videoChatContentPart struct { Type string `json:"type"` // "text" or "image_url" Text string `json:"text,omitempty"` // Text content ImageURL *videoChatImageURL `json:"image_url,omitempty"` // Image URL content } // videoChatImageURL represents an image URL in the chat message type videoChatImageURL struct { URL string `json:"url"` // Base64 data URL or HTTP(S) URL } // videoChatRequest represents the internal chat completion request for video generation type videoChatRequest struct { Model string 
`json:"model"` Messages []videoChatMessage `json:"messages"` Stream bool `json:"stream"` N int `json:"n"` } // videoChatResponse represents the internal chat completion response from Veo API type videoChatResponse struct { ID string `json:"id"` Created int64 `json:"created"` Choices []videoChatChoice `json:"choices"` } // videoChatChoice represents a single choice in the chat response type videoChatChoice struct { Message videoChatResponseMessage `json:"message"` } // GenerateVideo generates videos based on the provided prompt and optional reference images // using the Veo 3.1 models via the chat completions API format. // // For text-to-video, only the Prompt field is required. // For image-to-video (first/last frame interpolation), use models ending in "-fl" and provide ReferenceImages. // // Supported models: // - veo-3.1 (standard, $0.25/gen) // - veo-3.1-fast ($0.15/gen) // - veo-3.1-fl (first/last frame interpolation) // - veo-3.1-fast-fl (fast with interpolation) // - Add "-landscape" suffix for landscape variants (e.g., "veo-3.1-landscape") func (c *Client) GenerateVideo(ctx context.Context, req VideoRequest) (*VideoResponse, error) { // Validate required fields if req.Prompt == "" { return nil, fmt.Errorf("%w: prompt is required", ErrInvalidConfig) } // Set defaults if req.Model == "" { req.Model = defaultVideoModel } if req.N == 0 { req.N = defaultVideoCount } // Validate N is in valid range if req.N < 1 || req.N > 4 { return nil, fmt.Errorf("%w: n must be between 1 and 4, got %d", ErrInvalidConfig, req.N) } // Build message content content := []videoChatContentPart{ { Type: "text", Text: req.Prompt, }, } // Add reference images if provided (for image-to-video) for _, imageURL := range req.ReferenceImages { content = append(content, videoChatContentPart{ Type: "image_url", ImageURL: &videoChatImageURL{ URL: imageURL, }, }) } // Build chat completion request chatReq := videoChatRequest{ Model: req.Model, Messages: []videoChatMessage{ { Role: "user", 
Content: content, }, }, Stream: false, N: req.N, } // Make request with video client (5m timeout) - video generation takes 2-5 minutes respBody, err := c.doRequestVideo(ctx, "POST", "/chat/completions", chatReq) if err != nil { return nil, err } // Unmarshal chat response var chatResp videoChatResponse if err := json.Unmarshal(respBody, &chatResp); err != nil { return nil, fmt.Errorf("unmarshal response: %w", err) } // Extract video URLs from chat response videoResp := &VideoResponse{ ID: chatResp.ID, Created: chatResp.Created, Data: make([]VideoData, 0, len(chatResp.Choices)), } for _, choice := range chatResp.Choices { // The video URL can be returned as either: // 1. A plain string (the URL directly) // 2. An array of content parts with type "text" // 3. Markdown link format: [download video](url) content := choice.Message.Content // Try to unmarshal as string first var contentStr string if err := json.Unmarshal(content, &contentStr); err == nil { // Content is a plain string (may be URL or markdown link) if url := extractVideoURL(contentStr); url != "" { videoResp.Data = append(videoResp.Data, VideoData{ URL: url, }) } continue } // Try to unmarshal as array of content parts var contentParts []videoChatContentPart if err := json.Unmarshal(content, &contentParts); err == nil { for _, contentPart := range contentParts { if contentPart.Type == "text" && contentPart.Text != "" { if url := extractVideoURL(contentPart.Text); url != "" { videoResp.Data = append(videoResp.Data, VideoData{ URL: url, }) } } } } } if len(videoResp.Data) == 0 { return nil, fmt.Errorf("no video URLs in response (id=%s, choices=%d)", chatResp.ID, len(chatResp.Choices)) } return videoResp, nil } // extractVideoURL extracts a clean URL from various response formats. // LaoZhang sometimes returns URLs wrapped in markdown: [download video](https://...) // This function handles both plain URLs and markdown-wrapped URLs. 
//
// Candidates are tried in this order after trimming surrounding whitespace:
//  1. a markdown link "[text](url)": the http(s) link target is returned;
//  2. the first bare http(s) URL found anywhere in the text;
//  3. otherwise the trimmed input is returned unchanged, so the result is
//     empty only when the input is empty or all whitespace.
func extractVideoURL(raw string) string {
	// Clean whitespace and newlines.
	raw = strings.TrimSpace(raw)

	// The markdown form must be checked first: the bare-URL pattern would
	// also match inside a markdown link but would keep the closing ")".
	if m := videoMarkdownLinkRegex.FindStringSubmatch(raw); len(m) > 1 {
		return m[1]
	}

	// Check for bare URL.
	if m := videoBareURLRegex.FindStringSubmatch(raw); len(m) > 1 {
		return m[1]
	}

	// Return as-is if no pattern matched.
	return raw
}

// Patterns used by extractVideoURL, compiled once at package initialization
// instead of on every call.
var (
	// videoMarkdownLinkRegex matches a markdown link "[text](url)" and
	// captures its http(s) target up to the first whitespace or ")".
	videoMarkdownLinkRegex = regexp.MustCompile(`\[.*?\]\((https?://[^\s\)]+)\)`)

	// videoBareURLRegex captures the first http(s) URL, ending at whitespace.
	videoBareURLRegex = regexp.MustCompile(`(https?://[^\s]+)`)
)