package laozhang

import (
	"context"
	"encoding/base64"
	"encoding/json"
	"fmt"
	"time"
)

const (
	defaultImageModel = "gemini-3-pro-image-preview"
	defaultImageSize  = "1K"
	defaultImageCount = 1

	// imageErrPreviewLimit caps, in bytes, how much of a response body or
	// prompt is embedded in an error message.
	imageErrPreviewLimit = 500
)

// ImageRequest represents an image generation request.
type ImageRequest struct {
	// Model is the model to use for generation. Defaults to
	// "gemini-3-pro-image-preview" when empty.
	Model string `json:"model"`
	// Prompt is the required text description of the desired image.
	Prompt string `json:"prompt"`
	// Size is the image size: "1K", "2K", or "4K". Defaults to "1K" when empty.
	Size string `json:"size,omitempty"`
	// AspectRatio is the aspect ratio, e.g. "9:16", "16:9", "1:1", "4:3", "3:4".
	// When empty, no aspect ratio is sent in the request.
	AspectRatio string `json:"aspect_ratio,omitempty"`
	// N is the number of images to generate and defaults to 1.
	// NOTE(review): GenerateImage normalizes N but does not forward it in the
	// request it builds, so it currently has no effect on the result.
	N int `json:"n,omitempty"`

	// Seed optionally requests reproducible results (nil = not sent). It is
	// forwarded as generationConfig.seed; support varies by model.
	Seed *int32 `json:"seed,omitempty"`

	// ReferenceImages are reference images for identity/style consistency.
	// Each entry should have Data (raw bytes) and MimeType set. They are not
	// serialized directly; GenerateImage adds them to the request parts.
	// NOTE(review): the upstream model is described as supporting up to 5
	// person images; that limit is not enforced here.
	ReferenceImages []ReferenceImage `json:"-"`
}

// ReferenceImage represents a reference image for image-to-image generation.
type ReferenceImage struct {
	// Data holds the raw (not base64-encoded) image bytes.
	Data []byte
	// MimeType is the image MIME type, e.g. "image/png" or "image/jpeg".
	MimeType string
}

// ImageResponse represents an image generation response.
type ImageResponse struct {
	// Created is the Unix timestamp taken locally when the response was built.
	Created int64 `json:"created"`
	// Data is the list of generated images.
	Data []ImageData `json:"data"`
}

// ImageData represents a single generated image.
type ImageData struct {
	// URL is the image URL, if available. GenerateImage does not populate it.
	URL string `json:"url,omitempty"`
	// B64JSON is the base64-encoded image data.
	B64JSON string `json:"b64_json,omitempty"`
}

// Gemini native API request/response types (internal).

// geminiRequest is the JSON body sent to the generateContent endpoint.
type geminiRequest struct {
	Contents         []geminiContent `json:"contents"`
	GenerationConfig geminiGenConfig `json:"generationConfig"`
}

// geminiContent is an ordered list of parts, used in requests and responses.
type geminiContent struct {
	Parts []geminiPart `json:"parts"`
}

// geminiPart carries either text or inline binary data.
type geminiPart struct {
	Text       string            `json:"text,omitempty"`
	InlineData *geminiInlineData `json:"inlineData,omitempty"`
}

// geminiInlineData is an inline binary payload tagged with its MIME type.
type geminiInlineData struct {
	MimeType string `json:"mimeType"`
	// Data is the base64-encoded payload.
	Data string `json:"data"`
}

// geminiGenConfig holds the generation settings sent with a request.
type geminiGenConfig struct {
	ResponseModalities []string           `json:"responseModalities"`
	ImageConfig        *geminiImageConfig `json:"imageConfig,omitempty"`
	// Seed is omitted from the JSON when nil.
	Seed *int32 `json:"seed,omitempty"`
}

// geminiImageConfig holds image-specific generation settings.
type geminiImageConfig struct {
	AspectRatio string `json:"aspectRatio,omitempty"`
	ImageSize   string `json:"imageSize,omitempty"`
}

// geminiResponse is the subset of the generateContent response that is decoded.
type geminiResponse struct {
	Candidates []geminiCandidate `json:"candidates"`
}

// geminiCandidate is a single candidate within a geminiResponse.
type geminiCandidate struct {
	Content geminiContent `json:"content"`
}

// truncateImageLogText returns s unchanged when it is at most limit bytes
// long. Otherwise it returns a prefix of at most limit bytes followed by
// "...". The cut is moved back off UTF-8 continuation bytes so a multi-byte
// character is never split.
func truncateImageLogText(s string, limit int) string {
	if limit < 0 {
		limit = 0
	}
	if len(s) <= limit {
		return s
	}
	cut := limit
	// Bytes of the form 10xxxxxx continue a rune that started earlier.
	for cut > 0 && s[cut]&0xC0 == 0x80 {
		cut--
	}
	return s[:cut] + "..."
}

// GenerateImage generates images from req.Prompt using the Gemini native
// generateContent endpoint.
//
// Empty Model, Size and N fall back to the package defaults. Reference images
// with empty Data are skipped, and a reference image with an empty MimeType is
// sent as "image/png". A non-nil Seed is forwarded in the generation config.
//
// It returns an error wrapping ErrInvalidConfig when the prompt is empty or
// the size is not one of "1K", "2K", "4K"; the error from the underlying
// request unchanged; an error when the response cannot be decoded; and an
// error when the response contains no inline image data. On success every
// returned ImageData has B64JSON set.
func (c *Client) GenerateImage(ctx context.Context, req ImageRequest) (*ImageResponse, error) {
	if req.Prompt == "" {
		return nil, fmt.Errorf("%w: prompt is required", ErrInvalidConfig)
	}

	// req is a copy, so filling in defaults does not affect the caller.
	if req.Model == "" {
		req.Model = defaultImageModel
	}
	if req.Size == "" {
		req.Size = defaultImageSize
	}
	if req.N == 0 {
		// N is normalized but is not forwarded in the request built below.
		req.N = defaultImageCount
	}

	switch req.Size {
	case "1K", "2K", "4K":
	default:
		return nil, fmt.Errorf("%w: invalid size %s (must be 1K, 2K, or 4K)", ErrInvalidConfig, req.Size)
	}

	// Parts: the text prompt first, then any reference images.
	parts := make([]geminiPart, 0, 1+len(req.ReferenceImages))
	parts = append(parts, geminiPart{Text: req.Prompt})
	for _, ref := range req.ReferenceImages {
		if len(ref.Data) == 0 {
			continue
		}
		mimeType := ref.MimeType
		if mimeType == "" {
			mimeType = "image/png"
		}
		parts = append(parts, geminiPart{
			InlineData: &geminiInlineData{
				MimeType: mimeType,
				Data:     base64.StdEncoding.EncodeToString(ref.Data),
			},
		})
	}

	geminiReq := geminiRequest{
		Contents: []geminiContent{{Parts: parts}},
		GenerationConfig: geminiGenConfig{
			ResponseModalities: []string{"TEXT", "IMAGE"},
			ImageConfig: &geminiImageConfig{
				// An empty aspect ratio is dropped by omitempty.
				AspectRatio: req.AspectRatio,
				ImageSize:   req.Size,
			},
			Seed: req.Seed,
		},
	}

	// Endpoint format: /v1beta/models/{model}:generateContent
	endpoint := fmt.Sprintf("/v1beta/models/%s:generateContent", req.Model)

	// This goes through doRequestGemini rather than the regular request path.
	respBody, err := c.doRequestGemini(ctx, "POST", endpoint, geminiReq)
	if err != nil {
		return nil, err
	}

	var geminiResp geminiResponse
	if err := json.Unmarshal(respBody, &geminiResp); err != nil {
		// Bound the embedded body: responses may carry large base64 payloads.
		return nil, fmt.Errorf("unmarshal gemini response: %w (body: %s)",
			err, truncateImageLogText(string(respBody), imageErrPreviewLimit))
	}

	// Collect every inline-data part from every candidate.
	var images []ImageData
	for _, candidate := range geminiResp.Candidates {
		for _, part := range candidate.Content.Parts {
			if part.InlineData != nil && part.InlineData.Data != "" {
				images = append(images, ImageData{B64JSON: part.InlineData.Data})
			}
		}
	}

	if len(images) == 0 {
		return nil, fmt.Errorf("no images returned in response (candidates=%d, aspect=%s, body=%s, prompt=%s)",
			len(geminiResp.Candidates), req.AspectRatio,
			truncateImageLogText(string(respBody), imageErrPreviewLimit),
			truncateImageLogText(req.Prompt, imageErrPreviewLimit))
	}

	return &ImageResponse{
		Created: time.Now().Unix(),
		Data:    images,
	}, nil
}