204 lines
5.9 KiB
Go
204 lines
5.9 KiB
Go
package laozhang
|
|
|
|
import (
	"context"
	"encoding/base64"
	"encoding/json"
	"fmt"
	"time"
	"unicode/utf8"
)
|
|
|
|
// Defaults that GenerateImage substitutes for zero-valued ImageRequest fields.
const (
	// defaultImageModel is used when ImageRequest.Model is empty.
	defaultImageModel = "gemini-3-pro-image-preview"

	// defaultImageSize is used when ImageRequest.Size is empty.
	defaultImageSize = "1K"

	// defaultImageCount is used when ImageRequest.N is zero.
	defaultImageCount = 1
)
|
|
|
|
// ImageRequest represents an image generation request
|
|
type ImageRequest struct {
|
|
Model string `json:"model"` // Model to use for generation (default: "gemini-3-pro-image-preview")
|
|
Prompt string `json:"prompt"` // Required: text description of the desired image
|
|
Size string `json:"size,omitempty"` // Image size: "1K", "2K", or "4K" (default: "1K")
|
|
AspectRatio string `json:"aspect_ratio,omitempty"` // Aspect ratio: "9:16", "16:9", "1:1", "4:3", "3:4" (default: "1:1")
|
|
N int `json:"n,omitempty"` // Number of images to generate (default: 1)
|
|
|
|
// Determinism control (support varies by model)
|
|
Seed *int32 `json:"seed,omitempty"` // Optional: seed for reproducible results (nil = random)
|
|
|
|
// Reference images for identity/style consistency (up to 5 person images supported)
|
|
// Each ReferenceImage should have Data (raw bytes) and MimeType set
|
|
ReferenceImages []ReferenceImage `json:"-"` // Not serialized directly, added to parts
|
|
}
|
|
|
|
// ReferenceImage represents a reference image for image-to-image generation.
type ReferenceImage struct {
	// Data holds the raw (not base64-encoded) image bytes.
	Data []byte

	// MimeType is the image media type, e.g. "image/png" or "image/jpeg".
	MimeType string
}
|
|
|
|
// ImageResponse represents an image generation response
|
|
type ImageResponse struct {
|
|
Created int64 `json:"created"` // Unix timestamp of when the image was created
|
|
Data []ImageData `json:"data"` // List of generated images
|
|
}
|
|
|
|
// ImageData represents a single generated image.
type ImageData struct {
	// URL is the image URL, if available.
	URL string `json:"url,omitempty"`

	// B64JSON is the base64-encoded image data.
	B64JSON string `json:"b64_json,omitempty"`
}
|
|
|
|
// Gemini native API request/response types (internal)
|
|
|
|
type geminiRequest struct {
|
|
Contents []geminiContent `json:"contents"`
|
|
GenerationConfig geminiGenConfig `json:"generationConfig"`
|
|
}
|
|
|
|
type geminiContent struct {
|
|
Parts []geminiPart `json:"parts"`
|
|
}
|
|
|
|
type geminiPart struct {
|
|
Text string `json:"text,omitempty"`
|
|
InlineData *geminiInlineData `json:"inlineData,omitempty"`
|
|
}
|
|
|
|
// geminiInlineData carries binary content embedded directly in a part.
type geminiInlineData struct {
	// MimeType is the media type of the embedded content.
	MimeType string `json:"mimeType"`

	// Data is the content, base64 encoded.
	Data string `json:"data"`
}
|
|
|
|
type geminiGenConfig struct {
|
|
ResponseModalities []string `json:"responseModalities"`
|
|
ImageConfig *geminiImageConfig `json:"imageConfig,omitempty"`
|
|
}
|
|
|
|
// geminiImageConfig holds the image-specific generation options. Each field
// is omitted from the JSON when empty.
type geminiImageConfig struct {
	AspectRatio string `json:"aspectRatio,omitempty"`
	ImageSize   string `json:"imageSize,omitempty"`
}
|
|
|
|
type geminiResponse struct {
|
|
Candidates []geminiCandidate `json:"candidates"`
|
|
}
|
|
|
|
type geminiCandidate struct {
|
|
Content geminiContent `json:"content"`
|
|
}
|
|
|
|
// GenerateImage generates images based on the provided prompt using the Gemini native API
|
|
func (c *Client) GenerateImage(ctx context.Context, req ImageRequest) (*ImageResponse, error) {
|
|
// Validate required fields
|
|
if req.Prompt == "" {
|
|
return nil, fmt.Errorf("%w: prompt is required", ErrInvalidConfig)
|
|
}
|
|
|
|
// Set defaults
|
|
if req.Model == "" {
|
|
req.Model = defaultImageModel
|
|
}
|
|
if req.Size == "" {
|
|
req.Size = defaultImageSize
|
|
}
|
|
if req.N == 0 {
|
|
req.N = defaultImageCount
|
|
}
|
|
|
|
// Validate size
|
|
validSizes := map[string]bool{"1K": true, "2K": true, "4K": true}
|
|
if !validSizes[req.Size] {
|
|
return nil, fmt.Errorf("%w: invalid size %s (must be 1K, 2K, or 4K)", ErrInvalidConfig, req.Size)
|
|
}
|
|
|
|
// Build parts array: text prompt first, then reference images
|
|
parts := []geminiPart{
|
|
{Text: req.Prompt},
|
|
}
|
|
|
|
// Add reference images (up to 5 person images for identity consistency)
|
|
for _, refImg := range req.ReferenceImages {
|
|
if len(refImg.Data) == 0 {
|
|
continue
|
|
}
|
|
mimeType := refImg.MimeType
|
|
if mimeType == "" {
|
|
mimeType = "image/png"
|
|
}
|
|
parts = append(parts, geminiPart{
|
|
InlineData: &geminiInlineData{
|
|
MimeType: mimeType,
|
|
Data: base64.StdEncoding.EncodeToString(refImg.Data),
|
|
},
|
|
})
|
|
}
|
|
|
|
// Build Gemini native request
|
|
geminiReq := geminiRequest{
|
|
Contents: []geminiContent{
|
|
{
|
|
Parts: parts,
|
|
},
|
|
},
|
|
GenerationConfig: geminiGenConfig{
|
|
ResponseModalities: []string{"TEXT", "IMAGE"},
|
|
ImageConfig: &geminiImageConfig{
|
|
ImageSize: req.Size,
|
|
},
|
|
},
|
|
}
|
|
|
|
// Add aspect ratio if specified
|
|
if req.AspectRatio != "" {
|
|
geminiReq.GenerationConfig.ImageConfig.AspectRatio = req.AspectRatio
|
|
}
|
|
|
|
// Build the Gemini endpoint path
|
|
// Format: /v1beta/models/{model}:generateContent
|
|
endpoint := fmt.Sprintf("/v1beta/models/%s:generateContent", req.Model)
|
|
|
|
// Make request - use doRequestRaw to bypass the /v1 base URL
|
|
respBody, err := c.doRequestGemini(ctx, "POST", endpoint, geminiReq)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Parse Gemini response
|
|
var geminiResp geminiResponse
|
|
if err := json.Unmarshal(respBody, &geminiResp); err != nil {
|
|
return nil, fmt.Errorf("unmarshal gemini response: %w (body: %s)", err, string(respBody))
|
|
}
|
|
|
|
// Convert to our ImageResponse format
|
|
imageResp := &ImageResponse{
|
|
Created: time.Now().Unix(),
|
|
Data: make([]ImageData, 0),
|
|
}
|
|
|
|
for _, candidate := range geminiResp.Candidates {
|
|
for _, part := range candidate.Content.Parts {
|
|
if part.InlineData != nil && part.InlineData.Data != "" {
|
|
imageResp.Data = append(imageResp.Data, ImageData{
|
|
B64JSON: part.InlineData.Data,
|
|
})
|
|
}
|
|
}
|
|
}
|
|
|
|
if len(imageResp.Data) == 0 {
|
|
// Truncate response and prompt for logging
|
|
bodyPreview := string(respBody)
|
|
if len(bodyPreview) > 500 {
|
|
bodyPreview = bodyPreview[:500] + "..."
|
|
}
|
|
promptPreview := req.Prompt
|
|
if len(promptPreview) > 500 {
|
|
promptPreview = promptPreview[:500] + "..."
|
|
}
|
|
return nil, fmt.Errorf("no images returned in response (candidates=%d, aspect=%s, body=%s, prompt=%s)",
|
|
len(geminiResp.Candidates), req.AspectRatio, bodyPreview, promptPreview)
|
|
}
|
|
|
|
return imageResp, nil
|
|
}
|