182 lines
4.7 KiB
Go
182 lines
4.7 KiB
Go
package gemini
|
|
|
|
import (
|
|
"context"
|
|
"crypto/rand"
|
|
"encoding/binary"
|
|
"fmt"
|
|
|
|
"google.golang.org/genai"
|
|
)
|
|
|
|
// Seed range constraints of the Gemini API. GenerateImage rejects
// caller-supplied seeds outside [minSeed, maxSeed] and draws its own random
// seeds from the same range.
const (
	minSeed = 1
	maxSeed = 999999
)
|
|
|
|
const (
	// ModelGemini3ProImage is the model identifier for Gemini native image
	// generation (Nano Banana Pro).
	ModelGemini3ProImage = "gemini-3-pro-image-preview"

	// defaultImageModel is substituted when an ImageRequest leaves Model empty.
	defaultImageModel = ModelGemini3ProImage
)
|
|
|
|
// ImageRequest describes a single image generation call.
type ImageRequest struct {
	// Model selects the model to use. Empty means the default,
	// "gemini-3-pro-image-preview".
	Model string
	// Prompt is the text description of the desired image. Required.
	Prompt string
	// AspectRatio optionally sets the aspect ratio, e.g. "16:9", "1:1", "9:16".
	AspectRatio string
	// Size optionally sets the image size: "1K", "2K" or "4K"
	// (Gemini 3 Pro only).
	Size string

	// ReferenceImage optionally carries the bytes of a reference image used
	// for identity consistency.
	ReferenceImage []byte
	// ReferenceMime is the MIME type of ReferenceImage, e.g. "image/png" or
	// "image/webp". Optional.
	ReferenceMime string

	// Seed optionally pins the generation seed for reproducible results.
	// A nil Seed requests a random seed.
	Seed *int32
}
|
|
|
|
// ImageResponse represents an image generation response
|
|
type ImageResponse struct {
|
|
Images []ImageData // List of generated images
|
|
Text string // Optional text response from model
|
|
Seed *int32 // Seed used for generation
|
|
}
|
|
|
|
// ImageData holds one generated image.
type ImageData struct {
	// Data is the raw image bytes.
	Data []byte
	// MimeType is the MIME type of the image.
	MimeType string
}
|
|
|
|
// GenerateImage generates images using Gemini native image generation (Nano Banana Pro)
|
|
func (c *Client) GenerateImage(ctx context.Context, req ImageRequest) (*ImageResponse, error) {
|
|
// Validate required fields
|
|
if req.Prompt == "" {
|
|
return nil, fmt.Errorf("%w: prompt is required", ErrInvalidConfig)
|
|
}
|
|
|
|
// Set defaults
|
|
if req.Model == "" {
|
|
req.Model = defaultImageModel
|
|
}
|
|
|
|
// Generate or validate provided seed
|
|
var seed int32
|
|
if req.Seed != nil {
|
|
seed = *req.Seed
|
|
// Validate seed is within Gemini's accepted range
|
|
if seed < minSeed || seed > maxSeed {
|
|
return nil, fmt.Errorf("%w: seed must be between %d and %d (got %d)",
|
|
ErrInvalidConfig, minSeed, maxSeed, seed)
|
|
}
|
|
} else {
|
|
// Generate cryptographically random seed
|
|
var seedBytes [4]byte
|
|
if _, err := rand.Read(seedBytes[:]); err != nil {
|
|
return nil, fmt.Errorf("generate random seed: %w", err)
|
|
}
|
|
seed = int32(binary.LittleEndian.Uint32(seedBytes[:])%maxSeed) + minSeed
|
|
}
|
|
|
|
c.logger.Debug("generating image",
|
|
"model", req.Model,
|
|
"prompt_length", len(req.Prompt),
|
|
"seed", seed,
|
|
"has_reference", len(req.ReferenceImage) > 0,
|
|
)
|
|
|
|
// Build generation config
|
|
config := &genai.GenerateContentConfig{
|
|
ResponseModalities: []string{"image", "text"},
|
|
Seed: &seed,
|
|
}
|
|
|
|
// Apply image-specific config (aspect ratio, size)
|
|
if req.AspectRatio != "" || req.Size != "" {
|
|
config.ImageConfig = &genai.ImageConfig{}
|
|
if req.AspectRatio != "" {
|
|
config.ImageConfig.AspectRatio = req.AspectRatio
|
|
}
|
|
if req.Size != "" {
|
|
config.ImageConfig.ImageSize = req.Size
|
|
}
|
|
}
|
|
|
|
// Build content parts based on whether reference image is provided
|
|
var parts []*genai.Part
|
|
if len(req.ReferenceImage) > 0 {
|
|
// Determine MIME type
|
|
mime := req.ReferenceMime
|
|
if mime == "" {
|
|
mime = "image/png" // default
|
|
}
|
|
|
|
// Multipart content: reference image + text prompt
|
|
parts = []*genai.Part{
|
|
{InlineData: &genai.Blob{MIMEType: mime, Data: req.ReferenceImage}},
|
|
{Text: req.Prompt},
|
|
}
|
|
} else {
|
|
// Text-only content
|
|
parts = []*genai.Part{
|
|
{Text: req.Prompt},
|
|
}
|
|
}
|
|
|
|
content := []*genai.Content{{Parts: parts}}
|
|
|
|
// Call the API with retry for transient errors
|
|
var response *genai.GenerateContentResponse
|
|
err := c.retryWithBackoff(ctx, "GenerateImage", func() error {
|
|
var apiErr error
|
|
response, apiErr = c.genaiClient.Models.GenerateContent(ctx, req.Model, content, config)
|
|
if apiErr != nil {
|
|
return classifyError(apiErr)
|
|
}
|
|
return nil
|
|
})
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Convert response
|
|
imageResp := &ImageResponse{
|
|
Images: make([]ImageData, 0),
|
|
Seed: &seed,
|
|
}
|
|
|
|
// Extract images and text from response
|
|
if response != nil && len(response.Candidates) > 0 {
|
|
candidate := response.Candidates[0]
|
|
if candidate.Content != nil {
|
|
for _, part := range candidate.Content.Parts {
|
|
if part.Text != "" {
|
|
imageResp.Text = part.Text
|
|
}
|
|
if part.InlineData != nil && len(part.InlineData.Data) > 0 {
|
|
mimeType := "image/png"
|
|
if part.InlineData.MIMEType != "" {
|
|
mimeType = part.InlineData.MIMEType
|
|
}
|
|
imageResp.Images = append(imageResp.Images, ImageData{
|
|
Data: part.InlineData.Data,
|
|
MimeType: mimeType,
|
|
})
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if len(imageResp.Images) == 0 {
|
|
return nil, fmt.Errorf("no images generated")
|
|
}
|
|
|
|
c.logger.Debug("image generation complete",
|
|
"images_count", len(imageResp.Images),
|
|
)
|
|
|
|
return imageResp, nil
|
|
}
|