persona-community-2/pkg/laozhang/image.go
jordan cb3d4d5786
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
ci/woodpecker/manual/woodpecker Pipeline was successful
Initialize project from skeleton template
2026-02-23 10:53:55 +00:00

204 lines
5.9 KiB
Go

package laozhang
import (
"context"
"encoding/base64"
"encoding/json"
"fmt"
"time"
)
// Fallback values GenerateImage applies when the matching ImageRequest field
// is left at its zero value.
const (
	// defaultImageModel replaces an empty ImageRequest.Model.
	defaultImageModel = "gemini-3-pro-image-preview"

	// defaultImageSize replaces an empty ImageRequest.Size.
	defaultImageSize = "1K"

	// defaultImageCount replaces a zero ImageRequest.N.
	defaultImageCount = 1
)
// ImageRequest describes one image generation call. Only Prompt is mandatory;
// GenerateImage fills in Model, Size and N when they are left empty.
type ImageRequest struct {
	// Model is the model used for generation
	// (default: "gemini-3-pro-image-preview").
	Model string `json:"model"`

	// Prompt is the text description of the desired image. Required.
	Prompt string `json:"prompt"`

	// Size is the image size: "1K", "2K", or "4K" (default: "1K").
	Size string `json:"size,omitempty"`

	// AspectRatio is the aspect ratio, e.g. "9:16", "16:9", "1:1", "4:3",
	// "3:4". When empty, GenerateImage sends no aspect ratio at all; the
	// effective default is then whatever the API picks (documented here
	// originally as "1:1" — TODO confirm against the API).
	AspectRatio string `json:"aspect_ratio,omitempty"`

	// N is the number of images to generate (default: 1).
	// NOTE(review): GenerateImage in this file defaults N but does not
	// forward it in the Gemini request.
	N int `json:"n,omitempty"`

	// Seed optionally fixes the random seed for reproducible results;
	// nil means random. Support varies by model.
	// NOTE(review): GenerateImage in this file does not forward Seed.
	Seed *int32 `json:"seed,omitempty"`

	// ReferenceImages are reference images for identity/style consistency
	// (up to 5 person images supported). Each entry should have Data and
	// MimeType set. The field is excluded from JSON; GenerateImage adds the
	// images to the request as inline parts instead.
	ReferenceImages []ReferenceImage `json:"-"`
}

// ReferenceImage is a single reference image for image-to-image generation.
type ReferenceImage struct {
	// Data holds the raw (not base64-encoded) image bytes.
	Data []byte

	// MimeType is the image media type, e.g. "image/png" or "image/jpeg".
	MimeType string
}
// ImageResponse is the result of an image generation call.
type ImageResponse struct {
	// Created is the Unix timestamp of when the image was created.
	Created int64 `json:"created"`

	// Data lists the generated images.
	Data []ImageData `json:"data"`
}

// ImageData is one generated image, carried as a URL and/or inline base64.
type ImageData struct {
	// URL is the image URL, if available.
	URL string `json:"url,omitempty"`

	// B64JSON is the base64-encoded image data.
	B64JSON string `json:"b64_json,omitempty"`
}
// Internal wire types for the Gemini native generateContent API. They mirror
// the JSON shapes GenerateImage sends and parses and are not exported.

// geminiRequest is the JSON body posted to the generateContent endpoint.
type geminiRequest struct {
	Contents         []geminiContent `json:"contents"`
	GenerationConfig geminiGenConfig `json:"generationConfig"`
}

// geminiContent is an ordered list of parts; it appears both in requests and
// inside response candidates.
type geminiContent struct {
	Parts []geminiPart `json:"parts"`
}

// geminiPart is one content part. GenerateImage populates either Text or
// InlineData, never both; unset members are omitted from the JSON.
type geminiPart struct {
	Text       string            `json:"text,omitempty"`
	InlineData *geminiInlineData `json:"inlineData,omitempty"`
}

// geminiInlineData carries binary content embedded in the JSON payload.
type geminiInlineData struct {
	MimeType string `json:"mimeType"`
	// Data is the base64-encoded payload.
	Data string `json:"data"`
}

// geminiGenConfig holds the generation settings of a request.
type geminiGenConfig struct {
	ResponseModalities []string           `json:"responseModalities"`
	ImageConfig        *geminiImageConfig `json:"imageConfig,omitempty"`
}

// geminiImageConfig holds image-specific settings; empty fields are omitted.
type geminiImageConfig struct {
	AspectRatio string `json:"aspectRatio,omitempty"`
	ImageSize   string `json:"imageSize,omitempty"`
}

// geminiResponse is the subset of the generateContent response that
// GenerateImage decodes.
type geminiResponse struct {
	Candidates []geminiCandidate `json:"candidates"`
}

// geminiCandidate is one response candidate and its content.
type geminiCandidate struct {
	Content geminiContent `json:"content"`
}
// GenerateImage generates images from req.Prompt using the Gemini native
// generateContent endpoint.
//
// req is received by value, so the defaults applied here (Model, Size, N) are
// not visible to the caller. Reference images with no data are skipped, and a
// reference image without a MimeType is sent as "image/png".
//
// It returns an error wrapping ErrInvalidConfig when Prompt is empty or Size is
// not one of "1K", "2K", "4K"; the error from doRequestGemini unchanged when
// the request fails; and a descriptive error when the response cannot be
// decoded or contains no inline image data. Response bodies and prompts quoted
// in errors are shortened to a bounded preview.
//
// NOTE(review): req.N and req.Seed are accepted but are not part of the
// request sent to the API; the number of returned images is whatever the
// response contains.
func (c *Client) GenerateImage(ctx context.Context, req ImageRequest) (*ImageResponse, error) {
	if req.Prompt == "" {
		return nil, fmt.Errorf("%w: prompt is required", ErrInvalidConfig)
	}

	// Fill in defaults on the local copy of the request.
	if req.Model == "" {
		req.Model = defaultImageModel
	}
	if req.Size == "" {
		req.Size = defaultImageSize
	}
	if req.N == 0 {
		req.N = defaultImageCount
	}

	switch req.Size {
	case "1K", "2K", "4K":
	default:
		return nil, fmt.Errorf("%w: invalid size %s (must be 1K, 2K, or 4K)", ErrInvalidConfig, req.Size)
	}

	// Parts: the text prompt first, then each non-empty reference image as
	// base64 inline data.
	parts := make([]geminiPart, 0, 1+len(req.ReferenceImages))
	parts = append(parts, geminiPart{Text: req.Prompt})
	for _, refImg := range req.ReferenceImages {
		if len(refImg.Data) == 0 {
			continue
		}
		mimeType := refImg.MimeType
		if mimeType == "" {
			mimeType = "image/png"
		}
		parts = append(parts, geminiPart{
			InlineData: &geminiInlineData{
				MimeType: mimeType,
				Data:     base64.StdEncoding.EncodeToString(refImg.Data),
			},
		})
	}

	geminiReq := geminiRequest{
		Contents: []geminiContent{{Parts: parts}},
		GenerationConfig: geminiGenConfig{
			ResponseModalities: []string{"TEXT", "IMAGE"},
			ImageConfig: &geminiImageConfig{
				// An empty aspect ratio is dropped by omitempty, so it is
				// only sent when the caller specified one.
				AspectRatio: req.AspectRatio,
				ImageSize:   req.Size,
			},
		},
	}

	// Endpoint format: /v1beta/models/{model}:generateContent
	endpoint := fmt.Sprintf("/v1beta/models/%s:generateContent", req.Model)

	// doRequestGemini is used for Gemini-native paths (per the original note,
	// to bypass the /v1 base URL — it is defined elsewhere in the package).
	respBody, err := c.doRequestGemini(ctx, "POST", endpoint, geminiReq)
	if err != nil {
		return nil, err
	}

	var geminiResp geminiResponse
	if err := json.Unmarshal(respBody, &geminiResp); err != nil {
		// The body may hold large base64 image data; quote only a preview.
		return nil, fmt.Errorf("unmarshal gemini response: %w (body: %s)",
			err, imageErrPreview(string(respBody), imageErrPreviewLimit))
	}

	// Collect every inline-data part from every candidate. Data starts as an
	// empty (non-nil) slice, matching the previous behavior.
	imageResp := &ImageResponse{
		Created: time.Now().Unix(),
		Data:    make([]ImageData, 0),
	}
	for _, candidate := range geminiResp.Candidates {
		for _, part := range candidate.Content.Parts {
			if part.InlineData != nil && part.InlineData.Data != "" {
				imageResp.Data = append(imageResp.Data, ImageData{
					B64JSON: part.InlineData.Data,
				})
			}
		}
	}

	if len(imageResp.Data) == 0 {
		return nil, fmt.Errorf("no images returned in response (candidates=%d, aspect=%s, body=%s, prompt=%s)",
			len(geminiResp.Candidates), req.AspectRatio,
			imageErrPreview(string(respBody), imageErrPreviewLimit),
			imageErrPreview(req.Prompt, imageErrPreviewLimit))
	}

	return imageResp, nil
}

// imageErrPreviewLimit is the maximum number of bytes of a response body or
// prompt that GenerateImage quotes inside an error message.
const imageErrPreviewLimit = 500

// imageErrPreview returns s unchanged when it is at most limit bytes long.
// Otherwise it returns a prefix of at most limit bytes followed by "...". The
// cut is moved back to the start of a rune so the preview never ends in a
// partial UTF-8 sequence.
func imageErrPreview(s string, limit int) string {
	if len(s) <= limit {
		return s
	}
	cut := 0
	// Ranging over a string yields the byte offset at which each rune starts;
	// keep the last such offset that does not exceed limit.
	for i := range s {
		if i > limit {
			break
		}
		cut = i
	}
	return s[:cut] + "..."
}