Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 65 additions & 13 deletions llm/gem/gem.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,18 @@ const (
// Service provides Gemini completions.
// Fields should not be altered concurrently with calling any method on Service.
type Service struct {
HTTPC *http.Client // defaults to http.DefaultClient if nil
URL string // Gemini API URL, uses the gemini package default if empty
APIKey string // must be non-empty
Model string // defaults to DefaultModel if empty
HTTPC *http.Client // defaults to http.DefaultClient if nil
URL string // Gemini API URL, uses the gemini package default if empty
APIKey string // must be non-empty
Model string // defaults to DefaultModel if empty
ThinkingLevel llm.ThinkingLevel // thinking level (ThinkingLevelOff disables thinkingConfig)

// ReasoningEffort, if non-empty, is used as the thinkingConfig.thinkingLevel
// value sent to Gemini 3.x verbatim, overriding ThinkingLevel. Ignored for
// Gemini 2.5 (which uses thinkingBudget). This mirrors oai.ResponsesService
// so custom-model configurations can pass provider-specific values through
// without Shelley needing to know them.
ReasoningEffort string
}

var _ llm.Service = (*Service)(nil)
Expand Down Expand Up @@ -194,13 +202,17 @@ func (s *Service) buildGeminiRequest(req *llm.Request) (*gemini.Request, error)
// For thinking content, use the Thinking field and preserve the signature
part.Text = c.Thinking
part.ThoughtSignature = c.Signature
part.Thought = true
} else if c.Type == llm.ContentTypeRedactedThinking {
// For redacted thinking, use the Data field (consistent with Anthropic pattern)
part.Text = c.Data
part.ThoughtSignature = c.Signature
part.Thought = true
} else {
// For regular text, use the Text field
// Regular text. Gemini 3 may have attached a thoughtSignature to
// the final-answer text — pass it back so reasoning state survives.
part.Text = c.Text
part.ThoughtSignature = c.Signature
}
content.Parts = append(content.Parts, part)
case llm.ContentTypeToolUse:
Expand Down Expand Up @@ -308,9 +320,47 @@ func (s *Service) buildGeminiRequest(req *llm.Request) (*gemini.Request, error)
}
}

if tc := s.thinkingConfig(); tc != nil {
if gemReq.GenerationConfig == nil {
gemReq.GenerationConfig = &gemini.GenerationConfig{}
}
gemReq.GenerationConfig.ThinkingConfig = tc
}

return gemReq, nil
}

// thinkingConfig builds the Gemini ThinkingConfig from the service settings.
//
// It returns nil when no thinkingConfig should be sent, which leaves the model
// on its default thinking behaviour. The wire format depends on the model
// family, selected by the "gemini-3" name prefix:
//
//   - Gemini 3.x: a string thinkingLevel. ReasoningEffort, when non-empty, takes
//     precedence over ThinkingLevel; otherwise the level is derived from
//     ThinkingLevel.ThinkingEffort().
//   - Everything else (Gemini 2.5 and earlier): an integer thinkingBudget
//     derived from ThinkingLevel only. ReasoningEffort is documented as ignored
//     for these models, so it must not cause a config to be sent on its own.
func (s *Service) thinkingConfig() *gemini.ThinkingConfig {
	model := cmp.Or(s.Model, DefaultModel)

	if !strings.HasPrefix(model, "gemini-3") {
		// ReasoningEffort does not apply here. Without this check, a service
		// configured with only ReasoningEffort (ThinkingLevel left Off) would
		// send the budget computed for ThinkingLevelOff instead of omitting
		// thinkingConfig (presumably 0, i.e. thinking disabled — TODO confirm).
		if s.ThinkingLevel == llm.ThinkingLevelOff {
			return nil
		}
		budget := s.ThinkingLevel.ThinkingBudgetTokens()
		return &gemini.ThinkingConfig{ThinkingBudget: &budget}
	}

	level := s.ReasoningEffort
	if level == "" {
		if s.ThinkingLevel == llm.ThinkingLevelOff {
			return nil
		}
		level = s.ThinkingLevel.ThinkingEffort()
	}
	if level == "" {
		// ThinkingLevel has no string equivalent; fall back to the model default.
		return nil
	}

	// gemini-3-pro-preview accepts only "low" and "high".
	// NOTE(review): this clamp also rewrites an explicit ReasoningEffort of
	// "minimal"/"medium", although the field is documented as being sent
	// verbatim — confirm that is intended. Unrecognised values pass through.
	if model == "gemini-3-pro-preview" {
		switch level {
		case "minimal", "low":
			level = "low"
		case "medium", "high":
			level = "high"
		}
	}
	return &gemini.ThinkingConfig{ThinkingLevel: level}
}

// convertGeminiResponseToContent converts a Gemini response to llm.Content
func convertGeminiResponseToContent(res *gemini.Response) []llm.Content {
if res == nil || len(res.Candidates) == 0 || len(res.Candidates[0].Content.Parts) == 0 {
Expand All @@ -332,9 +382,10 @@ func convertGeminiResponseToContent(res *gemini.Response) []llm.Content {
"has_function_response", part.FunctionResponse != nil)

if part.Text != "" {
// Check if this is thinking content (has a thought signature)
if part.ThoughtSignature != "" {
// This is thinking content - use ContentTypeThinking
// A part is a thought summary only when thought=true. Gemini 3 attaches
// thoughtSignature to ordinary final-answer text too, for round-tripping
// reasoning state — that signature alone does not make the text a thought.
if part.Thought {
contents = append(contents, llm.Content{
Type: llm.ContentTypeThinking,
Thinking: part.Text,
Expand All @@ -344,10 +395,10 @@ func convertGeminiResponseToContent(res *gemini.Response) []llm.Content {
"signature", part.ThoughtSignature,
"thinking_length", len(part.Text))
} else {
// Regular text response
contents = append(contents, llm.Content{
Type: llm.ContentTypeText,
Text: part.Text,
Type: llm.ContentTypeText,
Text: part.Text,
Signature: part.ThoughtSignature,
})
}
} else if part.FunctionCall != nil {
Expand Down Expand Up @@ -476,8 +527,9 @@ func (s *Service) TokenContextWindow() int {

// Gemini models generally have large context windows
switch model {
case "gemini-3-pro-preview", "gemini-3-flash-preview":
return 1000000 // 1M tokens for Gemini 3
case "gemini-3-pro-preview", "gemini-3-flash-preview",
"gemini-3.1-pro-preview", "gemini-3.1-flash-lite-preview":
return 1000000 // 1M tokens for Gemini 3 / 3.1
case "gemini-2.5-pro", "gemini-2.5-flash":
return 1000000 // 1M tokens for Gemini 2.5
case "gemini-2.0-flash-exp", "gemini-2.0-flash":
Expand Down
123 changes: 122 additions & 1 deletion llm/gem/gem_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -783,7 +783,7 @@ func TestCalculateUsageWithComplexFunctionCall(t *testing.T) {
}

func TestConvertResponseWithThinking(t *testing.T) {
// Test that Gemini responses with ThoughtSignature are converted to ContentTypeThinking
// A thought-summary part (thought:true) is classified as ContentTypeThinking.
gemRes := &gemini.Response{
Candidates: []gemini.Candidate{
{
Expand All @@ -792,6 +792,7 @@ func TestConvertResponseWithThinking(t *testing.T) {
{
Text: "Let me think about this problem step by step...",
ThoughtSignature: "signature-abc123",
Thought: true,
},
},
},
Expand Down Expand Up @@ -879,6 +880,7 @@ func TestConvertResponseWithMixedContent(t *testing.T) {
{
Text: "Thinking about the problem...",
ThoughtSignature: "sig-1",
Thought: true,
},
{
Text: "Here is my answer.",
Expand Down Expand Up @@ -921,6 +923,40 @@ func TestConvertResponseWithMixedContent(t *testing.T) {
}
}

// TestConvertResponseGemini3FinalAnswerWithSignature guards against the
// gemini-3.x "Test failed: empty response from model" regression.
//
// Gemini 3 puts a thoughtSignature on plain final-answer text parts as well as
// on thoughts, so reasoning state can be restored on the next turn. A signature
// by itself therefore must not turn the part into thinking content; if it did,
// the real answer would vanish from llm.Response.Content. The signature must
// also survive conversion so it can be sent back.
func TestConvertResponseGemini3FinalAnswerWithSignature(t *testing.T) {
	const wantText = "Test successful."

	// Thought is deliberately left at its zero value: this is the final answer.
	answer := gemini.Part{
		Text:             wantText,
		ThoughtSignature: "Eq0FCqoFAQw51sdx7TPrSqmb0Ts...",
	}
	candidate := gemini.Candidate{
		Content: gemini.Content{Parts: []gemini.Part{answer}},
	}
	res := &gemini.Response{Candidates: []gemini.Candidate{candidate}}

	got := convertGeminiResponseToContent(res)
	if n := len(got); n != 1 {
		t.Fatalf("got %d contents, want 1", n)
	}

	// Checks run in order and the first failing one aborts the test.
	first := got[0]
	switch {
	case first.Type != llm.ContentTypeText:
		t.Fatalf("got type %s, want ContentTypeText", first.Type)
	case first.Text != wantText:
		t.Fatalf("got text %q, want %q", first.Text, wantText)
	case first.Signature == "":
		t.Fatalf("expected Signature to be preserved on final-answer text for round-tripping")
	}
}

func TestBuildGeminiRequestWithThinking(t *testing.T) {
// Test that thinking content is properly converted when building Gemini requests
service := &Service{
Expand Down Expand Up @@ -1060,6 +1096,7 @@ func TestRoundTripThinking(t *testing.T) {
{
Text: "Analyzing the problem...",
ThoughtSignature: "sig-abc",
Thought: true,
},
{
Text: "The answer is 42.",
Expand Down Expand Up @@ -1132,3 +1169,87 @@ func TestRoundTripThinking(t *testing.T) {
t.Fatalf("Expected no signature for text, got '%s'", textPart.ThoughtSignature)
}
}

// TestThinkingConfig is a table-driven check of the thinkingConfig that
// buildGeminiRequest attaches to the outgoing request for various
// combinations of model name, ThinkingLevel and ReasoningEffort.
func TestThinkingConfig(t *testing.T) {
	// A minimal single-message request shared by every case.
	req := &llm.Request{
		Messages: []llm.Message{{
			Role:    llm.MessageRoleUser,
			Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hi"}},
		}},
	}

	type testCase struct {
		name        string
		svc         *Service
		wantLevel   string // expected thinkingLevel ("" when none is expected)
		wantBudget  *int   // expected thinkingBudget (nil when none is expected)
		wantOmitted bool   // true when no thinkingConfig must be sent at all
	}
	cases := []testCase{
		{
			name:        "off by default",
			svc:         &Service{Model: "gemini-3-flash-preview", APIKey: "x"},
			wantOmitted: true,
		},
		{
			name:      "gemini-3-flash maps medium to thinkingLevel",
			svc:       &Service{Model: "gemini-3-flash-preview", APIKey: "x", ThinkingLevel: llm.ThinkingLevelMedium},
			wantLevel: "medium",
		},
		{
			name:      "gemini-3-pro clamps medium to high",
			svc:       &Service{Model: "gemini-3-pro-preview", APIKey: "x", ThinkingLevel: llm.ThinkingLevelMedium},
			wantLevel: "high",
		},
		{
			name:      "gemini-3-pro clamps minimal to low",
			svc:       &Service{Model: "gemini-3-pro-preview", APIKey: "x", ThinkingLevel: llm.ThinkingLevelMinimal},
			wantLevel: "low",
		},
		{
			name:      "gemini-3.1-pro accepts medium",
			svc:       &Service{Model: "gemini-3.1-pro-preview", APIKey: "x", ThinkingLevel: llm.ThinkingLevelMedium},
			wantLevel: "medium",
		},
		{
			name:      "ReasoningEffort overrides ThinkingLevel for 3.x",
			svc:       &Service{Model: "gemini-3-flash-preview", APIKey: "x", ThinkingLevel: llm.ThinkingLevelMedium, ReasoningEffort: "high"},
			wantLevel: "high",
		},
		{
			name:       "gemini-2.5 uses thinkingBudget",
			svc:        &Service{Model: "gemini-2.5-pro", APIKey: "x", ThinkingLevel: llm.ThinkingLevelMedium},
			wantBudget: ptr(8192),
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			gemReq, err := tc.svc.buildGeminiRequest(req)
			if err != nil {
				t.Fatalf("buildGeminiRequest: %v", err)
			}

			// Resolve the (possibly absent) thinkingConfig once up front.
			var got *gemini.ThinkingConfig
			if gc := gemReq.GenerationConfig; gc != nil {
				got = gc.ThinkingConfig
			}

			if tc.wantOmitted {
				if got != nil {
					t.Fatalf("expected no thinkingConfig, got %+v", got)
				}
				return
			}
			if got == nil {
				t.Fatalf("expected thinkingConfig to be set")
			}

			if got.ThinkingLevel != tc.wantLevel {
				t.Errorf("thinkingLevel = %q, want %q", got.ThinkingLevel, tc.wantLevel)
			}
			switch {
			case tc.wantBudget != nil:
				if got.ThinkingBudget == nil || *got.ThinkingBudget != *tc.wantBudget {
					t.Errorf("thinkingBudget = %v, want %d", got.ThinkingBudget, *tc.wantBudget)
				}
			case got.ThinkingBudget != nil:
				t.Errorf("unexpected thinkingBudget = %d", *got.ThinkingBudget)
			}
		})
	}
}

// ptr returns a pointer to a fresh copy of v. It lets table-driven tests take
// the address of a literal inline, e.g. ptr(8192).
func ptr[T any](v T) *T {
	p := new(T)
	*p = v
	return p
}
23 changes: 21 additions & 2 deletions llm/gem/gemini/gemini.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,14 @@ type Part struct {
CodeExecutionResult *CodeExecutionResult `json:"codeExecutionResult,omitempty"`
// ThoughtSignature is required for Gemini 3 models when using function calling.
// It must be passed back exactly as received when sending the conversation history.
// Note: presence of ThoughtSignature does NOT mean the part is a thought summary —
// Gemini 3 attaches it to ordinary final-answer text and tool calls so that
// internal reasoning state can be rehydrated on the next turn. Use Thought to
// detect a thought summary.
ThoughtSignature string `json:"thoughtSignature,omitempty"`
// Thought is true when the part is a thought summary (only emitted when
// thinkingConfig.includeThoughts is true). https://ai.google.dev/gemini-api/docs/thinking
Thought bool `json:"thought,omitempty"`
// TODO inlineData
// TODO fileData
}
Expand Down Expand Up @@ -95,8 +102,20 @@ const (

// https://ai.google.dev/api/generate-content#v1beta.GenerationConfig
type GenerationConfig struct {
ResponseMimeType string `json:"responseMimeType,omitempty"` // text/plain, application/json, or text/x.enum
ResponseSchema *Schema `json:"responseSchema,omitempty"` // for JSON
ResponseMimeType string `json:"responseMimeType,omitempty"` // text/plain, application/json, or text/x.enum
ResponseSchema *Schema `json:"responseSchema,omitempty"` // for JSON
ThinkingConfig *ThinkingConfig `json:"thinkingConfig,omitempty"`
}

// ThinkingConfig controls extended thinking for Gemini models. It is carried
// in GenerationConfig under the "thinkingConfig" JSON key.
//
// ThinkingLevel and ThinkingBudget are mutually exclusive: setting both
// returns a 400 from the API. Use ThinkingLevel for Gemini 3.x and
// ThinkingBudget for Gemini 2.5.
//
// Every field is tagged omitempty, so a zero-valued field is left out of the
// request body entirely.
// https://ai.google.dev/gemini-api/docs/thinking
type ThinkingConfig struct {
// ThinkingLevel is the named effort level; the empty string omits the field.
ThinkingLevel string `json:"thinkingLevel,omitempty"` // Gemini 3.x: "minimal", "low", "medium", "high"
// ThinkingBudget is a pointer so that an explicit 0 (disable) is still
// serialized; only a nil pointer is dropped by omitempty.
ThinkingBudget *int `json:"thinkingBudget,omitempty"` // Gemini 2.5: token count, -1 dynamic, 0 disable
// IncludeThoughts requests thought summaries; false is omitted from the JSON.
IncludeThoughts bool `json:"includeThoughts,omitempty"` // include thought summaries in response
}

// https://ai.google.dev/api/caching#Tool
Expand Down
9 changes: 5 additions & 4 deletions models/models.go
Original file line number Diff line number Diff line change
Expand Up @@ -868,10 +868,11 @@ func (m *Manager) createServiceFromModel(model *generated.Model) llm.Service {
}
case "gemini":
return &gem.Service{
APIKey: model.ApiKey,
URL: model.Endpoint,
Model: model.ModelName,
HTTPC: m.httpc,
APIKey: model.ApiKey,
URL: model.Endpoint,
Model: model.ModelName,
HTTPC: m.httpc,
ReasoningEffort: model.ReasoningEffort,
}
default:
if m.logger != nil {
Expand Down
7 changes: 4 additions & 3 deletions server/custom_models.go
Original file line number Diff line number Diff line change
Expand Up @@ -373,9 +373,10 @@ func (s *Server) handleTestModel(w http.ResponseWriter, r *http.Request) {
}
case "gemini":
service = &gem.Service{
APIKey: req.APIKey,
URL: req.Endpoint,
Model: req.ModelName,
APIKey: req.APIKey,
URL: req.Endpoint,
Model: req.ModelName,
ReasoningEffort: req.ReasoningEffort,
}
case "openai-responses":
service = &oai.ResponsesService{
Expand Down