boldsoftware · nzoschke · May 3, 2026 · May 3, 2026
diff --git a/llm/gem/gem.go b/llm/gem/gem.go
@@ -23,10 +23,18 @@ const (
 // Service provides Gemini completions.
 // Fields should not be altered concurrently with calling any method on Service.
 type Service struct {
-	HTTPC  *http.Client // defaults to http.DefaultClient if nil
-	URL    string       // Gemini API URL, uses the gemini package default if empty
-	APIKey string       // must be non-empty
-	Model  string       // defaults to DefaultModel if empty
+	HTTPC         *http.Client      // defaults to http.DefaultClient if nil
+	URL           string            // Gemini API URL, uses the gemini package default if empty
+	APIKey        string            // must be non-empty
+	Model         string            // defaults to DefaultModel if empty
+	ThinkingLevel llm.ThinkingLevel // thinking level (ThinkingLevelOff disables thinkingConfig)
+
+	// ReasoningEffort, if non-empty, is sent to Gemini 3.x as thinkingConfig.thinkingLevel,
+	// overriding ThinkingLevel. It is passed verbatim, except that gemini-3-pro-preview
+	// maps "minimal" to "low" and "medium" to "high". Other models use thinkingBudget
+	// and never receive this value. This mirrors oai.ResponsesService so custom-model
+	// configurations can pass provider-specific values without Shelley knowing them.
+	ReasoningEffort string
 }
 
 var _ llm.Service = (*Service)(nil)
@@ -194,13 +202,17 @@ func (s *Service) buildGeminiRequest(req *llm.Request) (*gemini.Request, error)
 					// For thinking content, use the Thinking field and preserve the signature
 					part.Text = c.Thinking
 					part.ThoughtSignature = c.Signature
+					part.Thought = true
 				} else if c.Type == llm.ContentTypeRedactedThinking {
 					// For redacted thinking, use the Data field (consistent with Anthropic pattern)
 					part.Text = c.Data
 					part.ThoughtSignature = c.Signature
+					part.Thought = true
 				} else {
-					// For regular text, use the Text field
+					// Regular text. Gemini 3 may have attached a thoughtSignature to
+					// the final-answer text — pass it back so reasoning state survives.
 					part.Text = c.Text
+					part.ThoughtSignature = c.Signature
 				}
 				content.Parts = append(content.Parts, part)
 			case llm.ContentTypeToolUse:
@@ -308,9 +320,47 @@ func (s *Service) buildGeminiRequest(req *llm.Request) (*gemini.Request, error)
 		}
 	}
 
+	if tc := s.thinkingConfig(); tc != nil {
+		if gemReq.GenerationConfig == nil {
+			gemReq.GenerationConfig = &gemini.GenerationConfig{}
+		}
+		gemReq.GenerationConfig.ThinkingConfig = tc
+	}
+
 	return gemReq, nil
 }
 
+// thinkingConfig builds the Gemini ThinkingConfig from the service settings: a
+// thinkingLevel for "gemini-3" models (ReasoningEffort wins over ThinkingLevel), else a
+// thinkingBudget from ThinkingLevel alone. Returns nil to leave the model default.
+func (s *Service) thinkingConfig() *gemini.ThinkingConfig {
+	model := cmp.Or(s.Model, DefaultModel)
+	if strings.HasPrefix(model, "gemini-3") {
+		level := s.ReasoningEffort // an explicit effort string overrides ThinkingLevel
+		if level == "" && s.ThinkingLevel != llm.ThinkingLevelOff {
+			level = s.ThinkingLevel.ThinkingEffort()
+		}
+		if level == "" {
+			return nil
+		}
+		// gemini-3-pro-preview accepts only "low" and "high".
+		if model == "gemini-3-pro-preview" {
+			switch level {
+			case "minimal":
+				level = "low"
+			case "medium":
+				level = "high"
+			}
+		}
+		return &gemini.ThinkingConfig{ThinkingLevel: level}
+	}
+	if s.ThinkingLevel == llm.ThinkingLevelOff { // ReasoningEffort is ignored here, so Off sends nothing
+		return nil
+	}
+	budget := s.ThinkingLevel.ThinkingBudgetTokens()
+	return &gemini.ThinkingConfig{ThinkingBudget: &budget}
+}
+
 // convertGeminiResponsesToContent converts a Gemini response to llm.Content
 func convertGeminiResponseToContent(res *gemini.Response) []llm.Content {
 	if res == nil || len(res.Candidates) == 0 || len(res.Candidates[0].Content.Parts) == 0 {
@@ -332,9 +382,10 @@ func convertGeminiResponseToContent(res *gemini.Response) []llm.Content {
 			"has_function_response", part.FunctionResponse != nil)
 
 		if part.Text != "" {
-			// Check if this is thinking content (has a thought signature)
-			if part.ThoughtSignature != "" {
-				// This is thinking content - use ContentTypeThinking
+			// A part is a thought summary only when thought=true. Gemini 3 attaches
+			// thoughtSignature to ordinary final-answer text too, for round-tripping
+			// reasoning state — that signature alone does not make the text a thought.
+			if part.Thought {
 				contents = append(contents, llm.Content{
 					Type:      llm.ContentTypeThinking,
 					Thinking:  part.Text,
@@ -344,10 +395,10 @@ func convertGeminiResponseToContent(res *gemini.Response) []llm.Content {
 					"signature", part.ThoughtSignature,
 					"thinking_length", len(part.Text))
 			} else {
-				// Regular text response
 				contents = append(contents, llm.Content{
-					Type: llm.ContentTypeText,
-					Text: part.Text,
+					Type:      llm.ContentTypeText,
+					Text:      part.Text,
+					Signature: part.ThoughtSignature,
 				})
 			}
 		} else if part.FunctionCall != nil {
@@ -476,8 +527,9 @@ func (s *Service) TokenContextWindow() int {
 
 	// Gemini models generally have large context windows
 	switch model {
-	case "gemini-3-pro-preview", "gemini-3-flash-preview":
-		return 1000000 // 1M tokens for Gemini 3
+	case "gemini-3-pro-preview", "gemini-3-flash-preview",
+		"gemini-3.1-pro-preview", "gemini-3.1-flash-lite-preview":
+		return 1000000 // 1M tokens for Gemini 3 / 3.1
 	case "gemini-2.5-pro", "gemini-2.5-flash":
 		return 1000000 // 1M tokens for Gemini 2.5
 	case "gemini-2.0-flash-exp", "gemini-2.0-flash":

diff --git a/llm/gem/gem_test.go b/llm/gem/gem_test.go
@@ -783,7 +783,7 @@ func TestCalculateUsageWithComplexFunctionCall(t *testing.T) {
 }
 
 func TestConvertResponseWithThinking(t *testing.T) {
-	// Test that Gemini responses with ThoughtSignature are converted to ContentTypeThinking
+	// A thought-summary part (thought:true) is classified as ContentTypeThinking.
 	gemRes := &gemini.Response{
 		Candidates: []gemini.Candidate{
 			{
@@ -792,6 +792,7 @@ func TestConvertResponseWithThinking(t *testing.T) {
 						{
 							Text:             "Let me think about this problem step by step...",
 							ThoughtSignature: "signature-abc123",
+							Thought:          true,
 						},
 					},
 				},
@@ -879,6 +880,7 @@ func TestConvertResponseWithMixedContent(t *testing.T) {
 						{
 							Text:             "Thinking about the problem...",
 							ThoughtSignature: "sig-1",
+							Thought:          true,
 						},
 						{
 							Text: "Here is my answer.",
@@ -921,6 +923,40 @@ func TestConvertResponseWithMixedContent(t *testing.T) {
 	}
 }
 
+// TestConvertResponseGemini3FinalAnswerWithSignature guards against the
+// gemini-3.x "Test failed: empty response from model" regression. Gemini 3
+// puts a thoughtSignature on plain final-answer text parts as well as on
+// thoughts, so reasoning state can be restored on the following turn. A
+// signature by itself must therefore never mark a part as thinking; if it
+// did, the real answer would vanish from llm.Response.Content.
+func TestConvertResponseGemini3FinalAnswerWithSignature(t *testing.T) {
+	const answer = "Test successful."
+
+	// Thought stays false on purpose: this part is the final answer.
+	part := gemini.Part{
+		Text:             answer,
+		ThoughtSignature: "Eq0FCqoFAQw51sdx7TPrSqmb0Ts...",
+	}
+	res := &gemini.Response{
+		Candidates: []gemini.Candidate{
+			{Content: gemini.Content{Parts: []gemini.Part{part}}},
+		},
+	}
+
+	got := convertGeminiResponseToContent(res)
+	if n := len(got); n != 1 {
+		t.Fatalf("got %d contents, want 1", n)
+	}
+	switch c := got[0]; {
+	case c.Type != llm.ContentTypeText:
+		t.Fatalf("got type %s, want ContentTypeText", c.Type)
+	case c.Text != answer:
+		t.Fatalf("got text %q, want %q", c.Text, answer)
+	case c.Signature == "":
+		t.Fatalf("expected Signature to be preserved on final-answer text for round-tripping")
+	}
+}
+
 func TestBuildGeminiRequestWithThinking(t *testing.T) {
 	// Test that thinking content is properly converted when building Gemini requests
 	service := &Service{
@@ -1060,6 +1096,7 @@ func TestRoundTripThinking(t *testing.T) {
 						{
 							Text:             "Analyzing the problem...",
 							ThoughtSignature: "sig-abc",
+							Thought:          true,
 						},
 						{
 							Text: "The answer is 42.",
@@ -1132,3 +1169,87 @@ func TestRoundTripThinking(t *testing.T) {
 		t.Fatalf("Expected no signature for text, got '%s'", textPart.ThoughtSignature)
 	}
 }
+
+// TestThinkingConfig checks which thinkingConfig buildGeminiRequest attaches
+// for a range of models and Service thinking settings.
+func TestThinkingConfig(t *testing.T) {
+	hello := llm.Content{Type: llm.ContentTypeText, Text: "hi"}
+	req := llm.Request{Messages: []llm.Message{{Role: llm.MessageRoleUser, Content: []llm.Content{hello}}}}
+
+	cases := []struct {
+		name    string   // subtest name
+		service *Service // service whose settings are under test
+		level   string   // expected thinkingLevel ("" when a budget is expected)
+		budget  *int     // expected thinkingBudget (nil when none is expected)
+		omitted bool     // expect no thinkingConfig at all
+	}{
+		{
+			name:    "off by default",
+			service: &Service{Model: "gemini-3-flash-preview", APIKey: "x"},
+			omitted: true,
+		},
+		{
+			name:    "gemini-3-flash maps medium to thinkingLevel",
+			service: &Service{Model: "gemini-3-flash-preview", APIKey: "x", ThinkingLevel: llm.ThinkingLevelMedium},
+			level:   "medium",
+		},
+		{
+			name:    "gemini-3-pro clamps medium to high",
+			service: &Service{Model: "gemini-3-pro-preview", APIKey: "x", ThinkingLevel: llm.ThinkingLevelMedium},
+			level:   "high",
+		},
+		{
+			name:    "gemini-3-pro clamps minimal to low",
+			service: &Service{Model: "gemini-3-pro-preview", APIKey: "x", ThinkingLevel: llm.ThinkingLevelMinimal},
+			level:   "low",
+		},
+		{
+			name:    "gemini-3.1-pro accepts medium",
+			service: &Service{Model: "gemini-3.1-pro-preview", APIKey: "x", ThinkingLevel: llm.ThinkingLevelMedium},
+			level:   "medium",
+		},
+		{
+			name:    "ReasoningEffort overrides ThinkingLevel for 3.x",
+			service: &Service{Model: "gemini-3-flash-preview", APIKey: "x", ThinkingLevel: llm.ThinkingLevelMedium, ReasoningEffort: "high"},
+			level:   "high",
+		},
+		{
+			name:    "gemini-2.5 uses thinkingBudget",
+			service: &Service{Model: "gemini-2.5-pro", APIKey: "x", ThinkingLevel: llm.ThinkingLevelMedium},
+			budget:  ptr(8192),
+		},
+	}
+
+	for _, c := range cases {
+		t.Run(c.name, func(t *testing.T) {
+			out, err := c.service.buildGeminiRequest(&req)
+			if err != nil {
+				t.Fatalf("buildGeminiRequest: %v", err)
+			}
+			var cfg *gemini.ThinkingConfig
+			if out.GenerationConfig != nil {
+				cfg = out.GenerationConfig.ThinkingConfig
+			}
+			if c.omitted {
+				if cfg != nil {
+					t.Fatalf("expected no thinkingConfig, got %+v", cfg)
+				}
+				return
+			}
+			if cfg == nil {
+				t.Fatalf("expected thinkingConfig to be set")
+			}
+			if cfg.ThinkingLevel != c.level {
+				t.Errorf("thinkingLevel = %q, want %q", cfg.ThinkingLevel, c.level)
+			}
+			switch got := cfg.ThinkingBudget; {
+			case c.budget == nil && got != nil:
+				t.Errorf("unexpected thinkingBudget = %d", *got)
+			case c.budget != nil && (got == nil || *got != *c.budget):
+				t.Errorf("thinkingBudget = %v, want %d", got, *c.budget)
+			}
+		})
+	}
+}
+
+func ptr[T any](v T) *T { return &v } // ptr returns a pointer to its own copy of v
diff --git a/llm/gem/gemini/gemini.go b/llm/gem/gemini/gemini.go
@@ -52,7 +52,14 @@ type Part struct {
 	CodeExecutionResult *CodeExecutionResult `json:"codeExecutionResult,omitempty"`
 	// ThoughtSignature is required for Gemini 3 models when using function calling.
 	// It must be passed back exactly as received when sending the conversation history.
+	// Note: presence of ThoughtSignature does NOT mean the part is a thought summary —
+	// Gemini 3 attaches it to ordinary final-answer text and tool calls so that
+	// internal reasoning state can be rehydrated on the next turn. Use Thought to
+	// detect a thought summary.
 	ThoughtSignature string `json:"thoughtSignature,omitempty"`
+	// Thought is true when the part is a thought summary (only emitted when
+	// thinkingConfig.includeThoughts is true). https://ai.google.dev/gemini-api/docs/thinking
+	Thought bool `json:"thought,omitempty"`
 	// TODO inlineData
 	// TODO fileData
 }
@@ -95,8 +102,20 @@ const (
 
 // https://ai.google.dev/api/generate-content#v1beta.GenerationConfig
 type GenerationConfig struct {
-	ResponseMimeType string  `json:"responseMimeType,omitempty"` // text/plain, application/json, or text/x.enum
-	ResponseSchema   *Schema `json:"responseSchema,omitempty"`   // for JSON
+	ResponseMimeType string          `json:"responseMimeType,omitempty"` // text/plain, application/json, or text/x.enum
+	ResponseSchema   *Schema         `json:"responseSchema,omitempty"`   // for JSON
+	ThinkingConfig   *ThinkingConfig `json:"thinkingConfig,omitempty"`   // thinking controls; omitted from the JSON when nil
+}
+
+// ThinkingConfig controls extended thinking for Gemini models.
+// ThinkingLevel and ThinkingBudget are mutually exclusive: setting both
+// returns a 400 from the API. Use ThinkingLevel for Gemini 3.x and
+// ThinkingBudget for Gemini 2.5. Unset fields are omitted from the JSON.
+// https://ai.google.dev/gemini-api/docs/thinking
+type ThinkingConfig struct {
+	ThinkingLevel   string `json:"thinkingLevel,omitempty"`   // Gemini 3.x: "minimal", "low", "medium", "high"
+	ThinkingBudget  *int   `json:"thinkingBudget,omitempty"`  // Gemini 2.5: token count, -1 dynamic, 0 disable; a pointer so an explicit 0 is still sent
+	IncludeThoughts bool   `json:"includeThoughts,omitempty"` // include thought summaries in response
 }
 
 // https://ai.google.dev/api/caching#Tool

diff --git a/models/models.go b/models/models.go
@@ -868,10 +868,11 @@ func (m *Manager) createServiceFromModel(model *generated.Model) llm.Service {
 		}
 	case "gemini":
 		return &gem.Service{
-			APIKey: model.ApiKey,
-			URL:    model.Endpoint,
-			Model:  model.ModelName,
-			HTTPC:  m.httpc,
+			APIKey:          model.ApiKey,
+			URL:             model.Endpoint,
+			Model:           model.ModelName,
+			HTTPC:           m.httpc,
+			ReasoningEffort: model.ReasoningEffort,
 		}
 	default:
 		if m.logger != nil {

diff --git a/server/custom_models.go b/server/custom_models.go
@@ -373,9 +373,10 @@ func (s *Server) handleTestModel(w http.ResponseWriter, r *http.Request) {
 		}
 	case "gemini":
 		service = &gem.Service{
-			APIKey: req.APIKey,
-			URL:    req.Endpoint,
-			Model:  req.ModelName,
+			APIKey:          req.APIKey,
+			URL:             req.Endpoint,
+			Model:           req.ModelName,
+			ReasoningEffort: req.ReasoningEffort,
 		}
 	case "openai-responses":
 		service = &oai.ResponsesService{