diff --git a/livekit-agents/livekit/agents/llm/_provider_format/google.py b/livekit-agents/livekit/agents/llm/_provider_format/google.py
index f671a4876a..8518b60281 100644
--- a/livekit-agents/livekit/agents/llm/_provider_format/google.py
+++ b/livekit-agents/livekit/agents/llm/_provider_format/google.py
@@ -17,7 +17,10 @@ class GoogleFormatData:
 
 
 def to_chat_ctx(
-    chat_ctx: llm.ChatContext, *, inject_dummy_user_message: bool = True
+    chat_ctx: llm.ChatContext,
+    *,
+    inject_dummy_user_message: bool = True,
+    thought_signatures: dict[str, bytes] | None = None,
 ) -> tuple[list[dict], GoogleFormatData]:
     turns: list[dict] = []
     system_messages: list[str] = []
@@ -53,15 +56,17 @@ def to_chat_ctx(
             elif isinstance(content, llm.ImageContent):
                 parts.append(_to_image_part(content))
         elif msg.type == "function_call":
-            parts.append(
-                {
-                    "function_call": {
-                        "id": msg.call_id,
-                        "name": msg.name,
-                        "args": json.loads(msg.arguments or "{}"),
-                    }
+            fc_part = {
+                "function_call": {
+                    "id": msg.call_id,
+                    "name": msg.name,
+                    "args": json.loads(msg.arguments or "{}"),
                 }
-            )
+            }
+            # Inject thought_signature if available (Gemini 3 multi-turn function calling)
+            if thought_signatures and (sig := thought_signatures.get(msg.call_id)):
+                fc_part["thought_signature"] = sig
+            parts.append(fc_part)
         elif msg.type == "function_call_output":
             response = {"output": msg.output} if not msg.is_error else {"error": msg.output}
             parts.append(
diff --git a/livekit-plugins/livekit-plugins-google/livekit/plugins/google/llm.py b/livekit-plugins/livekit-plugins-google/livekit/plugins/google/llm.py
index 5881463631..6e47c4746b 100644
--- a/livekit-plugins/livekit-plugins-google/livekit/plugins/google/llm.py
+++ b/livekit-plugins/livekit-plugins-google/livekit/plugins/google/llm.py
@@ -45,6 +45,11 @@
 from .utils import create_tools_config, to_fnc_ctx, to_response_format
 
 
+def _is_gemini_3_model(model: str) -> bool:
+    """Check whether the model is a Gemini 3 series model."""
+    return "gemini-3" in model.lower()
+
+
 @dataclass
 class _LLMOptions:
     model: ChatModels | str
@@ -157,10 +162,13 @@ def __init__(
         # Validate thinking_config
         if is_given(thinking_config):
             _thinking_budget = None
+            _thinking_level = None
             if isinstance(thinking_config, dict):
                 _thinking_budget = thinking_config.get("thinking_budget")
+                _thinking_level = thinking_config.get("thinking_level")
             elif isinstance(thinking_config, types.ThinkingConfig):
                 _thinking_budget = thinking_config.thinking_budget
+                _thinking_level = getattr(thinking_config, "thinking_level", None)
 
             if _thinking_budget is not None:
                 if not isinstance(_thinking_budget, int):
@@ -191,6 +199,8 @@ def __init__(
             project=gcp_project,
             location=gcp_location,
         )
+        # Store thought_signatures for Gemini 3 multi-turn function calling
+        self._thought_signatures: dict[str, bytes] = {}
 
     @property
     def model(self) -> str:
@@ -284,9 +294,45 @@ def chat(
         if is_given(self._opts.seed):
             extra["seed"] = self._opts.seed
 
-        # Add thinking config if thinking_budget is provided
+        # Handle thinking_config based on model version
        if is_given(self._opts.thinking_config):
-            extra["thinking_config"] = self._opts.thinking_config
+            is_gemini_3 = _is_gemini_3_model(self._opts.model)
+            thinking_cfg = self._opts.thinking_config
+
+            # Extract both parameters
+            _budget = None
+            _level = None
+            if isinstance(thinking_cfg, dict):
+                _budget = thinking_cfg.get("thinking_budget")
+                _level = thinking_cfg.get("thinking_level")
+            elif isinstance(thinking_cfg, types.ThinkingConfig):
+                _budget = thinking_cfg.thinking_budget
+                _level = getattr(thinking_cfg, "thinking_level", None)
+
+            if is_gemini_3:
+                # Gemini 3: only thinking_level is supported
+                if _budget is not None and _level is None:
+                    logger.warning(
+                        f"Model {self._opts.model} is a Gemini 3 model, which does not support thinking_budget. "
+                        "Please use thinking_level ('low' or 'high') instead. Ignoring thinking_budget."
+                    )
+                if _level is not None:
+                    # Use thinking_level only (pass as a dict since the SDK may not have this field yet)
+                    extra["thinking_config"] = {"thinking_level": _level}
+                # If neither is set, let the API use its default
+            else:
+                # Gemini 2.5 and earlier: only thinking_budget is supported
+                if _level is not None and _budget is None:
+                    raise ValueError(
+                        f"Model {self._opts.model} does not support thinking_level. "
+                        "Please use thinking_budget (int) instead for Gemini 2.5 and earlier models."
+                    )
+                if _budget is not None:
+                    # Use thinking_budget only
+                    extra["thinking_config"] = types.ThinkingConfig(thinking_budget=_budget)
+                else:
+                    # Pass through the original config if no specific handling is needed
+                    extra["thinking_config"] = self._opts.thinking_config
 
         if is_given(self._opts.automatic_function_calling_config):
             extra["automatic_function_calling"] = self._opts.automatic_function_calling_config
@@ -333,7 +379,14 @@ async def _run(self) -> None:
         request_id = utils.shortuuid()
 
         try:
-            turns_dict, extra_data = self._chat_ctx.to_provider_format(format="google")
+            # Pass thought_signatures for Gemini 3 multi-turn function calling
+            thought_sigs = (
+                self._llm._thought_signatures if _is_gemini_3_model(self._model) else None
+            )
+            turns_dict, extra_data = self._chat_ctx.to_provider_format(
+                format="google", thought_signatures=thought_sigs
+            )
+
             turns = [types.Content.model_validate(turn) for turn in turns_dict]
             function_declarations = to_fnc_ctx(self._tools)
             tools_config = create_tools_config(
@@ -354,6 +407,7 @@ async def _run(self) -> None:
                 ),
                 **self._extra_kwargs,
             )
+
             stream = await self._client.aio.models.generate_content_stream(
                 model=self._model,
                 contents=cast(types.ContentListUnion, turns),
@@ -433,17 +487,25 @@ async def _run(self) -> None:
 
     def _parse_part(self, id: str, part: types.Part) -> llm.ChatChunk | None:
         if part.function_call:
+            tool_call = llm.FunctionToolCall(
+                arguments=json.dumps(part.function_call.args),
+                name=part.function_call.name,
+                call_id=part.function_call.id or utils.shortuuid("function_call_"),
+            )
+
+            # Store thought_signature for Gemini 3 multi-turn function calling
+            if (
+                _is_gemini_3_model(self._model)
+                and hasattr(part, "thought_signature")
+                and part.thought_signature
+            ):
+                self._llm._thought_signatures[tool_call.call_id] = part.thought_signature
+
             chat_chunk = llm.ChatChunk(
                 id=id,
                 delta=llm.ChoiceDelta(
                     role="assistant",
-                    tool_calls=[
-                        llm.FunctionToolCall(
-                            arguments=json.dumps(part.function_call.args),
-                            name=part.function_call.name,
-                            call_id=part.function_call.id or utils.shortuuid("function_call_"),
-                        )
-                    ],
+                    tool_calls=[tool_call],
                     content=part.text,
                 ),
             )
diff --git a/livekit-plugins/livekit-plugins-google/livekit/plugins/google/models.py b/livekit-plugins/livekit-plugins-google/livekit/plugins/google/models.py
index 86f8bf0cda..03abd162ab 100644
--- a/livekit-plugins/livekit-plugins-google/livekit/plugins/google/models.py
+++ b/livekit-plugins/livekit-plugins-google/livekit/plugins/google/models.py
@@ -189,6 +189,7 @@
 Gender = Literal["male", "female", "neutral"]
 
 ChatModels = Literal[
+    "gemini-3-pro-preview",
     "gemini-2.5-pro-preview-05-06",
     "gemini-2.5-flash-preview-04-17",
     "gemini-2.5-flash-preview-05-20",
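
Usage sketch for the thinking_config split introduced above, assuming the diff is applied. google.LLM and the thinking_config parameter come from this plugin; the model names are taken from the ChatModels literal and are illustrative.

    from google.genai import types
    from livekit.plugins import google

    # Gemini 3: pass a thinking_level; the plugin forwards {"thinking_level": ...}
    # and warns about (and ignores) a thinking_budget set without a level.
    llm_v3 = google.LLM(
        model="gemini-3-pro-preview",
        thinking_config={"thinking_level": "low"},
    )

    # Gemini 2.5 and earlier: pass a thinking_budget; the plugin forwards
    # types.ThinkingConfig(thinking_budget=...). Passing only thinking_level
    # here raises ValueError.
    llm_v25 = google.LLM(
        model="gemini-2.5-flash-preview-05-20",
        thinking_config=types.ThinkingConfig(thinking_budget=1024),
    )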
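
And a minimal sketch of the thought_signature round trip: _parse_part stores the signature emitted with a streamed function call, keyed by call_id, and to_chat_ctx reattaches it when the function_call part is replayed on the next turn. The dict below stands in for LLM._thought_signatures; the call_id, tool name, and signature bytes are hypothetical.

    # Stand-in for LLM._thought_signatures.
    sigs: dict[str, bytes] = {}

    # 1. While streaming, _parse_part records the signature under the call_id.
    call_id = "function_call_abc123"         # hypothetical id
    sigs[call_id] = b"\x0a\x12opaque-sig"    # opaque bytes returned by the API

    # 2. On the next request, to_chat_ctx rebuilds the function_call part and,
    #    since thought_signatures was passed in, reattaches the signature.
    fc_part: dict = {"function_call": {"id": call_id, "name": "get_weather", "args": {}}}
    if sigs and (sig := sigs.get(call_id)):
        fc_part["thought_signature"] = sig

    assert fc_part["thought_signature"] == sigs[call_id]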