diff --git a/examples/basic/functions/main.py b/examples/basic/functions/main.py
index 55911ef4b..4b0eaef2e 100644
--- a/examples/basic/functions/main.py
+++ b/examples/basic/functions/main.py
@@ -6,6 +6,7 @@
 from mcp_agent.app import MCPApp
 from mcp_agent.agents.agent import Agent
 from mcp_agent.workflows.llm.augmented_llm_openai import OpenAIAugmentedLLM
+from mcp_agent.workflows.llm.augmented_llm import RequestParams
 
 
 def add_numbers(a: int, b: int) -> int:
@@ -44,6 +45,7 @@ async def calculate(expr: str, app_ctx: Optional[Context] = None) -> str:
         llm = await math_agent.attach_llm(OpenAIAugmentedLLM)
         result = await llm.generate_str(
             message=expr,
+            request_params=RequestParams(model="gpt-5.1", reasoning_effort="none"),
         )
 
         logger.info(f"Expert math result: {result}")
diff --git a/schema/mcp-agent.config.schema.json b/schema/mcp-agent.config.schema.json
index 962822ccf..0288545af 100644
--- a/schema/mcp-agent.config.schema.json
+++ b/schema/mcp-agent.config.schema.json
@@ -1331,6 +1331,7 @@
         "reasoning_effort": {
           "default": "medium",
           "enum": [
+            "none",
             "low",
             "medium",
             "high"
diff --git a/src/mcp_agent/config.py b/src/mcp_agent/config.py
index d203e299f..907d4e323 100644
--- a/src/mcp_agent/config.py
+++ b/src/mcp_agent/config.py
@@ -418,7 +418,7 @@ class OpenAISettings(BaseSettings):
         validation_alias=AliasChoices("api_key", "OPENAI_API_KEY", "openai__api_key"),
     )
 
-    reasoning_effort: Literal["low", "medium", "high"] = Field(
+    reasoning_effort: Literal["none", "low", "medium", "high"] = Field(
         default="medium",
         validation_alias=AliasChoices(
             "reasoning_effort", "OPENAI_REASONING_EFFORT", "openai__reasoning_effort"
diff --git a/src/mcp_agent/workflows/llm/augmented_llm.py b/src/mcp_agent/workflows/llm/augmented_llm.py
index 35f8eec7d..4b97f20e4 100644
--- a/src/mcp_agent/workflows/llm/augmented_llm.py
+++ b/src/mcp_agent/workflows/llm/augmented_llm.py
@@ -12,6 +12,7 @@
     TypeVar,
     Union,
     TYPE_CHECKING,
+    Literal,
 )
 
 from opentelemetry import trace
@@ -195,6 +196,13 @@ class RequestParams(CreateMessageRequestParams):
     Tool names should match exactly as they appear in the server's tool list.
     """
 
+    reasoning_effort: Optional[Literal["none", "low", "medium", "high"]] = None
+    """
+    (OpenAI only) Controls the reasoning effort for o1/o3/o4/gpt-5/gpt-5.1 models.
+    Valid values: 'none', 'low', 'medium', 'high'
+    Ignored by other providers.
+    """
+
 
 class AugmentedLLMProtocol(Protocol, Generic[MessageParamT, MessageT]):
     """Protocol defining the interface for augmented LLMs"""
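
For reference, a minimal sketch of how the new RequestParams field behaves (names are taken from the diff above; "fast" and "default" are illustrative variables, and the provider-side fallback is wired up in augmented_llm_openai.py below):

    from mcp_agent.workflows.llm.augmented_llm import RequestParams

    # Explicit per-request value, e.g. to skip reasoning entirely on gpt-5.1:
    fast = RequestParams(model="gpt-5.1", reasoning_effort="none")

    # Left unset, the field stays None and the OpenAI provider falls back to
    # openai.reasoning_effort from the config (default "medium"):
    default = RequestParams(model="gpt-5.1")
    assert fast.reasoning_effort == "none" and default.reasoning_effort is None
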
+ """ + class AugmentedLLMProtocol(Protocol, Generic[MessageParamT, MessageT]): """Protocol defining the interface for augmented LLMs""" diff --git a/src/mcp_agent/workflows/llm/augmented_llm_openai.py b/src/mcp_agent/workflows/llm/augmented_llm_openai.py index 90f7bedba..3b5b1ef87 100644 --- a/src/mcp_agent/workflows/llm/augmented_llm_openai.py +++ b/src/mcp_agent/workflows/llm/augmented_llm_openai.py @@ -277,7 +277,8 @@ async def generate( # DEPRECATED: https://platform.openai.com/docs/api-reference/chat/create#chat-create-max_tokens # "max_tokens": params.maxTokens, "max_completion_tokens": params.maxTokens, - "reasoning_effort": self._reasoning_effort, + "reasoning_effort": params.reasoning_effort + or self._reasoning_effort, } else: arguments = {**arguments, "max_tokens": params.maxTokens} @@ -558,7 +559,9 @@ def _ensure_no_additional_props_and_require_all(node: dict): # DEPRECATED: https://platform.openai.com/docs/api-reference/chat/create#chat-create-max_tokens # "max_tokens": params.maxTokens, payload["max_completion_tokens"] = params.maxTokens - payload["reasoning_effort"] = self._reasoning_effort + payload["reasoning_effort"] = ( + params.reasoning_effort or self._reasoning_effort + ) else: payload["max_tokens"] = params.maxTokens user = params.user or getattr(self.context.config.openai, "user", None) diff --git a/tests/workflows/llm/test_augmented_llm_openai.py b/tests/workflows/llm/test_augmented_llm_openai.py index 2972fe47c..25852f7e5 100644 --- a/tests/workflows/llm/test_augmented_llm_openai.py +++ b/tests/workflows/llm/test_augmented_llm_openai.py @@ -690,3 +690,149 @@ async def test_user_in_openai_config(self, mock_llm, default_usage): # Check that the user field is present in the payload request_obj = mock_llm.executor.execute.call_args[0][1] assert request_obj.payload.get("user") == "config_user_id" + + @pytest.mark.asyncio + async def test_reasoning_effort_in_payload(self, mock_llm, default_usage): + """ + Tests that reasoning_effort from RequestParams is correctly passed to the API payload. + """ + # Setup mock executor + mock_llm.executor.execute = AsyncMock( + return_value=self.create_text_response("Test response", usage=default_usage) + ) + + # IMPORTANT: Mock select_model to return a reasoning model + mock_llm.select_model = AsyncMock(return_value="gpt-5.1") + + # Call LLM with custom reasoning_effort + await mock_llm.generate( + "Test query", + request_params=RequestParams(model="gpt-5.1", reasoning_effort="high"), + ) + + # Verify the payload contains reasoning_effort + request_obj = mock_llm.executor.execute.call_args[0][1] + assert request_obj.payload["reasoning_effort"] == "high" + assert request_obj.payload["model"] == "gpt-5.1" + # Should use max_completion_tokens for reasoning models + assert "max_completion_tokens" in request_obj.payload + assert "max_tokens" not in request_obj.payload + + @pytest.mark.asyncio + async def test_reasoning_effort_fallback(self, mock_llm, default_usage): + """ + Tests that reasoning_effort falls back to config default when not specified. 
+ """ + # Setup mock executor + mock_llm.executor.execute = AsyncMock( + return_value=self.create_text_response("Test response", usage=default_usage) + ) + + # Mock select_model to return a reasoning model + mock_llm.select_model = AsyncMock(return_value="gpt-5.1") + + # Call LLM without specifying reasoning_effort (should use config default: "medium") + await mock_llm.generate( + "Test query", request_params=RequestParams(model="gpt-5.1") + ) + + # Verify the payload uses config default + request_obj = mock_llm.executor.execute.call_args[0][1] + assert request_obj.payload["reasoning_effort"] == "medium" + + @pytest.mark.asyncio + async def test_reasoning_effort_values(self, mock_llm, default_usage): + """ + Tests that different reasoning_effort values are correctly passed. + """ + test_cases = ["none", "low", "medium", "high"] + + for effort in test_cases: + # Setup mock executor + mock_llm.executor.execute = AsyncMock( + return_value=self.create_text_response( + f"Response with {effort}", usage=default_usage + ) + ) + + # Mock select_model to return a reasoning model + mock_llm.select_model = AsyncMock(return_value="gpt-5.1") + + # Call LLM with specific reasoning_effort + await mock_llm.generate( + "Test query", + request_params=RequestParams(model="gpt-5.1", reasoning_effort=effort), + ) + + # Verify the payload contains correct reasoning_effort + request_obj = mock_llm.executor.execute.call_args[0][1] + assert request_obj.payload["reasoning_effort"] == effort + + @pytest.mark.asyncio + async def test_reasoning_effort_not_applied_to_non_reasoning_model( + self, mock_llm, default_usage + ): + """ + Tests that reasoning_effort is not applied to non-reasoning models. + """ + # Setup mock executor + mock_llm.executor.execute = AsyncMock( + return_value=self.create_text_response("Test response", usage=default_usage) + ) + + # Mock select_model to return a NON-reasoning model + mock_llm.select_model = AsyncMock(return_value="gpt-4.1") + + # Call LLM with non-reasoning model (even if reasoning_effort is specified) + await mock_llm.generate( + "Test query", + request_params=RequestParams( + model="gpt-4.1", + reasoning_effort="high", # This should be ignored + ), + ) + + # Verify reasoning_effort is NOT in payload for non-reasoning models + request_obj = mock_llm.executor.execute.call_args[0][1] + assert "reasoning_effort" not in request_obj.payload + # Should use max_tokens instead of max_completion_tokens + assert "max_tokens" in request_obj.payload + assert "max_completion_tokens" not in request_obj.payload + + @pytest.mark.asyncio + async def test_reasoning_models_detection(self, mock_llm, default_usage): + """ + Tests that different reasoning model prefixes are correctly detected. + """ + reasoning_models = [ + "o1-preview", + "o1-mini", + "o3-mini", + "o4-preview", + "gpt-5", + "gpt-5.1", + ] + + for model in reasoning_models: + # Setup mock executor + mock_llm.executor.execute = AsyncMock( + return_value=self.create_text_response( + "Test response", usage=default_usage + ) + ) + + # Mock select_model + mock_llm.select_model = AsyncMock(return_value=model) + + # Call LLM + await mock_llm.generate( + "Test query", + request_params=RequestParams(model=model, reasoning_effort="low"), + ) + + # Verify reasoning_effort is applied + request_obj = mock_llm.executor.execute.call_args[0][1] + assert "reasoning_effort" in request_obj.payload, ( + f"reasoning_effort should be applied for {model}" + ) + assert request_obj.payload["reasoning_effort"] == "low"