2 changes: 2 additions & 0 deletions examples/basic/functions/main.py
@@ -6,6 +6,7 @@
from mcp_agent.app import MCPApp
from mcp_agent.agents.agent import Agent
from mcp_agent.workflows.llm.augmented_llm_openai import OpenAIAugmentedLLM
from mcp_agent.workflows.llm.augmented_llm import RequestParams


def add_numbers(a: int, b: int) -> int:
@@ -44,6 +45,7 @@ async def calculate(expr: str, app_ctx: Optional[Context] = None) -> str:
llm = await math_agent.attach_llm(OpenAIAugmentedLLM)
result = await llm.generate_str(
message=expr,
request_params=RequestParams(model="gpt-5.1", reasoning_effort="none"),
)

logger.info(f"Expert math result: {result}")
1 change: 1 addition & 0 deletions schema/mcp-agent.config.schema.json
@@ -1331,6 +1331,7 @@
"reasoning_effort": {
"default": "medium",
"enum": [
"none",
"low",
"medium",
"high"
2 changes: 1 addition & 1 deletion src/mcp_agent/config.py
@@ -418,7 +418,7 @@ class OpenAISettings(BaseSettings):
validation_alias=AliasChoices("api_key", "OPENAI_API_KEY", "openai__api_key"),
)

reasoning_effort: Literal["low", "medium", "high"] = Field(
reasoning_effort: Literal["none", "low", "medium", "high"] = Field(
default="medium",
validation_alias=AliasChoices(
"reasoning_effort", "OPENAI_REASONING_EFFORT", "openai__reasoning_effort"
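Note: a minimal sketch of the widened setting, assuming OpenAISettings can be instantiated directly with keyword arguments; in practice the value normally comes from the mcp-agent config file or the OPENAI_REASONING_EFFORT environment variable named in the validation_alias above.

from mcp_agent.config import OpenAISettings

# "none" is now accepted alongside "low", "medium", and "high";
# any other value fails pydantic validation.
settings = OpenAISettings(api_key="sk-placeholder", reasoning_effort="none")
assert settings.reasoning_effort == "none"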
8 changes: 8 additions & 0 deletions src/mcp_agent/workflows/llm/augmented_llm.py
@@ -12,6 +12,7 @@
TypeVar,
Union,
TYPE_CHECKING,
Literal,
)

from opentelemetry import trace
@@ -195,6 +196,13 @@ class RequestParams(CreateMessageRequestParams):
Tool names should match exactly as they appear in the server's tool list.
"""

reasoning_effort: Optional[Literal["none", "low", "medium", "high"]] = None
"""
(OpenAI only) Controls the reasoning effort for o1/o3/o4/gpt-5/gpt-5.1 models.
Valid values: 'none', 'low', 'medium', 'high'
Ignored by other providers.
"""


class AugmentedLLMProtocol(Protocol, Generic[MessageParamT, MessageT]):
"""Protocol defining the interface for augmented LLMs"""
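Note: for illustration, a minimal usage sketch of the new per-request field, assuming an already-constructed Agent instance (the full setup is in examples/basic/functions/main.py above); the per-request value takes precedence over the provider-level default from OpenAISettings.reasoning_effort.

from mcp_agent.agents.agent import Agent
from mcp_agent.workflows.llm.augmented_llm import RequestParams
from mcp_agent.workflows.llm.augmented_llm_openai import OpenAIAugmentedLLM


async def ask_without_reasoning(agent: Agent, prompt: str) -> str:
    # Attach an OpenAI-backed LLM to the agent, as in the example diff above.
    llm = await agent.attach_llm(OpenAIAugmentedLLM)
    # reasoning_effort set here overrides the config default ("medium").
    return await llm.generate_str(
        message=prompt,
        request_params=RequestParams(model="gpt-5.1", reasoning_effort="none"),
    )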
7 changes: 5 additions & 2 deletions src/mcp_agent/workflows/llm/augmented_llm_openai.py
@@ -277,7 +277,8 @@ async def generate(
# DEPRECATED: https://platform.openai.com/docs/api-reference/chat/create#chat-create-max_tokens
# "max_tokens": params.maxTokens,
"max_completion_tokens": params.maxTokens,
"reasoning_effort": self._reasoning_effort,
"reasoning_effort": params.reasoning_effort
or self._reasoning_effort,
}
else:
arguments = {**arguments, "max_tokens": params.maxTokens}
@@ -558,7 +559,9 @@ def _ensure_no_additional_props_and_require_all(node: dict):
# DEPRECATED: https://platform.openai.com/docs/api-reference/chat/create#chat-create-max_tokens
# "max_tokens": params.maxTokens,
payload["max_completion_tokens"] = params.maxTokens
payload["reasoning_effort"] = self._reasoning_effort
payload["reasoning_effort"] = (
params.reasoning_effort or self._reasoning_effort
)
else:
payload["max_tokens"] = params.maxTokens
user = params.user or getattr(self.context.config.openai, "user", None)
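Note: a standalone sketch of the precedence implemented above (not the library code): the per-request value wins over the config default, and the argument is only attached for reasoning-capable models, mirroring the max_completion_tokens/max_tokens split.

from typing import Optional


def effective_reasoning_effort(
    request_value: Optional[str],
    config_default: str = "medium",
    is_reasoning_model: bool = True,
) -> Optional[str]:
    if not is_reasoning_model:
        # Non-reasoning models (e.g. gpt-4.1 in the tests below) never get the field.
        return None
    # Fall back to the provider-level default when the request omits it.
    return request_value or config_default


assert effective_reasoning_effort("high") == "high"
assert effective_reasoning_effort(None) == "medium"
assert effective_reasoning_effort("high", is_reasoning_model=False) is None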
146 changes: 146 additions & 0 deletions tests/workflows/llm/test_augmented_llm_openai.py
@@ -690,3 +690,149 @@ async def test_user_in_openai_config(self, mock_llm, default_usage):
# Check that the user field is present in the payload
request_obj = mock_llm.executor.execute.call_args[0][1]
assert request_obj.payload.get("user") == "config_user_id"

@pytest.mark.asyncio
async def test_reasoning_effort_in_payload(self, mock_llm, default_usage):
"""
Tests that reasoning_effort from RequestParams is correctly passed to the API payload.
"""
# Setup mock executor
mock_llm.executor.execute = AsyncMock(
return_value=self.create_text_response("Test response", usage=default_usage)
)

# IMPORTANT: Mock select_model to return a reasoning model
mock_llm.select_model = AsyncMock(return_value="gpt-5.1")

# Call LLM with custom reasoning_effort
await mock_llm.generate(
"Test query",
request_params=RequestParams(model="gpt-5.1", reasoning_effort="high"),
)

# Verify the payload contains reasoning_effort
request_obj = mock_llm.executor.execute.call_args[0][1]
assert request_obj.payload["reasoning_effort"] == "high"
assert request_obj.payload["model"] == "gpt-5.1"
# Should use max_completion_tokens for reasoning models
assert "max_completion_tokens" in request_obj.payload
assert "max_tokens" not in request_obj.payload

@pytest.mark.asyncio
async def test_reasoning_effort_fallback(self, mock_llm, default_usage):
"""
Tests that reasoning_effort falls back to config default when not specified.
"""
# Setup mock executor
mock_llm.executor.execute = AsyncMock(
return_value=self.create_text_response("Test response", usage=default_usage)
)

# Mock select_model to return a reasoning model
mock_llm.select_model = AsyncMock(return_value="gpt-5.1")

# Call LLM without specifying reasoning_effort (should use config default: "medium")
await mock_llm.generate(
"Test query", request_params=RequestParams(model="gpt-5.1")
)

# Verify the payload uses config default
request_obj = mock_llm.executor.execute.call_args[0][1]
assert request_obj.payload["reasoning_effort"] == "medium"

@pytest.mark.asyncio
async def test_reasoning_effort_values(self, mock_llm, default_usage):
"""
Tests that different reasoning_effort values are correctly passed.
"""
test_cases = ["none", "low", "medium", "high"]

for effort in test_cases:
# Setup mock executor
mock_llm.executor.execute = AsyncMock(
return_value=self.create_text_response(
f"Response with {effort}", usage=default_usage
)
)

# Mock select_model to return a reasoning model
mock_llm.select_model = AsyncMock(return_value="gpt-5.1")

# Call LLM with specific reasoning_effort
await mock_llm.generate(
"Test query",
request_params=RequestParams(model="gpt-5.1", reasoning_effort=effort),
)

# Verify the payload contains correct reasoning_effort
request_obj = mock_llm.executor.execute.call_args[0][1]
assert request_obj.payload["reasoning_effort"] == effort

@pytest.mark.asyncio
async def test_reasoning_effort_not_applied_to_non_reasoning_model(
self, mock_llm, default_usage
):
"""
Tests that reasoning_effort is not applied to non-reasoning models.
"""
# Setup mock executor
mock_llm.executor.execute = AsyncMock(
return_value=self.create_text_response("Test response", usage=default_usage)
)

# Mock select_model to return a NON-reasoning model
mock_llm.select_model = AsyncMock(return_value="gpt-4.1")

# Call LLM with non-reasoning model (even if reasoning_effort is specified)
await mock_llm.generate(
"Test query",
request_params=RequestParams(
model="gpt-4.1",
reasoning_effort="high", # This should be ignored
),
)

# Verify reasoning_effort is NOT in payload for non-reasoning models
request_obj = mock_llm.executor.execute.call_args[0][1]
assert "reasoning_effort" not in request_obj.payload
# Should use max_tokens instead of max_completion_tokens
assert "max_tokens" in request_obj.payload
assert "max_completion_tokens" not in request_obj.payload

@pytest.mark.asyncio
async def test_reasoning_models_detection(self, mock_llm, default_usage):
"""
Tests that different reasoning model prefixes are correctly detected.
"""
reasoning_models = [
"o1-preview",
"o1-mini",
"o3-mini",
"o4-preview",
"gpt-5",
"gpt-5.1",
]

for model in reasoning_models:
# Setup mock executor
mock_llm.executor.execute = AsyncMock(
return_value=self.create_text_response(
"Test response", usage=default_usage
)
)

# Mock select_model
mock_llm.select_model = AsyncMock(return_value=model)

# Call LLM
await mock_llm.generate(
"Test query",
request_params=RequestParams(model=model, reasoning_effort="low"),
)

# Verify reasoning_effort is applied
request_obj = mock_llm.executor.execute.call_args[0][1]
assert "reasoning_effort" in request_obj.payload, (
f"reasoning_effort should be applied for {model}"
)
assert request_obj.payload["reasoning_effort"] == "low"