Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 32 additions & 18 deletions src/agentscope/model/_gemini_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,36 @@ async def __call__(

return parsed_response

def _extract_usage(
self,
usage_metadata: Any,
start_datetime: datetime,
) -> ChatUsage | None:
"""Extract ChatUsage from usage_metadata safely, returning None if
unavailable or if token counts are None.

Args:
usage_metadata:
The usage metadata object from the Gemini response.
start_datetime (`datetime`):
The start datetime of the generation.

Returns:
`ChatUsage | None`:
A ChatUsage object, or None if data is unavailable.
"""
if not usage_metadata:
return None
prompt_tokens = usage_metadata.prompt_token_count
total_tokens = usage_metadata.total_token_count
if prompt_tokens is not None and total_tokens is not None:
return ChatUsage(
input_tokens=prompt_tokens,
output_tokens=total_tokens - prompt_tokens,
time=(datetime.now() - start_datetime).total_seconds(),
)
return None

async def _parse_gemini_stream_generation_response(
self,
start_datetime: datetime,
Expand Down Expand Up @@ -383,14 +413,7 @@ async def _parse_gemini_stream_generation_response(
if text and structured_model:
metadata = _json_loads_with_repair(text)

usage = None
if chunk.usage_metadata:
usage = ChatUsage(
input_tokens=chunk.usage_metadata.prompt_token_count,
output_tokens=chunk.usage_metadata.total_token_count
- chunk.usage_metadata.prompt_token_count,
time=(datetime.now() - start_datetime).total_seconds(),
)
usage = self._extract_usage(chunk.usage_metadata, start_datetime)

# The content blocks for the current chunk
content_blocks: list = []
Expand Down Expand Up @@ -502,16 +525,7 @@ def _parse_gemini_generation_response(
if response.text and structured_model:
metadata = _json_loads_with_repair(response.text)

if response.usage_metadata:
usage = ChatUsage(
input_tokens=response.usage_metadata.prompt_token_count,
output_tokens=response.usage_metadata.total_token_count
- response.usage_metadata.prompt_token_count,
time=(datetime.now() - start_datetime).total_seconds(),
)

else:
usage = None
usage = self._extract_usage(response.usage_metadata, start_datetime)

return ChatResponse(
content=content_blocks + tool_calls,
Expand Down
Loading