Updated message trim to handle messages better and support tool calling #28

cableman wants to merge 1 commit into
Conversation
Force-pushed from c0b3259 to 2061b20
|
Dette er et potentielt fiks til os2ai/Feedback#1. |
|
@lasseborly vil du lave code review? Vi kører den udenom den igangværende RC som et hot fix og ser om det kan være med til at løse Holstebros udfordring |
lasseborly left a comment:
I have a hard time wrapping my head around the actual logic and how it solves the apparent problems. Not that it does not, I am just very removed from the problem space.
```python
# Context-window resolution: per-model map wins, then litellm's
# built-in get_max_tokens, then this global default.
self.default_max_context_tokens = default_config.get(
    "default_max_context_tokens", 8192
)
```

Can we document inline the reason for this magic number?
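One way to answer that, sketched below: hoist the literal into a named constant with a short comment. The rationale text is an assumption (8192 is a common context size for smaller local models); the author should confirm the actual reason for the choice.

```python
# Conservative fallback used when neither the per-model map nor litellm's
# get_max_tokens knows the model's context window. 8192 is assumed here to
# match the smallest context size we expect to encounter; confirm before use.
DEFAULT_MAX_CONTEXT_TOKENS = 8192

self.default_max_context_tokens = default_config.get(
    "default_max_context_tokens", DEFAULT_MAX_CONTEXT_TOKENS
)
```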
```python
data["max_tokens"] = safe_completion_tokens
```

```python
def _repair_tool_call_pairings(self, messages: list) -> list:
```
My gut tells me we could make this two levels flatter by inverting the if conditions and using continue a bit more. Makes for an easier read; a sketch follows.
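A minimal, hypothetical sketch of that flattening, reconstructed from the fragments visible in this diff (the real function body is only partially shown; `satisfied_ids` is assumed to be the set of tool_call ids that have a matching tool response):

```python
def repair_tool_call_pairings(messages: list, satisfied_ids: set) -> list:
    result = []
    for msg in messages:
        tcs = msg.get("tool_calls") or []
        # Guard clause: messages without tool_calls pass through untouched.
        if not tcs:
            result.append(msg)
            continue
        kept_tcs = [tc for tc in tcs if tc.get("id") in satisfied_ids]
        content_empty = not (msg.get("content") or "").strip()
        # Guard clause: all tool_calls orphaned and no content -> drop.
        if not kept_tcs and content_empty:
            continue
        new_msg = dict(msg)
        new_msg["tool_calls"] = kept_tcs
        result.append(new_msg)
    return result
```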
Also, maybe make use of the "new" match case instead of the if-else statements.
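For illustration, a hypothetical match/case variant (Python 3.10+) of the kind of role dispatch such a loop does; the branch bodies are assumptions, not taken from the PR:

```python
def drop_orphaned_tool_messages(messages: list, satisfied_ids: set) -> list:
    repaired = []
    for msg in messages:
        match msg.get("role"):
            # Tool responses whose originating call was removed are dropped.
            case "tool" if msg.get("tool_call_id") not in satisfied_ids:
                continue
            # Everything else passes through unchanged.
            case _:
                repaired.append(msg)
    return repaired
```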
```python
    data, safe_completion_tokens, has_max_tokens, has_max_completion
)
current_tokens = final_tokens
except Exception as e:
```
We can get more specific here, it seems: https://github.com/BerriAI/litellm/blob/144279eb57edb6cc0a97ad47c9da33b910f70dfa/litellm/litellm_core_utils/token_counter.py#L360
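A hedged sketch of what narrowing the handler could look like. The exact exception types raised at the linked litellm line are an assumption here; check the pinned source before relying on them. The chars/4 fallback is illustrative, not the PR's behavior:

```python
import litellm

def count_tokens(model: str, messages: list) -> int:
    try:
        return litellm.token_counter(model=model, messages=messages)
    except (ValueError, KeyError):  # assumed types; verify against litellm
        # Fall back to a rough chars/4 estimate instead of a bare Exception.
        return sum(len(str(m.get("content") or "")) for m in messages) // 4
```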
```python
    result.append(msg)
    return result


def _ensure_last_is_user(
```
I might be missing something, but are we even using this anywhere?
```diff
  self._log_debug(f"Safe completion tokens: {safe_completion_tokens}")
  self._log_debug(
-     f"Calculation: min({requested_completion}, max(512, ({max_context_tokens} - {int(current_tokens)} - {self.safety_buffer}) * 0.90))"
+     f"Calculation: min({requested_completion}, max(256, ({max_context_tokens} - {int(current_tokens)} - {self.safety_buffer}) * 0.75))"
```
I do not understand this change. Why?
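For reference, here is what the changed formula computes, with made-up numbers (the variable names mirror the log message above; the old values were floor 512 and headroom 0.90, the new ones are 256 and 0.75):

```python
def safe_completion(requested, max_context, current, buffer,
                    floor=256, headroom=0.75):
    # Keep only `headroom` of the remaining window for the completion,
    # but never allow fewer than `floor` completion tokens.
    return min(requested, max(floor, int((max_context - current - buffer) * headroom)))

# Example: 8192-token window, 5000 tokens used, 200-token safety buffer.
safe_completion(4096, 8192, 5000, 200)                            # new: 2244
safe_completion(4096, 8192, 5000, 200, floor=512, headroom=0.90)  # old: 2692
```

So the change trims more aggressively: less of the remaining window goes to the completion, and a smaller minimum is tolerated. Whether that is the intent is exactly the open question.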
| "Dropping assistant message with no content and all tool_calls orphaned" | ||
| ) | ||
| continue | ||
| new_msg = dict(msg) |
| if tcs: | ||
| kept_tcs = [tc for tc in tcs if tc.get("id") in satisfied_ids] | ||
| content = msg.get("content") | ||
| if not kept_tcs and not (content or "").strip(): |
Suggested change:

```python
content_empty = not (content or "").strip()
if not kept_tcs and content_empty:
```
```python
)
self.max_context_tokens_by_model = default_config.get(
    "max_context_tokens_by_model", {}
) or {}
```
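To make the two knobs concrete, a hypothetical `default_config` excerpt; the key names come from the diff, the values are made-up examples:

```python
default_config = {
    # Global fallback when nothing else knows the model's window.
    "default_max_context_tokens": 8192,
    # Per-model overrides; these win over litellm's get_max_tokens.
    "max_context_tokens_by_model": {
        "gpt-4o": 128000,
        "llama3.1:8b": 8192,
    },
}
```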
See https://github.com/AarhusAI/aarhusai-docker/blob/main/guardrails/README.md for more information about the message trim guardrail.
- New configuration knobs
- New tool-calling repair logic
- Behavior changes