1818 RateLimitDescriptorRateLimitObject ,
1919 _PROXY_MaxParallelRequestsHandler_v3 ,
2020)
21+ from litellm .proxy .hooks .rate_limiter_utils import convert_priority_to_percent
2122from litellm .proxy .utils import InternalUsageCache
2223from litellm .types .router import ModelGroupInfo
2324
@@ -48,7 +49,7 @@ def __init__(self, internal_usage_cache: DualCache):
4849 def update_variables (self , llm_router : Router ):
4950 self .llm_router = llm_router
5051
51- def _get_priority_weight (self , priority : Optional [str ]) -> float :
52+ def _get_priority_weight (self , priority : Optional [str ], model_info : Optional [ ModelGroupInfo ] = None ) -> float :
5253 """Get the weight for a given priority from litellm.priority_reservation"""
5354 weight : float = litellm .priority_reservation_settings .default_priority
5455 if (
@@ -64,19 +65,25 @@ def _get_priority_weight(self, priority: Optional[str]) -> float:
6465 "PREMIUM FEATURE: Reserving tpm/rpm by priority is a premium feature. Please add a 'LITELLM_LICENSE' to your .env to enable this.\n Get a license: https://docs.litellm.ai/docs/proxy/enterprise."
6566 )
6667 else :
67- weight = litellm .priority_reservation [priority ]
68+ value = litellm .priority_reservation [priority ]
69+ weight = convert_priority_to_percent (value , model_info )
6870 return weight
6971
70- def _normalize_priority_weights (self ) -> Dict [str , float ]:
72+ def _normalize_priority_weights (self , model_info : ModelGroupInfo ) -> Dict [str , float ]:
7173 """
7274 Normalize priority weights if they sum to > 1.0
7375
7476 Handles over-allocation: {key_a: 0.60, key_b: 0.80} -> {key_a: 0.43, key_b: 0.57}
77+ Converts absolute rpm/tpm values to percentages based on model capacity.
7578 """
7679 if litellm .priority_reservation is None :
7780 return {}
7881
79- weights = dict (litellm .priority_reservation )
82+ # Convert all values to percentages first
83+ weights : Dict [str , float ] = {}
84+ for k , v in litellm .priority_reservation .items ():
85+ weights [k ] = convert_priority_to_percent (v , model_info )
86+
8087 total_weight = sum (weights .values ())
8188
8289 if total_weight > 1.0 :
@@ -93,6 +100,7 @@ def _get_priority_allocation(
93100 model : str ,
94101 priority : Optional [str ],
95102 normalized_weights : Dict [str , float ],
103+ model_info : Optional [ModelGroupInfo ] = None ,
96104 ) -> tuple [float , str ]:
97105 """
98106 Get priority weight and pool key for a given priority.
@@ -104,6 +112,7 @@ def _get_priority_allocation(
104112 model: Model name
105113 priority: Priority level (None for default)
106114 normalized_weights: Pre-computed normalized weights
115+ model_info: Model configuration (optional, for fallback conversion)
107116
108117 Returns:
109118 tuple: (priority_weight, priority_key)
@@ -117,7 +126,7 @@ def _get_priority_allocation(
117126
118127 if has_explicit_priority and priority is not None :
119128 # Explicit priority: get its specific allocation
120- priority_weight = normalized_weights .get (priority , self ._get_priority_weight (priority ))
129+ priority_weight = normalized_weights .get (priority , self ._get_priority_weight (priority , model_info ))
121130 # Use unique key per priority level
122131 priority_key = f"{ model } :{ priority } "
123132 else :
@@ -232,11 +241,12 @@ def _create_priority_based_descriptors(
232241 return descriptors
233242
234243 # Get normalized priority weight and pool key
235- normalized_weights = self ._normalize_priority_weights ()
244+ normalized_weights = self ._normalize_priority_weights (model_group_info )
236245 priority_weight , priority_key = self ._get_priority_allocation (
237246 model = model ,
238247 priority = priority ,
239248 normalized_weights = normalized_weights ,
249+ model_info = model_group_info ,
240250 )
241251
242252 rate_limit_config : RateLimitDescriptorRateLimitObject = {}
0 commit comments