Skip to content

Commit 4c9eeda

Browse files
committed
fix(request-audio): loop through model_names
1 parent b6dd717 commit 4c9eeda

File tree

3 files changed

+39
-100
lines changed

3 files changed

+39
-100
lines changed

src/vllm_router/service_discovery.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,8 @@ class EndpointInfo:
9494
# Model label
9595
model_label: str
9696

97+
model_type: str
98+
9799
# Endpoint's sleep status
98100
sleep: bool
99101

@@ -306,13 +308,15 @@ def get_endpoint_info(self) -> List[EndpointInfo]:
306308
):
307309
continue
308310
model_label = self.model_labels[i] if self.model_labels else "default"
311+
model_type = self.model_types[i] if self.model_types else "default"
309312
endpoint_info = EndpointInfo(
310313
url=url,
311314
model_names=[model], # Convert single model to list
312315
Id=self.engines_id[i],
313316
sleep=False,
314317
added_timestamp=self.added_timestamp,
315318
model_label=model_label,
319+
model_type=model_type,
316320
model_info=self._get_model_info(model),
317321
)
318322
endpoint_infos.append(endpoint_info)

src/vllm_router/services/request_service/request.py

Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -565,18 +565,13 @@ async def route_general_transcriptions(
565565

566566
endpoints = service_discovery.get_endpoint_info()
567567

568-
logger.debug("==== Total endpoints ====")
569-
logger.debug(endpoints)
570-
logger.debug("==== Total endpoints ====")
571-
572568
# filter the endpoints by model name and model type for transcriptions
573-
transcription_endpoints = [
574-
ep
575-
for ep in endpoints
576-
if model == ep.model_name
577-
and ep.model_label == "transcription"
578-
and not ep.sleep # Added ep.sleep == False
579-
]
569+
logger.debug(endpoints)
570+
transcription_endpoints = []
571+
for ep in endpoints:
572+
for model_name in ep.model_names:
573+
if model == model_name and ep.model_type == "transcription" and not ep.sleep:
574+
transcription_endpoints.append(ep)
580575

581576
logger.debug("====List of transcription endpoints====")
582577
logger.debug(transcription_endpoints)
@@ -620,10 +615,6 @@ async def route_general_transcriptions(
620615

621616
logger.info("Proxying transcription request for model %s to %s", model, chosen_url)
622617

623-
logger.debug("==== data payload keys ====")
624-
logger.debug(list(data.keys()))
625-
logger.debug("==== data payload keys ====")
626-
627618
try:
628619
client = request.app.state.aiohttp_client_wrapper()
629620

@@ -687,3 +678,9 @@ async def route_general_transcriptions(
687678
status_code=503,
688679
content={"error": f"Failed to connect to backend: {str(client_error)}"},
689680
)
681+
except Exception as e:
682+
logger.error(e)
683+
return JSONResponse(
684+
status_code=500,
685+
content={"error": f"Internal server error"},
686+
)

0 commit comments

Comments
 (0)