
Commit 073b7e4

fix: added missing exit_stack.close() to /v1/chat/completions (#1796)
* fix: added missing exit_stack.close() to /v1/chat/completions
* fix: added missing exit_stack.close() to /v1/completions
1 parent 77a12a3 commit 073b7e4
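
Both endpoints acquire per-request resources on a `contextlib.ExitStack`, but before this commit nothing closed the stack when the inference call raised, or when a non-streaming response was returned, so those resources leaked. A minimal sketch of the pattern being fixed, assuming `exit_stack` is a `contextlib.ExitStack`; `acquire_model` and `handle_request` are hypothetical stand-ins for the server's actual setup, not project code:

```python
import contextlib
from typing import Any, Callable, Dict, Iterator, Union

@contextlib.contextmanager
def acquire_model() -> Iterator[Callable[..., Dict[str, Any]]]:
    # Hypothetical stand-in for the server's model/lock acquisition.
    try:
        yield lambda **kwargs: {"text": "ok"}
    finally:
        print("model released")

def handle_request(**kwargs: Any) -> Union[Dict[str, Any], Iterator[Dict[str, Any]]]:
    exit_stack = contextlib.ExitStack()
    llama = exit_stack.enter_context(acquire_model())
    try:
        result = llama(**kwargs)
    except Exception:
        # The fix: release resources before the exception propagates.
        exit_stack.close()
        raise
    if isinstance(result, Iterator):
        return result  # streaming: cleanup must be deferred until the stream ends
    exit_stack.close()  # non-streaming: the response is complete, release now
    return result

print(handle_request(prompt="hi"))
```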

2 files changed: +20 −9 lines changed


llama_cpp/server/app.py

+16 −7
```diff
@@ -314,10 +314,14 @@ async def create_completion(
     else:
         kwargs["logits_processor"].extend(_min_tokens_logits_processor)
 
-    iterator_or_completion: Union[
-        llama_cpp.CreateCompletionResponse,
-        Iterator[llama_cpp.CreateCompletionStreamResponse],
-    ] = await run_in_threadpool(llama, **kwargs)
+    try:
+        iterator_or_completion: Union[
+            llama_cpp.CreateCompletionResponse,
+            Iterator[llama_cpp.CreateCompletionStreamResponse],
+        ] = await run_in_threadpool(llama, **kwargs)
+    except Exception as err:
+        exit_stack.close()
+        raise err
 
     if isinstance(iterator_or_completion, Iterator):
         # EAFP: It's easier to ask for forgiveness than permission
```
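
The guard has a simple shape: if the threadpool call raises, the stack is closed and the exception is re-raised. A tiny self-contained demonstration of the cleanup-on-error behavior (not project code):

```python
import contextlib

exit_stack = contextlib.ExitStack()
exit_stack.callback(lambda: print("resources released"))

try:
    try:
        raise RuntimeError("inference failed")  # stands in for run_in_threadpool raising
    except Exception as err:
        exit_stack.close()  # prints "resources released" before propagating
        raise err
except RuntimeError as e:
    print(f"propagated: {e}")
```

A bare `raise` would behave equivalently here, since `err` is the active exception; `raise err` merely adds the re-raise site to the traceback.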
```diff
@@ -344,6 +348,7 @@ def iterator() -> Iterator[llama_cpp.CreateCompletionStreamResponse]:
             ping_message_factory=_ping_message_factory,
         )
     else:
+        exit_stack.close()
         return iterator_or_completion
 
 
```
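In the streaming branch the stack must stay open while chunks are produced, which is why only the `else` (non-streaming) branch closes it eagerly. One way to defer cleanup for a stream, sketched with illustrative names rather than the server's actual wiring:

```python
import contextlib
from typing import Dict, Iterator

def close_when_done(
    chunks: Iterator[Dict[str, int]], exit_stack: contextlib.ExitStack
) -> Iterator[Dict[str, int]]:
    # Keep resources alive while streaming; release them once the stream
    # ends, whether it is exhausted, closed early, or errors out.
    try:
        yield from chunks
    finally:
        exit_stack.close()

stack = contextlib.ExitStack()
stack.callback(lambda: print("released"))
for chunk in close_when_done(iter([{"a": 1}, {"b": 2}]), stack):
    print(chunk)
```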
```diff
@@ -508,9 +513,13 @@ async def create_chat_completion(
     else:
         kwargs["logits_processor"].extend(_min_tokens_logits_processor)
 
-    iterator_or_completion: Union[
-        llama_cpp.ChatCompletion, Iterator[llama_cpp.ChatCompletionChunk]
-    ] = await run_in_threadpool(llama.create_chat_completion, **kwargs)
+    try:
+        iterator_or_completion: Union[
+            llama_cpp.ChatCompletion, Iterator[llama_cpp.ChatCompletionChunk]
+        ] = await run_in_threadpool(llama.create_chat_completion, **kwargs)
+    except Exception as err:
+        exit_stack.close()
+        raise err
 
     if isinstance(iterator_or_completion, Iterator):
         # EAFP: It's easier to ask for forgiveness than permission
```
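
The chat endpoint receives the identical guard. A pytest-style check of the shared pattern (illustrative only, not part of the repository's test suite):

```python
import contextlib
import pytest  # assumed available in the dev environment

def guarded_call(fn, exit_stack: contextlib.ExitStack):
    # Same shape as the patched endpoints: close the stack on error.
    try:
        return fn()
    except Exception as err:
        exit_stack.close()
        raise err

def boom():
    raise ValueError("boom")

def test_stack_closes_on_error():
    closed = []
    stack = contextlib.ExitStack()
    stack.callback(lambda: closed.append(True))
    with pytest.raises(ValueError):
        guarded_call(boom, stack)
    assert closed == [True]
```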

llama_cpp/server/errors.py

+4 −2
```diff
@@ -134,8 +134,6 @@ def error_message_wrapper(
     ] = None,
 ) -> Tuple[int, ErrorResponse]:
     """Wraps error message in OpenAI style error response"""
-    print(f"Exception: {str(error)}", file=sys.stderr)
-    traceback.print_exc(file=sys.stderr)
     if body is not None and isinstance(
         body,
         (
```
```diff
@@ -149,6 +147,10 @@ def error_message_wrapper(
         if match is not None:
             return callback(body, match)
 
+    # Only print the trace on unexpected exceptions
+    print(f"Exception: {str(error)}", file=sys.stderr)
+    traceback.print_exc(file=sys.stderr)
+
     # Wrap other errors as internal server error
     return 500, ErrorResponse(
         message=str(error),
```
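
The net effect in errors.py: exceptions matched by a known pattern return their mapped status quietly, and only unmatched (unexpected) exceptions print a trace before being wrapped as a 500. A condensed sketch of that control flow; the pattern table here is illustrative, whereas the real function matches compiled regexes against the error message and dispatches to callbacks:

```python
import sys
import traceback

# Illustrative mapping; the real table pairs compiled regexes with callbacks.
KNOWN_ERROR_PATTERNS = {
    "exceed context window": 400,
}

def error_status(error: Exception) -> int:
    message = str(error)
    for pattern, status in KNOWN_ERROR_PATTERNS.items():
        if pattern in message:
            return status  # expected error: no stderr noise

    # Only print the trace on unexpected exceptions
    print(f"Exception: {message}", file=sys.stderr)
    traceback.print_exc(file=sys.stderr)
    return 500
```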
