
Commit 5b19cee

mudler authored and siddimore committed
feat(vllm): add support for image-to-text and video-to-text (mudler#3729)
* feat(vllm): add support for image-to-text

  Related to mudler#3670

  Signed-off-by: Ettore Di Giacinto <[email protected]>

* feat(vllm): add support for video-to-text

  Closes: mudler#2318

  Signed-off-by: Ettore Di Giacinto <[email protected]>

* feat(vllm): support CPU installations

  Signed-off-by: Ettore Di Giacinto <[email protected]>

* feat(vllm): add bnb

  Signed-off-by: Ettore Di Giacinto <[email protected]>

* chore: add docs reference

  Signed-off-by: Ettore Di Giacinto <[email protected]>

* Apply suggestions from code review

  Signed-off-by: Ettore Di Giacinto <[email protected]>

---------

Signed-off-by: Ettore Di Giacinto <[email protected]>
Signed-off-by: Ettore Di Giacinto <[email protected]>
1 parent bb130ff · commit 5b19cee

6 files changed: +91 -10 lines

backend/python/vllm/backend.py (+68 -5)
@@ -5,6 +5,8 @@
 import signal
 import sys
 import os
+from typing import List
+from PIL import Image

 import backend_pb2
 import backend_pb2_grpc
@@ -15,6 +17,8 @@
 from vllm.sampling_params import SamplingParams
 from vllm.utils import random_uuid
 from vllm.transformers_utils.tokenizer import get_tokenizer
+from vllm.multimodal.utils import fetch_image
+from vllm.assets.video import VideoAsset

 _ONE_DAY_IN_SECONDS = 60 * 60 * 24

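Of the two new imports, only VideoAsset is exercised in this diff; fetch_image (a vLLM helper, as of the 0.6.x series) resolves an http(s) or base64 data URL to a PIL image. A minimal sketch of how it could be called — the URL is purely illustrative, not from the commit:

from vllm.multimodal.utils import fetch_image

# fetch_image resolves an http(s):// image URL (or a data: URL) to a
# PIL.Image.Image; the URL below is illustrative only.
image = fetch_image("https://example.com/sample.jpg")
print(image.size)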
@@ -105,6 +109,7 @@ async def LoadModel(self, request, context):
         try:
             self.llm = AsyncLLMEngine.from_engine_args(engine_args)
         except Exception as err:
+            print(f"Unexpected {err=}, {type(err)=}", file=sys.stderr)
             return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")

         try:
@@ -117,7 +122,7 @@ async def LoadModel(self, request, context):
             )
         except Exception as err:
             return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
-
+        print("Model loaded successfully", file=sys.stderr)
         return backend_pb2.Result(message="Model loaded successfully", success=True)

     async def Predict(self, request, context):
@@ -196,15 +201,33 @@ async def _predict(self, request, context, streaming=False):
         if request.Seed != 0:
             sampling_params.seed = request.Seed

+        # Extract image paths and process images
         prompt = request.Prompt
-
-        # If tokenizer template is enabled and messages are provided instead of prompt apply the tokenizer template
+
+        image_paths = request.Images
+        image_data = [self.load_image(img_path) for img_path in image_paths]
+
+        videos_path = request.Videos
+        video_data = [self.load_video(video_path) for video_path in videos_path]
+
+        # If tokenizer template is enabled and messages are provided instead of prompt, apply the tokenizer template
         if not request.Prompt and request.UseTokenizerTemplate and request.Messages:
             prompt = self.tokenizer.apply_chat_template(request.Messages, tokenize=False, add_generation_prompt=True)

-        # Generate text
+        # Generate text using the LLM engine
         request_id = random_uuid()
-        outputs = self.llm.generate(prompt, sampling_params, request_id)
+        print(f"Generating text with request_id: {request_id}", file=sys.stderr)
+        outputs = self.llm.generate(
+            {
+                "prompt": prompt,
+                "multi_modal_data": {
+                    "image": image_data if image_data else None,
+                    "video": video_data if video_data else None,
+                } if image_data or video_data else None,
+            },
+            sampling_params=sampling_params,
+            request_id=request_id,
+        )

         # Stream the results
         generated_text = ""
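For context on the new call shape: vLLM accepts, in place of a bare prompt string, a prompt dict whose multi_modal_data entry carries decoded images or video frames. A minimal offline sketch of the same pattern, assuming a LLaVA-style vision model (the model name and prompt template are illustrative and not part of this commit):

from vllm import LLM, SamplingParams
from PIL import Image

# Illustrative model choice; any vLLM-supported vision-language model
# follows the same prompt-dict shape.
llm = LLM(model="llava-hf/llava-1.5-7b-hf")
image = Image.open("example.jpg")

outputs = llm.generate(
    {
        "prompt": "USER: <image>\nDescribe this picture.\nASSISTANT:",
        "multi_modal_data": {"image": image},
    },
    SamplingParams(max_tokens=64),
)
print(outputs[0].outputs[0].text)

The backend above uses the async engine (AsyncLLMEngine.generate) with the same dict, streaming RequestOutput objects instead of returning a list.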
@@ -227,9 +250,49 @@ async def _predict(self, request, context, streaming=False):
         if streaming:
             return

+        # Remove the image files from /tmp folder
+        for img_path in image_paths:
+            try:
+                os.remove(img_path)
+            except Exception as e:
+                print(f"Error removing image file: {img_path}, {e}", file=sys.stderr)
+
         # Sending the final generated text
         yield backend_pb2.Reply(message=bytes(generated_text, encoding='utf-8'))

+    def load_image(self, image_path: str):
+        """
+        Load an image from the given file path.
+
+        Args:
+            image_path (str): The path to the image file.
+
+        Returns:
+            Image: The loaded image.
+        """
+        try:
+            return Image.open(image_path)
+        except Exception as e:
+            print(f"Error loading image {image_path}: {e}", file=sys.stderr)
+            return self.load_video(image_path)
+
+    def load_video(self, video_path: str):
+        """
+        Load a video from the given file path.
+
+        Args:
+            video_path (str): The path to the video file.
+
+        Returns:
+            Video: The loaded video.
+        """
+        try:
+            video = VideoAsset(name=video_path).np_ndarrays
+            return video
+        except Exception as e:
+            print(f"Error loading video {video_path}: {e}", file=sys.stderr)
+            return None
+
 async def serve(address):
     # Start asyncio gRPC server
     server = grpc.aio.server(migration_thread_pool=futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
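A note on load_video: VideoAsset comes from vLLM's bundled test assets, which fetch named sample videos, so treating an arbitrary file path as an asset name may not generalize. A hedged alternative sketch that decodes a local file into the (frames, height, width, 3) uint8 stack that multi_modal_data={"video": ...} expects, using OpenCV (an assumption, not what the commit ships):

import cv2
import numpy as np

def load_video_frames(video_path: str, num_frames: int = 16) -> np.ndarray:
    """Decode up to num_frames evenly spaced RGB frames from video_path."""
    capture = cv2.VideoCapture(video_path)
    total = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
    indices = np.linspace(0, max(total - 1, 0), num_frames).astype(int)
    frames = []
    for index in indices:
        capture.set(cv2.CAP_PROP_POS_FRAMES, int(index))
        ok, frame = capture.read()
        if not ok:
            break
        # OpenCV decodes to BGR; vision models generally expect RGB.
        frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    capture.release()
    if not frames:
        raise ValueError(f"no frames decoded from {video_path}")
    return np.stack(frames)  # shape: (num_frames, height, width, 3)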

backend/python/vllm/install.sh (+15 -1)
@@ -13,4 +13,18 @@ if [ "x${BUILD_PROFILE}" == "xintel" ]; then
     EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
 fi

-installRequirements
+if [ "x${BUILD_TYPE}" == "x" ]; then
+    ensureVenv
+    # https://docs.vllm.ai/en/v0.6.1/getting_started/cpu-installation.html
+    if [ ! -d vllm ]; then
+        git clone https://github.com/vllm-project/vllm
+    fi
+    pushd vllm
+    uv pip install wheel packaging ninja "setuptools>=49.4.0" numpy typing-extensions pillow setuptools-scm grpcio==1.66.2 protobuf bitsandbytes
+    uv pip install -v -r requirements-cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu
+    VLLM_TARGET_DEVICE=cpu python setup.py install
+    popd
+    rm -rf vllm
+else
+    installRequirements
+fi
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
--extra-index-url https://download.pytorch.org/whl/cu118
22
accelerate
33
torch
4-
transformers
4+
transformers
5+
bitsandbytes
backend/python/vllm/requirements-cublas12.txt (+2 -1)

@@ -1,3 +1,4 @@
 accelerate
 torch
-transformers
+transformers
+bitsandbytes
backend/python/vllm/requirements-hipblas.txt (+2 -1)

@@ -1,4 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/rocm6.0
 accelerate
 torch
-transformers
+transformers
+bitsandbytes

backend/python/vllm/requirements-intel.txt (+2 -1)

@@ -4,4 +4,5 @@ accelerate
 torch
 transformers
 optimum[openvino]
-setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
+bitsandbytes
