Skip to content

Commit b79c49c

Browse files
committed
warm load image server models
1 parent 0bfecf4 commit b79c49c

1 file changed

Lines changed: 54 additions & 0 deletions

File tree

Pipes.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3329,6 +3329,60 @@ def __init__(self):
33293329
f"[STT] Voice server configured ({voice_url}) - skipping local model loading"
33303330
)
33313331

3332+
# Pre-load IMG and VIDEO models in image server mode to avoid cold-start delays.
3333+
# In image server mode, these models are kept resident after first use anyway,
3334+
# so pre-loading them at startup avoids the 10+ minute wait on the first request.
# If a pre-load fails, the model is left unset (None) and is lazy-loaded on first request instead.
3335+
if is_image_server_mode():
3336+
IMG_MODEL = getenv("IMG_MODEL")
3337+
if IMG_MODEL and IMG_MODEL.lower() != "none" and img_import_success:
3338+
logging.info(
3339+
f"[IMG] Image server mode - pre-loading {IMG_MODEL} to keep resident"
3340+
)
3341+
start_time = time.time()
3342+
try:
3343+
self.img = IMG(
3344+
model=IMG_MODEL,
3345+
local_uri=getenv("EZLOCALAI_URL"),
3346+
device="cuda",
3347+
)
3348+
load_time = time.time() - start_time
3349+
logging.info(
3350+
f"[IMG] {IMG_MODEL} loaded in {load_time:.1f}s (image server mode - staying loaded)"
3351+
)
3352+
self.resource_manager.register_model(
3353+
ModelType.IMG, IMG_MODEL, "cuda", vram_gb=4.0
3354+
)
3355+
except Exception as e:
3356+
logging.warning(
3357+
f"[IMG] Failed to pre-load {IMG_MODEL}: {e}. Will lazy-load on first request."
3358+
)
3359+
self.img = None
3360+
3361+
VIDEO_MODEL = getenv("VIDEO_MODEL")
3362+
if VIDEO_MODEL and VIDEO_MODEL.lower() != "none" and video_import_success:
3363+
logging.info(
3364+
f"[VIDEO] Image server mode - pre-loading {VIDEO_MODEL} to keep resident"
3365+
)
3366+
start_time = time.time()
3367+
try:
3368+
self.video = VIDEO(
3369+
model=VIDEO_MODEL,
3370+
local_uri=getenv("EZLOCALAI_URL"),
3371+
device="cuda",
3372+
)
3373+
load_time = time.time() - start_time
3374+
logging.info(
3375+
f"[VIDEO] {VIDEO_MODEL} loaded in {load_time:.1f}s (image server mode - staying loaded)"
3376+
)
3377+
self.resource_manager.register_model(
3378+
ModelType.VIDEO, VIDEO_MODEL, "cuda", vram_gb=12.0
3379+
)
3380+
except Exception as e:
3381+
logging.warning(
3382+
f"[VIDEO] Failed to pre-load {VIDEO_MODEL}: {e}. Will lazy-load on first request."
3383+
)
3384+
self.video = None
3385+
33323386
NGROK_TOKEN = getenv("NGROK_TOKEN")
33333387
if NGROK_TOKEN:
33343388
ngrok.set_auth_token(NGROK_TOKEN)

0 commit comments

Comments
 (0)