@@ -3329,6 +3329,60 @@ def __init__(self):
33293329 f"[STT] Voice server configured ({ voice_url } ) - skipping local model loading"
33303330 )
33313331
3332+ # Pre-load IMG and VIDEO models in image server mode to avoid cold-start delays.
3333+ # In image server mode, these models are kept resident after first use anyway,
3334+ # so pre-loading them at startup eliminates the 10+ minute wait on the first request.
3335+ if is_image_server_mode ():
3336+ IMG_MODEL = getenv ("IMG_MODEL" )
3337+ if IMG_MODEL and IMG_MODEL .lower () != "none" and img_import_success :
3338+ logging .info (
3339+ f"[IMG] Image server mode - pre-loading { IMG_MODEL } to keep resident"
3340+ )
3341+ start_time = time .time ()
3342+ try :
3343+ self .img = IMG (
3344+ model = IMG_MODEL ,
3345+ local_uri = getenv ("EZLOCALAI_URL" ),
3346+ device = "cuda" ,
3347+ )
3348+ load_time = time .time () - start_time
3349+ logging .info (
3350+ f"[IMG] { IMG_MODEL } loaded in { load_time :.1f} s (image server mode - staying loaded)"
3351+ )
3352+ self .resource_manager .register_model (
3353+ ModelType .IMG , IMG_MODEL , "cuda" , vram_gb = 4.0
3354+ )
3355+ except Exception as e :
3356+ logging .warning (
3357+ f"[IMG] Failed to pre-load { IMG_MODEL } : { e } . Will lazy-load on first request."
3358+ )
3359+ self .img = None
3360+
3361+ VIDEO_MODEL = getenv ("VIDEO_MODEL" )
3362+ if VIDEO_MODEL and VIDEO_MODEL .lower () != "none" and video_import_success :
3363+ logging .info (
3364+ f"[VIDEO] Image server mode - pre-loading { VIDEO_MODEL } to keep resident"
3365+ )
3366+ start_time = time .time ()
3367+ try :
3368+ self .video = VIDEO (
3369+ model = VIDEO_MODEL ,
3370+ local_uri = getenv ("EZLOCALAI_URL" ),
3371+ device = "cuda" ,
3372+ )
3373+ load_time = time .time () - start_time
3374+ logging .info (
3375+ f"[VIDEO] { VIDEO_MODEL } loaded in { load_time :.1f} s (image server mode - staying loaded)"
3376+ )
3377+ self .resource_manager .register_model (
3378+ ModelType .VIDEO , VIDEO_MODEL , "cuda" , vram_gb = 12.0
3379+ )
3380+ except Exception as e :
3381+ logging .warning (
3382+ f"[VIDEO] Failed to pre-load { VIDEO_MODEL } : { e } . Will lazy-load on first request."
3383+ )
3384+ self .video = None
3385+
33323386 NGROK_TOKEN = getenv ("NGROK_TOKEN" )
33333387 if NGROK_TOKEN :
33343388 ngrok .set_auth_token (NGROK_TOKEN )
0 commit comments