encode prompt: move text_encoder to cuda
Traceback (most recent call last):
File "/teamspace/studios/this_studio/HunyuanVideo-Avatar/hymm_sp/sample_gpu_poor.py", line 108, in <module>
main()
File "/teamspace/studios/this_studio/HunyuanVideo-Avatar/hymm_sp/sample_gpu_poor.py", line 82, in main
samples = hunyuan_video_sampler.predict(args, batch, wav2vec, feature_extractor, align_instance)
File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
return func(*args, **kwargs)
File "/teamspace/studios/this_studio/HunyuanVideo-Avatar/hymm_sp/sample_inference_audio.py", line 185, in predict
samples = self.pipeline(prompt=prompt,
File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
return func(*args, **kwargs)
File "/teamspace/studios/this_studio/HunyuanVideo-Avatar/hymm_sp/diffusion/pipelines/pipeline_hunyuan_video_audio.py", line 972, in __call__
self.encode_prompt_audio_text_base(
File "/teamspace/studios/this_studio/HunyuanVideo-Avatar/hymm_sp/diffusion/pipelines/pipeline_hunyuan_video_audio.py", line 458, in encode_prompt_audio_text_base
prompt_outputs = text_encoder.encode(text_inputs, data_type=data_type)
File "/teamspace/studios/this_studio/HunyuanVideo-Avatar/hymm_sp/text_encoder/__init__.py", line 260, in encode
outputs = self.model(
File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/transformers/utils/generic.py", line 918, in wrapper
output = func(self, *args, **kwargs)
File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/transformers/models/llava/modeling_llava.py", line 419, in forward
outputs = self.model(
File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/transformers/utils/generic.py", line 918, in wrapper
output = func(self, *args, **kwargs)
File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/transformers/models/llava/modeling_llava.py", line 280, in forward
special_image_mask = self.get_placeholder_mask(
File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/transformers/models/llava/modeling_llava.py", line 236, in get_placeholder_mask
raise ValueError(
ValueError: Image features and image tokens do not match: tokens: 0, features 2359296
encode prompt: move text_encoder to cuda
Traceback (most recent call last):
File "/teamspace/studios/this_studio/HunyuanVideo-Avatar/hymm_sp/sample_gpu_poor.py", line 108, in <module>
main()
File "/teamspace/studios/this_studio/HunyuanVideo-Avatar/hymm_sp/sample_gpu_poor.py", line 82, in main
samples = hunyuan_video_sampler.predict(args, batch, wav2vec, feature_extractor, align_instance)
File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
return func(*args, **kwargs)
File "/teamspace/studios/this_studio/HunyuanVideo-Avatar/hymm_sp/sample_inference_audio.py", line 185, in predict
samples = self.pipeline(prompt=prompt,
File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
return func(*args, **kwargs)
File "/teamspace/studios/this_studio/HunyuanVideo-Avatar/hymm_sp/diffusion/pipelines/pipeline_hunyuan_video_audio.py", line 972, in __call__
self.encode_prompt_audio_text_base(
File "/teamspace/studios/this_studio/HunyuanVideo-Avatar/hymm_sp/diffusion/pipelines/pipeline_hunyuan_video_audio.py", line 458, in encode_prompt_audio_text_base
prompt_outputs = text_encoder.encode(text_inputs, data_type=data_type)
File "/teamspace/studios/this_studio/HunyuanVideo-Avatar/hymm_sp/text_encoder/__init__.py", line 260, in encode
outputs = self.model(
File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/transformers/utils/generic.py", line 918, in wrapper
output = func(self, *args, **kwargs)
File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/transformers/models/llava/modeling_llava.py", line 419, in forward
outputs = self.model(
File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/transformers/utils/generic.py", line 918, in wrapper
output = func(self, *args, **kwargs)
File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/transformers/models/llava/modeling_llava.py", line 280, in forward
special_image_mask = self.get_placeholder_mask(
File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/transformers/models/llava/modeling_llava.py", line 236, in get_placeholder_mask
raise ValueError(
ValueError: Image features and image tokens do not match: tokens: 0, features 2359296