-
Notifications
You must be signed in to change notification settings - Fork 71
Description
environmet:
accelerate==1.8.1
aiohappyeyeballs==2.6.1
aiohttp==3.12.13
aiosignal==1.4.0
annotated-types==0.7.0
anyio==4.9.0
attrs==25.3.0
blobfile==3.0.0
cachetools==5.5.2
certifi==2025.8.3
charset-normalizer==3.4.3
cnpip==1.2.2
contourpy==1.3.2
cycler==0.12.1
datasets==3.6.0
dill==0.3.8
distro==1.9.0
einops==0.8.1
filelock==3.18.0
fonttools==4.58.5
frozenlist==1.7.0
fsspec==2025.3.0
google-auth==2.40.3
google-genai==1.24.0
h11==0.16.0
hf-xet==1.1.5
httpcore==1.0.9
httpx==0.28.1
huggingface-hub==0.33.2
idna==3.10
Jinja2==3.1.6
jiter==0.10.0
kiwisolver==1.4.8
lxml==6.0.0
MarkupSafe==3.0.2
matplotlib==3.10.3
mpmath==1.3.0
multidict==6.6.3
multiprocess==0.70.16
networkx==3.5
numpy==2.3.1
nvidia-cublas-cu12==12.6.4.1
nvidia-cuda-cupti-cu12==12.6.80
nvidia-cuda-nvrtc-cu12==12.6.77
nvidia-cuda-runtime-cu12==12.6.77
nvidia-cudnn-cu12==9.5.1.17
nvidia-cufft-cu12==11.3.0.4
nvidia-cufile-cu12==1.11.1.6
nvidia-curand-cu12==10.3.7.77
nvidia-cusolver-cu12==11.7.1.2
nvidia-cusparse-cu12==12.5.4.2
nvidia-cusparselt-cu12==0.6.3
nvidia-nccl-cu12==2.26.2
nvidia-nvjitlink-cu12==12.6.85
nvidia-nvtx-cu12==12.6.77
openai==1.93.0
packaging==25.0
pandas==2.3.0
pillow==11.3.0
polars==1.31.0
propcache==0.3.2
protobuf==6.31.1
psutil==7.0.0
pyarrow==20.0.0
pyasn1==0.6.1
pyasn1_modules==0.4.2
pycryptodomex==3.23.0
pydantic==2.11.7
pydantic_core==2.33.2
pyparsing==3.2.3
python-dateutil==2.9.0.post0
pytz==2025.2
PyYAML==6.0.2
regex==2024.11.6
requests==2.32.5
rsa==4.9.1
safetensors==0.5.3
setuptools==80.9.0
six==1.17.0
sniffio==1.3.1
sympy==1.14.0
tenacity==8.5.0
tiktoken==0.9.0
timm==1.0.16
tokenizers==0.20.3
torch==2.7.1
torchvision==0.22.1
tqdm==4.67.1
transformers==4.45.1
triton==3.3.1
typing-inspection==0.4.1
typing_extensions==4.14.0
tzdata==2025.2
urllib3==2.5.0
websockets==15.0.1
wheel==0.45.1
xxhash==3.5.0
yarl==1.20.1
Hard device: nvidia A800 80G
when I use the script:
import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor
model_path = "/HOME/uestc_rhuang/uestc_rhuang_1/HDD_POOL/aNan/model/moonshotai/Kimi-VL-A3B-Thinking-2506"
model = AutoModelForCausalLM.from_pretrained(
model_path,
torch_dtype="auto",
device_map="auto",
trust_remote_code=True,
)
# If flash-attn has been installed, it is recommended to set torch_dtype=torch.bfloat16 and attn_implementation="flash_attention_2"
# to save memory and speed up inference
# model = AutoModelForCausalLM.from_pretrained(
# model_path,
# torch_dtype=torch.bfloat16,
# device_map="auto",
# trust_remote_code=True,
# attn_implementation="flash_attention_2"
# )
processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)
image_paths = ["costom_neuron/dataset/hallusion_bench/VD/math/0_0.png", "costom_neuron/dataset/hallusion_bench/VD/math/0_1.png"]
images = [Image.open(path).convert("RGB") for path in image_paths]
messages = [
{
"role": "user",
"content": [
{"type": "image", "image": image_path} for image_path in image_paths
] + [{"type": "text", "text": "Please infer step by step who this manuscript belongs to and what it records"}],
},
]
prompt = processor.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt")
inputs = processor(images=images, text=prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)
generated_ids = model.generate(**inputs, max_new_tokens=32768, temperature=0.8)
generated_ids_trimmed = [
out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
]
response = processor.batch_decode(
generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
)[0]
print(response)
firstly,I meet the error:
AssertionError: text_pair input must of type str (single example), List[str] (batch or single pretok
Then,I add sentence to transform the value named "promtp" into str:
import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor
model_path = "/HOME/uestc_rhuang/uestc_rhuang_1/HDD_POOL/aNan/model/moonshotai/Kimi-VL-A3B-Thinking-2506"
model = AutoModelForCausalLM.from_pretrained(
model_path,
torch_dtype="auto",
device_map="auto",
trust_remote_code=True,
)
# If flash-attn has been installed, it is recommended to set torch_dtype=torch.bfloat16 and attn_implementation="flash_attention_2"
# to save memory and speed up inference
# model = AutoModelForCausalLM.from_pretrained(
# model_path,
# torch_dtype=torch.bfloat16,
# device_map="auto",
# trust_remote_code=True,
# attn_implementation="flash_attention_2"
# )
processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)
image_paths = ["costom_neuron/dataset/hallusion_bench/VD/math/0_0.png", "costom_neuron/dataset/hallusion_bench/VD/math/0_1.png"]
images = [Image.open(path).convert("RGB") for path in image_paths]
messages = [
{
"role": "user",
"content": [
{"type": "image", "image": image_path} for image_path in image_paths
] + [{"type": "text", "text": "Please infer step by step who this manuscript belongs to and what it records"}],
},
]
prompt = processor.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt")
if not isinstance(prompt, str):
if hasattr(prompt, 'tolist'): # 如果是tensor
prompt = str(prompt.tolist())
else:
prompt = str(prompt)
inputs = processor(images=images, text=prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)
generated_ids = model.generate(**inputs, max_new_tokens=32768, temperature=0.8)
generated_ids_trimmed = [
out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
]
response = processor.batch_decode(
generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
)[0]
print(response)
and then ,I will meet the warning :
Keyword arguments {'images': [<PIL.PngImagePlugin.PngImageFile image mode=P size=1340x742 at 0x7F2387117170>, <PIL.PngImagePlugin.PngImageFile image mode=P size=1308x770 at 0x7F236A35FBF0>]} not recognized.
This warning will let the model not use the image .
If you have any ideas about this problem ,please tell me!Thanks for your help!