Skip to content

HELP!I meet some trouble with tockenizer!!! #68

@liubingnan56-ai

Description

@liubingnan56-ai

environmet:
accelerate==1.8.1
aiohappyeyeballs==2.6.1
aiohttp==3.12.13
aiosignal==1.4.0
annotated-types==0.7.0
anyio==4.9.0
attrs==25.3.0
blobfile==3.0.0
cachetools==5.5.2
certifi==2025.8.3
charset-normalizer==3.4.3
cnpip==1.2.2
contourpy==1.3.2
cycler==0.12.1
datasets==3.6.0
dill==0.3.8
distro==1.9.0
einops==0.8.1
filelock==3.18.0
fonttools==4.58.5
frozenlist==1.7.0
fsspec==2025.3.0
google-auth==2.40.3
google-genai==1.24.0
h11==0.16.0
hf-xet==1.1.5
httpcore==1.0.9
httpx==0.28.1
huggingface-hub==0.33.2
idna==3.10
Jinja2==3.1.6
jiter==0.10.0
kiwisolver==1.4.8
lxml==6.0.0
MarkupSafe==3.0.2
matplotlib==3.10.3
mpmath==1.3.0
multidict==6.6.3
multiprocess==0.70.16
networkx==3.5
numpy==2.3.1
nvidia-cublas-cu12==12.6.4.1
nvidia-cuda-cupti-cu12==12.6.80
nvidia-cuda-nvrtc-cu12==12.6.77
nvidia-cuda-runtime-cu12==12.6.77
nvidia-cudnn-cu12==9.5.1.17
nvidia-cufft-cu12==11.3.0.4
nvidia-cufile-cu12==1.11.1.6
nvidia-curand-cu12==10.3.7.77
nvidia-cusolver-cu12==11.7.1.2
nvidia-cusparse-cu12==12.5.4.2
nvidia-cusparselt-cu12==0.6.3
nvidia-nccl-cu12==2.26.2
nvidia-nvjitlink-cu12==12.6.85
nvidia-nvtx-cu12==12.6.77
openai==1.93.0
packaging==25.0
pandas==2.3.0
pillow==11.3.0
polars==1.31.0
propcache==0.3.2
protobuf==6.31.1
psutil==7.0.0
pyarrow==20.0.0
pyasn1==0.6.1
pyasn1_modules==0.4.2
pycryptodomex==3.23.0
pydantic==2.11.7
pydantic_core==2.33.2
pyparsing==3.2.3
python-dateutil==2.9.0.post0
pytz==2025.2
PyYAML==6.0.2
regex==2024.11.6
requests==2.32.5
rsa==4.9.1
safetensors==0.5.3
setuptools==80.9.0
six==1.17.0
sniffio==1.3.1
sympy==1.14.0
tenacity==8.5.0
tiktoken==0.9.0
timm==1.0.16
tokenizers==0.20.3
torch==2.7.1
torchvision==0.22.1
tqdm==4.67.1
transformers==4.45.1
triton==3.3.1
typing-inspection==0.4.1
typing_extensions==4.14.0
tzdata==2025.2
urllib3==2.5.0
websockets==15.0.1
wheel==0.45.1
xxhash==3.5.0
yarl==1.20.1

Hard device: nvidia A800 80G

when I use the script:

import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor

model_path = "/HOME/uestc_rhuang/uestc_rhuang_1/HDD_POOL/aNan/model/moonshotai/Kimi-VL-A3B-Thinking-2506"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype="auto",
    device_map="auto",
    trust_remote_code=True,
)
# If flash-attn has been installed, it is recommended to set torch_dtype=torch.bfloat16 and attn_implementation="flash_attention_2"
# to save memory and speed up inference
# model = AutoModelForCausalLM.from_pretrained(
#     model_path,
#     torch_dtype=torch.bfloat16,
#     device_map="auto",
#     trust_remote_code=True,
#     attn_implementation="flash_attention_2"
# )
processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)

image_paths = ["costom_neuron/dataset/hallusion_bench/VD/math/0_0.png", "costom_neuron/dataset/hallusion_bench/VD/math/0_1.png"]
images = [Image.open(path).convert("RGB") for path in image_paths]
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "image": image_path} for image_path in image_paths
        ] + [{"type": "text", "text": "Please infer step by step who this manuscript belongs to and what it records"}],
    },
]
prompt = processor.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt")
inputs = processor(images=images, text=prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)
generated_ids = model.generate(**inputs, max_new_tokens=32768, temperature=0.8)
generated_ids_trimmed = [
    out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
]
response = processor.batch_decode(
    generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
)[0]
print(response)

firstly,I meet the error:

AssertionError: text_pair input must of type str (single example), List[str] (batch or single pretok

Then,I add sentence to transform the value named "promtp" into str:

import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor

model_path = "/HOME/uestc_rhuang/uestc_rhuang_1/HDD_POOL/aNan/model/moonshotai/Kimi-VL-A3B-Thinking-2506"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype="auto",
    device_map="auto",
    trust_remote_code=True,
)
# If flash-attn has been installed, it is recommended to set torch_dtype=torch.bfloat16 and attn_implementation="flash_attention_2"
# to save memory and speed up inference
# model = AutoModelForCausalLM.from_pretrained(
#     model_path,
#     torch_dtype=torch.bfloat16,
#     device_map="auto",
#     trust_remote_code=True,
#     attn_implementation="flash_attention_2"
# )
processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)

image_paths = ["costom_neuron/dataset/hallusion_bench/VD/math/0_0.png", "costom_neuron/dataset/hallusion_bench/VD/math/0_1.png"]
images = [Image.open(path).convert("RGB") for path in image_paths]
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "image": image_path} for image_path in image_paths
        ] + [{"type": "text", "text": "Please infer step by step who this manuscript belongs to and what it records"}],
    },
]
prompt = processor.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt")
if not isinstance(prompt, str):
    if hasattr(prompt, 'tolist'):  # 如果是tensor
        prompt = str(prompt.tolist())
    else:
        prompt = str(prompt)
inputs = processor(images=images, text=prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)
generated_ids = model.generate(**inputs, max_new_tokens=32768, temperature=0.8)
generated_ids_trimmed = [
    out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
]
response = processor.batch_decode(
    generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
)[0]
print(response)

and then ,I will meet the warning :

Keyword arguments {'images': [<PIL.PngImagePlugin.PngImageFile image mode=P size=1340x742 at 0x7F2387117170>, <PIL.PngImagePlugin.PngImageFile image mode=P size=1308x770 at 0x7F236A35FBF0>]} not recognized.

This warning will let the model not use the image .

If you have any ideas about this problem ,please tell me!Thanks for your help!

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions