HELP!I meet some trouble with tockenizer!!!

environmet:
accelerate==1.8.1
aiohappyeyeballs==2.6.1
aiohttp==3.12.13
aiosignal==1.4.0
annotated-types==0.7.0
anyio==4.9.0
attrs==25.3.0
blobfile==3.0.0
cachetools==5.5.2
certifi==2025.8.3
charset-normalizer==3.4.3
cnpip==1.2.2
contourpy==1.3.2
cycler==0.12.1
datasets==3.6.0
dill==0.3.8
distro==1.9.0
einops==0.8.1
filelock==3.18.0
fonttools==4.58.5
frozenlist==1.7.0
fsspec==2025.3.0
google-auth==2.40.3
google-genai==1.24.0
h11==0.16.0
hf-xet==1.1.5
httpcore==1.0.9
httpx==0.28.1
huggingface-hub==0.33.2
idna==3.10
Jinja2==3.1.6
jiter==0.10.0
kiwisolver==1.4.8
lxml==6.0.0
MarkupSafe==3.0.2
matplotlib==3.10.3
mpmath==1.3.0
multidict==6.6.3
multiprocess==0.70.16
networkx==3.5
numpy==2.3.1
nvidia-cublas-cu12==12.6.4.1
nvidia-cuda-cupti-cu12==12.6.80
nvidia-cuda-nvrtc-cu12==12.6.77
nvidia-cuda-runtime-cu12==12.6.77
nvidia-cudnn-cu12==9.5.1.17
nvidia-cufft-cu12==11.3.0.4
nvidia-cufile-cu12==1.11.1.6
nvidia-curand-cu12==10.3.7.77
nvidia-cusolver-cu12==11.7.1.2
nvidia-cusparse-cu12==12.5.4.2
nvidia-cusparselt-cu12==0.6.3
nvidia-nccl-cu12==2.26.2
nvidia-nvjitlink-cu12==12.6.85
nvidia-nvtx-cu12==12.6.77
openai==1.93.0
packaging==25.0
pandas==2.3.0
pillow==11.3.0
polars==1.31.0
propcache==0.3.2
protobuf==6.31.1
psutil==7.0.0
pyarrow==20.0.0
pyasn1==0.6.1
pyasn1_modules==0.4.2
pycryptodomex==3.23.0
pydantic==2.11.7
pydantic_core==2.33.2
pyparsing==3.2.3
python-dateutil==2.9.0.post0
pytz==2025.2
PyYAML==6.0.2
regex==2024.11.6
requests==2.32.5
rsa==4.9.1
safetensors==0.5.3
setuptools==80.9.0
six==1.17.0
sniffio==1.3.1
sympy==1.14.0
tenacity==8.5.0
tiktoken==0.9.0
timm==1.0.16
tokenizers==0.20.3
torch==2.7.1
torchvision==0.22.1
tqdm==4.67.1
transformers==4.45.1
triton==3.3.1
typing-inspection==0.4.1
typing_extensions==4.14.0
tzdata==2025.2
urllib3==2.5.0
websockets==15.0.1
wheel==0.45.1
xxhash==3.5.0
yarl==1.20.1


Hard device: nvidia A800 80G


when I use the script:
```
import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor

model_path = "/HOME/uestc_rhuang/uestc_rhuang_1/HDD_POOL/aNan/model/moonshotai/Kimi-VL-A3B-Thinking-2506"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype="auto",
    device_map="auto",
    trust_remote_code=True,
)
# If flash-attn has been installed, it is recommended to set torch_dtype=torch.bfloat16 and attn_implementation="flash_attention_2"
# to save memory and speed up inference
# model = AutoModelForCausalLM.from_pretrained(
#     model_path,
#     torch_dtype=torch.bfloat16,
#     device_map="auto",
#     trust_remote_code=True,
#     attn_implementation="flash_attention_2"
# )
processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)

image_paths = ["costom_neuron/dataset/hallusion_bench/VD/math/0_0.png", "costom_neuron/dataset/hallusion_bench/VD/math/0_1.png"]
images = [Image.open(path).convert("RGB") for path in image_paths]
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "image": image_path} for image_path in image_paths
        ] + [{"type": "text", "text": "Please infer step by step who this manuscript belongs to and what it records"}],
    },
]
prompt = processor.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt")
inputs = processor(images=images, text=prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)
generated_ids = model.generate(**inputs, max_new_tokens=32768, temperature=0.8)
generated_ids_trimmed = [
    out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
]
response = processor.batch_decode(
    generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
)[0]
print(response)

```
firstly,I meet the error:

```
AssertionError: text_pair input must of type str (single example), List[str] (batch or single pretok
```

Then,I add sentence to transform the value named "promtp" into str:

```
import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor

model_path = "/HOME/uestc_rhuang/uestc_rhuang_1/HDD_POOL/aNan/model/moonshotai/Kimi-VL-A3B-Thinking-2506"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype="auto",
    device_map="auto",
    trust_remote_code=True,
)
# If flash-attn has been installed, it is recommended to set torch_dtype=torch.bfloat16 and attn_implementation="flash_attention_2"
# to save memory and speed up inference
# model = AutoModelForCausalLM.from_pretrained(
#     model_path,
#     torch_dtype=torch.bfloat16,
#     device_map="auto",
#     trust_remote_code=True,
#     attn_implementation="flash_attention_2"
# )
processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)

image_paths = ["costom_neuron/dataset/hallusion_bench/VD/math/0_0.png", "costom_neuron/dataset/hallusion_bench/VD/math/0_1.png"]
images = [Image.open(path).convert("RGB") for path in image_paths]
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "image": image_path} for image_path in image_paths
        ] + [{"type": "text", "text": "Please infer step by step who this manuscript belongs to and what it records"}],
    },
]
prompt = processor.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt")
if not isinstance(prompt, str):
    if hasattr(prompt, 'tolist'):  # 如果是tensor
        prompt = str(prompt.tolist())
    else:
        prompt = str(prompt)
inputs = processor(images=images, text=prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)
generated_ids = model.generate(**inputs, max_new_tokens=32768, temperature=0.8)
generated_ids_trimmed = [
    out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
]
response = processor.batch_decode(
    generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
)[0]
print(response)
```


and then ,I will meet the warning :


```
Keyword arguments {'images': [<PIL.PngImagePlugin.PngImageFile image mode=P size=1340x742 at 0x7F2387117170>, <PIL.PngImagePlugin.PngImageFile image mode=P size=1308x770 at 0x7F236A35FBF0>]} not recognized.
```
This warning will let the model not use the image .

If you have any ideas about this problem ,please tell me!Thanks for your help!

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

HELP!I meet some trouble with tockenizer!!! #68

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

HELP!I meet some trouble with tockenizer!!! #68

Description

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions