Skip to content
2 changes: 1 addition & 1 deletion src/memos/configs/mem_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def parse_datetime(cls, value):
)
image_parser_llm: LLMConfigFactory | None = Field(
default=None,
description="Vision LLM for image parsing. Falls back to main llm if not set.",
description="Vision LLM for image parsing. Falls back to general_llm if not set.",
)
embedder: EmbedderConfigFactory = Field(
..., description="Embedder configuration for the MemReader"
Expand Down
7 changes: 3 additions & 4 deletions src/memos/mem_reader/multi_modal_struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,12 @@ def __init__(self, config: MultiModalStructMemReaderConfig):
super().__init__(simple_config)

# Image parser LLM (requires vision model)
# Falls back to main llm if not configured
# Falls back to general_llm if not configured (general_llm itself falls back to main llm)
self.image_parser_llm = (
LLMFactory.from_config(config.image_parser_llm)
if config.image_parser_llm is not None
else self.llm
else self.general_llm
)

# Initialize MultiModalParser for routing to different parsers
# Pass image_parser_llm for image parsing
self.multi_modal_parser = MultiModalParser(
Expand Down Expand Up @@ -1105,7 +1104,7 @@ def _process_transfer_multi_modal_data(
)
# Add preference memory extraction
future_pref = executor.submit(
process_preference_fine, raw_nodes, info, self.llm, self.embedder, **kwargs
process_preference_fine, raw_nodes, info, self.general_llm, self.embedder, **kwargs
)

# Collect results
Expand Down