From 70b4a3662cb751389ca9ab7014bcee46e9178dc6 Mon Sep 17 00:00:00 2001
From: dengyunyang <584797741@qq.com>
Date: Tue, 25 Nov 2025 20:17:59 +0800
Subject: [PATCH] bugfix cache miss

---
 vllm/multimodal/cache.py      | 74 ++++++++++++++++++++++++++++++++++-
 vllm/multimodal/processing.py |  7 ++++
 2 files changed, 80 insertions(+), 1 deletion(-)

diff --git a/vllm/multimodal/cache.py b/vllm/multimodal/cache.py
index 642ec3fd7e3f..a2ab5bcc87a5 100644
--- a/vllm/multimodal/cache.py
+++ b/vllm/multimodal/cache.py
@@ -285,6 +285,28 @@ def is_cached(self, mm_hashes: list[str]) -> list[bool]:
         """
         return [self.is_cached_item(mm_hash) for mm_hash in mm_hashes]
 
+    @abstractmethod
+    def update_cache_item_eviction_order(self, mm_hash: str) -> None:
+        """
+        Touch a multi-modal item in the underlying cache.
+        If the underlying cache is not LRU, this has no effect.
+
+        Args:
+            mm_hash: The hash of the item to touch.
+        """
+        raise NotImplementedError
+
+    def update_cache_eviction_order(self, mm_hashes: list[str]) -> None:
+        """
+        Touch a sequence of multi-modal items in the underlying cache.
+        If the underlying cache is not LRU, this has no effect.
+
+        Args:
+            mm_hashes: The hash of each item to touch.
+        """
+        for mm_hash in mm_hashes:
+            self.update_cache_item_eviction_order(mm_hash)
+
 
 class MultiModalProcessorOnlyCache(BaseMultiModalProcessorCache):
     """
@@ -330,6 +352,10 @@ def get_and_update_item(
     def clear_cache(self) -> None:
         self._cache.clear()
 
+    @override
+    def update_cache_item_eviction_order(self, mm_hash: str) -> None:
+        self._cache.touch(mm_hash)
+
 
 class MultiModalProcessorSenderCache(BaseMultiModalProcessorCache):
     """
@@ -380,6 +406,10 @@ def get_and_update_item(
     def clear_cache(self) -> None:
         self._cache.clear()
 
+    @override
+    def update_cache_item_eviction_order(self, mm_hash: str) -> None:
+        self._cache.touch(mm_hash)
+
 
 class ShmObjectStoreSenderCache(BaseMultiModalProcessorCache):
     """
@@ -419,6 +449,10 @@ def __init__(self, vllm_config: "VllmConfig") -> None:
     def is_cached_item(self, mm_hash: str) -> bool:
         return self._shm_cache.is_cached(mm_hash)
 
+    @override
+    def update_cache_item_eviction_order(self, mm_hash: str) -> None:
+        return None
+
     @override
     def get_and_update_item(
         self,
@@ -550,12 +584,42 @@ def get_and_update_features(
         self,
         mm_features: list["MultiModalFeatureSpec"],
     ) -> list["MultiModalFeatureSpec"]:
-        """Update multimodal features with cached encoder outputs."""
+        """
+        Update multimodal features with cached encoder outputs.
+        Touch all identifiers first, before updating, so that items in
+        the updated list are not evicted during the update.
+        """
+        updated_mm_identifiers = [
+            feature.identifier for feature in mm_features
+        ]
+        self.update_cache_eviction_order(updated_mm_identifiers)
         for feature in mm_features:
             feature.data = self.get_and_update_item(feature.data,
                                                     feature.identifier)
         return mm_features
 
+    @abstractmethod
+    def update_cache_item_eviction_order(self, mm_hash: str) -> None:
+        """
+        Touch a multi-modal item in the underlying cache.
+        If the underlying cache is not LRU, this has no effect.
+
+        Args:
+            mm_hash: The hash of the item to touch.
+        """
+        raise NotImplementedError
+
+    def update_cache_eviction_order(self, mm_hashes: list[str]) -> None:
+        """
+        Touch a sequence of multi-modal items in the underlying cache.
+        If the underlying cache is not LRU, this has no effect.
+
+        Args:
+            mm_hashes: The hash of each item to touch.
+        """
+        for mm_hash in mm_hashes:
+            self.update_cache_item_eviction_order(mm_hash)
+
 
 class MultiModalReceiverCache(BaseMultiModalReceiverCache):
     """
@@ -596,6 +660,10 @@ def get_and_update_item(
     def clear_cache(self) -> None:
         self._cache.clear()
 
+    @override
+    def update_cache_item_eviction_order(self, mm_hash: str) -> None:
+        self._cache.touch(mm_hash)
+
 
 class ShmObjectStoreReceiverCache(BaseMultiModalReceiverCache):
     """
@@ -649,6 +717,10 @@ def get_and_update_item(
     def clear_cache(self) -> None:
         self._shm_cache.clear()
 
+    @override
+    def update_cache_item_eviction_order(self, mm_hash: str) -> None:
+        return None
+
 
 def engine_receiver_cache_from_config(
     vllm_config: "VllmConfig",
diff --git a/vllm/multimodal/processing.py b/vllm/multimodal/processing.py
index ce671479b1ae..7f645efdd17e 100644
--- a/vllm/multimodal/processing.py
+++ b/vllm/multimodal/processing.py
@@ -1696,6 +1696,13 @@ def _merge_mm_kwargs(
             for modality, hashes in mm_hashes.items()
         }
 
+        # Touch all mm hashes before updating so that hashes in the
+        # updated list are not evicted during the update.
+        updated_mm_hashes = [
+            item_hash for hashes in mm_hashes.values() for item_hash in hashes
+        ]
+        cache.update_cache_eviction_order(updated_mm_hashes)
+
         mm_missing_next_idx = defaultdict[str, int](lambda: 0)
 
         merged_kwargs = defaultdict[str,