From 70b4a3662cb751389ca9ab7014bcee46e9178dc6 Mon Sep 17 00:00:00 2001
From: dengyunyang <584797741@qq.com>
Date: Tue, 25 Nov 2025 20:17:59 +0800
Subject: [PATCH] bugfix cache miss

---
 vllm/multimodal/cache.py      | 74 ++++++++++++++++++++++++++++++++++-
 vllm/multimodal/processing.py |  7 ++++
 2 files changed, 80 insertions(+), 1 deletion(-)

diff --git a/vllm/multimodal/cache.py b/vllm/multimodal/cache.py
index 642ec3fd7e3f..a2ab5bcc87a5 100644
--- a/vllm/multimodal/cache.py
+++ b/vllm/multimodal/cache.py
@@ -285,6 +285,28 @@ def is_cached(self, mm_hashes: list[str]) -> list[bool]:
         """
         return [self.is_cached_item(mm_hash) for mm_hash in mm_hashes]
 
+    @abstractmethod
+    def update_cache_item_eviction_order(self, mm_hash: str) -> None:
+        """
+        Touch a multi-modal item in the underlying cache.
+        If the underlying cache is not LRU, this has no effect.
+
+        Args:
+            mm_hash: The hash of the item to touch.
+        """
+        raise NotImplementedError
+
+    def update_cache_eviction_order(self, mm_hashes: list[str]) -> None:
+        """
+        Touch a sequence of multi-modal items in the underlying cache.
+        If the underlying cache is not LRU, this has no effect.
+
+        Args:
+            mm_hashes: The hash of each item to touch.
+        """
+        for mm_hash in mm_hashes:
+            self.update_cache_item_eviction_order(mm_hash)
+
 
 class MultiModalProcessorOnlyCache(BaseMultiModalProcessorCache):
     """
@@ -330,6 +352,10 @@ def get_and_update_item(
     def clear_cache(self) -> None:
         self._cache.clear()
 
+    @override
+    def update_cache_item_eviction_order(self, mm_hash: str) -> None:
+        self._cache.touch(mm_hash)
+
 
 class MultiModalProcessorSenderCache(BaseMultiModalProcessorCache):
     """
@@ -380,6 +406,10 @@ def get_and_update_item(
     def clear_cache(self) -> None:
         self._cache.clear()
 
+    @override
+    def update_cache_item_eviction_order(self, mm_hash: str) -> None:
+        self._cache.touch(mm_hash)
+
 
 class ShmObjectStoreSenderCache(BaseMultiModalProcessorCache):
     """
@@ -419,6 +449,10 @@ def __init__(self, vllm_config: "VllmConfig") -> None:
     def is_cached_item(self, mm_hash: str) -> bool:
         return self._shm_cache.is_cached(mm_hash)
 
+    @override
+    def update_cache_item_eviction_order(self, mm_hash: str) -> None:
+        return None
+
     @override
     def get_and_update_item(
         self,
@@ -550,12 +584,42 @@ def get_and_update_features(
         self,
         mm_features: list["MultiModalFeatureSpec"],
     ) -> list["MultiModalFeatureSpec"]:
-        """Update multimodal features with cached encoder outputs."""
+        """
+        Update multimodal features with cached encoder outputs.
+        Touch all identifiers first, before updating, so that items in
+        the updated list are not evicted during the update.
+        """
+        updated_mm_identifiers = [
+            feature.identifier for feature in mm_features
+        ]
+        self.update_cache_eviction_order(updated_mm_identifiers)
         for feature in mm_features:
             feature.data = self.get_and_update_item(feature.data,
                                                     feature.identifier)
         return mm_features
 
+    @abstractmethod
+    def update_cache_item_eviction_order(self, mm_hash: str) -> None:
+        """
+        Touch a multi-modal item in the underlying cache.
+        If the underlying cache is not LRU, this has no effect.
+
+        Args:
+            mm_hash: The hash of the item to touch.
+        """
+        raise NotImplementedError
+
+    def update_cache_eviction_order(self, mm_hashes: list[str]) -> None:
+        """
+        Touch a sequence of multi-modal items in the underlying cache.
+        If the underlying cache is not LRU, this has no effect.
+
+        Args:
+            mm_hashes: The hash of each item to touch.
+        """
+        for mm_hash in mm_hashes:
+            self.update_cache_item_eviction_order(mm_hash)
+
 
 class MultiModalReceiverCache(BaseMultiModalReceiverCache):
     """
@@ -596,6 +660,10 @@ def get_and_update_item(
     def clear_cache(self) -> None:
         self._cache.clear()
 
+    @override
+    def update_cache_item_eviction_order(self, mm_hash: str) -> None:
+        self._cache.touch(mm_hash)
+
 
 class ShmObjectStoreReceiverCache(BaseMultiModalReceiverCache):
     """
@@ -649,6 +717,10 @@ def get_and_update_item(
     def clear_cache(self) -> None:
         self._shm_cache.clear()
 
+    @override
+    def update_cache_item_eviction_order(self, mm_hash: str) -> None:
+        return None
+
 
 def engine_receiver_cache_from_config(
     vllm_config: "VllmConfig",
diff --git a/vllm/multimodal/processing.py b/vllm/multimodal/processing.py
index ce671479b1ae..7f645efdd17e 100644
--- a/vllm/multimodal/processing.py
+++ b/vllm/multimodal/processing.py
@@ -1696,6 +1696,13 @@ def _merge_mm_kwargs(
             for modality, hashes in mm_hashes.items()
         }
 
+        # Touch all mm hashes before updating so that hashes in the
+        # updated list are not evicted during the update.
+        updated_mm_hashes = [
+            item_hash for hashes in mm_hashes.values() for item_hash in hashes
+        ]
+        cache.update_cache_eviction_order(updated_mm_hashes)
+
         mm_missing_next_idx = defaultdict[str, int](lambda: 0)
 
         merged_kwargs = defaultdict[str,