Fix assertion error on sampled_token_ids shape

Ronald1995 · Ronald1995 · commit 682ab9596f37 · 2025-12-03T16:55:41.000+08:00
Signed-off-by: Ronald1995 <ronaldautomobile@163.com>
diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py
@@ -2626,7 +2626,7 @@ def sample_tokens(
 
             num_sampled_tokens = sampler_output.sampled_token_ids.shape[0]
             sampled_token_ids = sampler_output.sampled_token_ids
-            self.input_batch.prev_sampled_token_ids = None
+            
             if not self.use_async_scheduling:
                 # Get the valid generated tokens.
                 max_gen_len = sampled_token_ids.shape[-1]
@@ -2647,7 +2647,7 @@ def sample_tokens(
                 invalid_req_indices = discard_sampled_tokens_req_indices.tolist(
                 )
                 invalid_req_indices_set = set(invalid_req_indices)
-                if self.input_batch.prev_sampled_token_ids is None:
+                if self.num_spec_tokens <= 0:
                     assert sampled_token_ids.shape[-1] == 1
                     # Cache the sampled tokens on the NPU and avoid CPU sync.
                     # These will be copied into input_ids in the next step