diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py index ca9095a8..715b3937 100644 --- a/vllm_ascend/worker/model_runner_v1.py +++ b/vllm_ascend/worker/model_runner_v1.py @@ -4276,8 +4276,9 @@ class NPUModelRunner(LoRAModelRunnerMixin, ECConnectorModelRunnerMixin): else: # This is the last chunk of prompt tokens to return. num_logits = num_remaining_tokens - completed_prefill_reqs.append(req_id) - prompt_logprobs_dict[req_id] = logprobs_tensors + if num_logits > 0: + completed_prefill_reqs.append(req_id) + prompt_logprobs_dict[req_id] = logprobs_tensors if num_logits <= 0: # This can happen for the final chunk if we prefilled exactly