From 0d8c0f1a24a58011908b17994fffb5fcfebe5e22 Mon Sep 17 00:00:00 2001
From: JiangWeixiang <854746559@qq.com>
Date: Wed, 10 Dec 2025 17:45:58 +0800
Subject: [PATCH] [Bugfix] Fix out-of-bounds access to token_id due to uninitialized logprobs (#4248)

### What this PR does / why we need it?

The logprobs_tensors entry was not initialized before its token_id member was accessed, leading to a crash when tokenizer.decode() was called with a negative token_id.

### How was this patch tested?

Constructed an inference request with two prompts and set SamplingParams(prompt_logprobs=...) (e.g., prompt_logprobs=1). After applying the fix (guarding the assignment of logprobs_tensors), the same request completed successfully without errors, and the returned logprobs matched the expected values. A minimal reproduction sketch is included after the diff below.

- vLLM version: v0.12.0
- vLLM main: https://github.com/vllm-project/vllm/commit/ad32e3e19ccf0526cb6744a5fed09a138a5fb2f9

Signed-off-by: jiangweixiang
Co-authored-by: jiangweixiang
Co-authored-by: Mengqing Cao
---
 vllm_ascend/worker/model_runner_v1.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py
index ca9095a8..715b3937 100644
--- a/vllm_ascend/worker/model_runner_v1.py
+++ b/vllm_ascend/worker/model_runner_v1.py
@@ -4276,8 +4276,9 @@ class NPUModelRunner(LoRAModelRunnerMixin, ECConnectorModelRunnerMixin):
             else:
                 # This is the last chunk of prompt tokens to return.
                 num_logits = num_remaining_tokens
-                completed_prefill_reqs.append(req_id)
-                prompt_logprobs_dict[req_id] = logprobs_tensors
+                if num_logits > 0:
+                    completed_prefill_reqs.append(req_id)
+                    prompt_logprobs_dict[req_id] = logprobs_tensors
 
             if num_logits <= 0:
                 # This can happen for the final chunk if we prefilled exactly
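
For reference, a minimal reproduction sketch of the scenario described under "How was this patch tested?". The model name below is a placeholder assumption (the original test's model is not specified); the sketch only relies on vLLM's public `LLM` / `SamplingParams` API.

```python
# Minimal reproduction sketch for the prompt_logprobs crash fixed above.
# Assumption: the model identifier is a placeholder, not the one used in
# the original test.
from vllm import LLM, SamplingParams

llm = LLM(model="Qwen/Qwen2.5-0.5B-Instruct")  # placeholder model

# prompt_logprobs=1 requests the top-1 logprob for every prompt token,
# which exercises the chunked-prefill logprobs path patched above.
params = SamplingParams(prompt_logprobs=1, max_tokens=8)

# Two prompts, matching the tested scenario.
outputs = llm.generate(
    ["Hello, my name is", "The capital of France is"],
    params,
)

for out in outputs:
    # Before the fix, an uninitialized (negative) token_id could reach
    # tokenizer.decode() while building these results and crash; after
    # the fix this prints the per-token prompt logprobs.
    print(out.prompt_logprobs)
```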