From 0d8c0f1a24a58011908b17994fffb5fcfebe5e22 Mon Sep 17 00:00:00 2001
From: JiangWeixiang <854746559@qq.com>
Date: Wed, 10 Dec 2025 17:45:58 +0800
Subject: [PATCH] [Bugfix] Fix out-of-bounds access to token_id due to uninitialized logprobs (#4248)

### What this PR does / why we need it?

The logprobs_tensors entry was not initialized before its token_id member was accessed, leading to a crash when tokenizer.decode() was called with a negative token_id.

### How was this patch tested?

Constructed an inference request with two prompts and set SamplingParams(prompt_logprobs=...) (e.g., prompt_logprobs=1). After applying the fix (guarding the assignment of logprobs_tensors), the same request completed successfully without errors, and the returned logprobs matched the expected values. A minimal reproduction sketch is included after the diff below.

- vLLM version: v0.12.0
- vLLM main: https://github.com/vllm-project/vllm/commit/ad32e3e19ccf0526cb6744a5fed09a138a5fb2f9

Signed-off-by: jiangweixiang
Co-authored-by: jiangweixiang
Co-authored-by: Mengqing Cao
---
 vllm_ascend/worker/model_runner_v1.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py
index ca9095a8..715b3937 100644
--- a/vllm_ascend/worker/model_runner_v1.py
+++ b/vllm_ascend/worker/model_runner_v1.py
@@ -4276,8 +4276,9 @@ class NPUModelRunner(LoRAModelRunnerMixin, ECConnectorModelRunnerMixin):
             else:
                 # This is the last chunk of prompt tokens to return.
                 num_logits = num_remaining_tokens
-                completed_prefill_reqs.append(req_id)
-                prompt_logprobs_dict[req_id] = logprobs_tensors
+                if num_logits > 0:
+                    completed_prefill_reqs.append(req_id)
+                    prompt_logprobs_dict[req_id] = logprobs_tensors
 
             if num_logits <= 0:
                 # This can happen for the final chunk if we prefilled exactly
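
For reference, a minimal reproduction sketch of the scenario described under "How was this patch tested?". The model name below is a placeholder assumption (the original test's model is not specified); the sketch only relies on vLLM's public `LLM` / `SamplingParams` API.

```python
# Minimal reproduction sketch for the prompt_logprobs crash fixed above.
# Assumption: the model identifier is a placeholder, not the one used in
# the original test.
from vllm import LLM, SamplingParams

llm = LLM(model="Qwen/Qwen2.5-0.5B-Instruct")  # placeholder model

# prompt_logprobs=1 requests the top-1 logprob for every prompt token,
# which exercises the chunked-prefill logprobs path patched above.
params = SamplingParams(prompt_logprobs=1, max_tokens=8)

# Two prompts, matching the tested scenario.
outputs = llm.generate(
    ["Hello, my name is", "The capital of France is"],
    params,
)

for out in outputs:
    # Before the fix, an uninitialized (negative) token_id could reach
    # tokenizer.decode() while building these results and crash; after
    # the fix this prints the per-token prompt logprobs.
    print(out.prompt_logprobs)
```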