From 742f679c7d56ebea9cee449ba2428940aa353bb9 Mon Sep 17 00:00:00 2001
From: Yikun Jiang
Date: Sat, 26 Apr 2025 23:15:58 +0800
Subject: [PATCH] Remove prompt string from engine core data structures (#663)

### What this PR does / why we need it?
vLLM Ascend side followup on:
[Core] Remove prompt string from engine core data structures
https://github.com/vllm-project/vllm/commit/df6f3ce883bbfb1884b040f178496a3da33209b9

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
CI passed

Signed-off-by: Yikun Jiang
---
 vllm_ascend/worker/model_runner_v1.py | 42 ++++++++++++++++++---------
 1 file changed, 28 insertions(+), 14 deletions(-)

diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py
index 5a9f78f..5e5c003 100644
--- a/vllm_ascend/worker/model_runner_v1.py
+++ b/vllm_ascend/worker/model_runner_v1.py
@@ -346,20 +346,34 @@ class NPUModelRunner:
                 generator.manual_seed(sampling_params.seed)
             else:
                 generator = None
-
-            self.requests[req_id] = CachedRequestState(
-                req_id=req_id,
-                prompt_token_ids=new_req_data.prompt_token_ids,
-                prompt=new_req_data.prompt,
-                mm_inputs=new_req_data.mm_inputs,
-                mm_positions=new_req_data.mm_positions,
-                sampling_params=sampling_params,
-                generator=generator,
-                block_ids=new_req_data.block_ids,
-                num_computed_tokens=new_req_data.num_computed_tokens,
-                output_token_ids=[],
-                lora_request=new_req_data.lora_request,
-            )
+            if vllm_version_is("0.8.4"):
+                self.requests[req_id] = CachedRequestState(
+                    req_id=req_id,
+                    prompt_token_ids=new_req_data.prompt_token_ids,
+                    prompt=new_req_data.prompt,
+                    mm_inputs=new_req_data.mm_inputs,
+                    mm_positions=new_req_data.mm_positions,
+                    sampling_params=sampling_params,
+                    generator=generator,
+                    block_ids=new_req_data.block_ids,
+                    num_computed_tokens=new_req_data.num_computed_tokens,
+                    output_token_ids=[],
+                    lora_request=new_req_data.lora_request,
+                )
+            else:
+                # the prompt removed by: https://github.com/vllm-project/vllm/pull/17214
+                self.requests[req_id] = CachedRequestState(
+                    req_id=req_id,
+                    prompt_token_ids=new_req_data.prompt_token_ids,
+                    mm_inputs=new_req_data.mm_inputs,
+                    mm_positions=new_req_data.mm_positions,
+                    sampling_params=sampling_params,
+                    generator=generator,
+                    block_ids=new_req_data.block_ids,
+                    num_computed_tokens=new_req_data.num_computed_tokens,
+                    output_token_ids=[],
+                    lora_request=new_req_data.lora_request,
+                )
 
             req_ids_to_add.append(req_id)