From 9fd01a52c09fe653faa9498afe920c81775a0f9e Mon Sep 17 00:00:00 2001 From: wangbj127 <256472688+wangbj127@users.noreply.github.com> Date: Mon, 27 Apr 2026 23:27:34 +0800 Subject: [PATCH] [v0.18.0][BugFix] Fix DSV3.1 W4A8 TTFT degradation (#8674) ### What this PR does / why we need it? Fix TTFT degradation on DeepSeek-V3.1-W4A8. Revert the change to `balance_flag` introduced in https://github.com/vllm-project/vllm-ascend/pull/7611. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? - vLLM version: v0.18.0 Signed-off-by: Wangbingjie --- vllm_ascend/patch/platform/patch_balance_schedule.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm_ascend/patch/platform/patch_balance_schedule.py b/vllm_ascend/patch/platform/patch_balance_schedule.py index 9a2cc722..abcc8d9f 100644 --- a/vllm_ascend/patch/platform/patch_balance_schedule.py +++ b/vllm_ascend/patch/platform/patch_balance_schedule.py @@ -266,7 +266,7 @@ class BalanceScheduler(Scheduler): if len(self.running) == self.max_num_running_reqs: break - balance_flag = max(t.item() for t in self.balance_queue) >= self.max_num_running_reqs - 1 + balance_flag = max(t.item() for t in self.balance_queue) == self.max_num_running_reqs if balance_flag: break