### What this PR does / why we need it? Fixes an incorrect class-attribute assignment by changing it to an instance-attribute assignment, so that `reorder_batch_threshold` applies only to the current instance — avoiding global state pollution and conflicts between multiple instances. Backport of #7586. Signed-off-by: LookAround <lixushi@huawei.com> Signed-off-by: Mengqing Cao <cmq0113@163.com> Co-authored-by: LookAround0301 <lixushi@huawei.com>
This commit is contained in:
@@ -17,7 +17,6 @@
|
||||
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from typing import ClassVar
|
||||
|
||||
import torch
|
||||
import torch_npu
|
||||
@@ -213,7 +212,7 @@ class AscendAttentionMetadataBuilder(AttentionMetadataBuilder[AscendMetadata]):
|
||||
# Does this backend/builder reorder the batch?
|
||||
# If not, set this to None. Otherwise set it to the query
|
||||
# length that will be pulled into the front of the batch.
|
||||
reorder_batch_threshold: ClassVar[int] = 1
|
||||
reorder_batch_threshold: int = 1
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
@@ -242,7 +241,7 @@ class AscendAttentionMetadataBuilder(AttentionMetadataBuilder[AscendMetadata]):
|
||||
got {self.decode_threshold}"
|
||||
)
|
||||
|
||||
AscendAttentionMetadataBuilder.reorder_batch_threshold = self.decode_threshold
|
||||
self.reorder_batch_threshold = self.decode_threshold
|
||||
|
||||
scheduler_config = vllm_config.scheduler_config
|
||||
self.chunked_prefill_enabled = scheduler_config.enable_chunked_prefill
|
||||
|
||||
Reference in New Issue
Block a user