[v0.18.0][Bugfix] Fixed wrong class attribute assignment (#7586) (#7655)

### What this PR does / why we need it?
Replaced an incorrect class attribute assignment with an instance
attribute assignment. This ensures reorder_batch_threshold applies only
to the current instance, avoiding global state pollution and conflicts
between multiple instances.

Backport of #7586

Signed-off-by: LookAround <lixushi@huawei.com>
Signed-off-by: Mengqing Cao <cmq0113@163.com>
Co-authored-by: LookAround0301 <lixushi@huawei.com>
This commit is contained in:
Mengqing Cao
2026-03-27 11:20:59 +08:00
committed by GitHub
parent 2c2d8bb015
commit 29308ac3a9
2 changed files with 2 additions and 10 deletions

View File

@@ -17,7 +17,6 @@
from dataclasses import dataclass
from enum import Enum
from typing import ClassVar
import torch
import torch_npu
@@ -213,7 +212,7 @@ class AscendAttentionMetadataBuilder(AttentionMetadataBuilder[AscendMetadata]):
# Does this backend/builder reorder the batch?
# If not, set this to None. Otherwise set it to the query
# length that will be pulled into the front of the batch.
reorder_batch_threshold: ClassVar[int] = 1
reorder_batch_threshold: int = 1
def __init__(
self,
@@ -242,7 +241,7 @@ class AscendAttentionMetadataBuilder(AttentionMetadataBuilder[AscendMetadata]):
got {self.decode_threshold}"
)
AscendAttentionMetadataBuilder.reorder_batch_threshold = self.decode_threshold
self.reorder_batch_threshold = self.decode_threshold
scheduler_config = vllm_config.scheduler_config
self.chunked_prefill_enabled = scheduler_config.enable_chunked_prefill

View File

@@ -15,8 +15,6 @@
# This file is a part of the vllm-ascend project.
#
from typing import ClassVar
import numpy as np
import torch
import torch.distributed as dist
@@ -61,11 +59,6 @@ class AscendAttentionCPMetadataBuilder(AscendAttentionMetadataBuilder):
Extends AscendAttentionMetadataBuilder with PCP/DCP metadata handling.
"""
# Does this backend/builder reorder the batch?
# If not, set this to None. Otherwise set it to the query
# length that will be pulled into the front of the batch.
reorder_batch_threshold: ClassVar[int] = 1
def __init__(
self,
kv_cache_spec: AttentionSpec,