### What this PR does / why we need it?

Fixed an incorrect class-attribute assignment by changing it to an instance-attribute assignment. This ensures that `reorder_batch_threshold` applies only to the current instance, avoiding global pollution and multi-instance conflicts.

Backport of #7586

Signed-off-by: LookAround <lixushi@huawei.com>
Signed-off-by: Mengqing Cao <cmq0113@163.com>
Co-authored-by: LookAround0301 <lixushi@huawei.com>
This commit is contained in:
@@ -17,7 +17,6 @@
|
|||||||
|
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from typing import ClassVar
|
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
import torch_npu
|
import torch_npu
|
||||||
@@ -213,7 +212,7 @@ class AscendAttentionMetadataBuilder(AttentionMetadataBuilder[AscendMetadata]):
|
|||||||
# Does this backend/builder reorder the batch?
|
# Does this backend/builder reorder the batch?
|
||||||
# If not, set this to None. Otherwise set it to the query
|
# If not, set this to None. Otherwise set it to the query
|
||||||
# length that will be pulled into the front of the batch.
|
# length that will be pulled into the front of the batch.
|
||||||
reorder_batch_threshold: ClassVar[int] = 1
|
reorder_batch_threshold: int = 1
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
@@ -242,7 +241,7 @@ class AscendAttentionMetadataBuilder(AttentionMetadataBuilder[AscendMetadata]):
|
|||||||
got {self.decode_threshold}"
|
got {self.decode_threshold}"
|
||||||
)
|
)
|
||||||
|
|
||||||
AscendAttentionMetadataBuilder.reorder_batch_threshold = self.decode_threshold
|
self.reorder_batch_threshold = self.decode_threshold
|
||||||
|
|
||||||
scheduler_config = vllm_config.scheduler_config
|
scheduler_config = vllm_config.scheduler_config
|
||||||
self.chunked_prefill_enabled = scheduler_config.enable_chunked_prefill
|
self.chunked_prefill_enabled = scheduler_config.enable_chunked_prefill
|
||||||
|
|||||||
@@ -15,8 +15,6 @@
|
|||||||
# This file is a part of the vllm-ascend project.
|
# This file is a part of the vllm-ascend project.
|
||||||
#
|
#
|
||||||
|
|
||||||
from typing import ClassVar
|
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
import torch.distributed as dist
|
import torch.distributed as dist
|
||||||
@@ -61,11 +59,6 @@ class AscendAttentionCPMetadataBuilder(AscendAttentionMetadataBuilder):
|
|||||||
Extends AscendAttentionMetadataBuilder with PCP/DCP metadata handling.
|
Extends AscendAttentionMetadataBuilder with PCP/DCP metadata handling.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Does this backend/builder reorder the batch?
|
|
||||||
# If not, set this to None. Otherwise set it to the query
|
|
||||||
# length that will be pulled into the front of the batch.
|
|
||||||
reorder_batch_threshold: ClassVar[int] = 1
|
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
kv_cache_spec: AttentionSpec,
|
kv_cache_spec: AttentionSpec,
|
||||||
|
|||||||
Reference in New Issue
Block a user