[MISC] fix format check error (#654)

This PR makes format.sh work as expected.

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
wangxiyuan
2025-04-29 11:14:19 +08:00
committed by GitHub
parent 1fce70a2fb
commit 0dae55a9a3
17 changed files with 45 additions and 44 deletions

View File

@@ -1080,7 +1080,7 @@ class AscendMLAAttentionBackendImpl(MLAAttentionImpl):
if len(kv_cache) > 0 and kv_cache[0].numel(
) > 0 and attn_metadata.num_prefills > 0:
slots = attn_metadata.slot_mapping
# NOTE: Seperate the kv cache in advance to avoid OOM or other issues
# NOTE: Separate the kv cache in advance to avoid OOM or other issues
torch_npu._npu_reshape_and_cache(key=kv_c_normed.view(
num_tokens, self.num_kv_heads, -1),
value=k_pe,

View File

@@ -60,7 +60,7 @@ class AscendSchedulerConfig(SchedulerConfig):
)
if self.is_multimodal_model:
raise NotImplementedError(
"currently AscendScheduler only supports LLM modles.")
"currently AscendScheduler only supports LLM models.")
if self.num_scheduler_steps > 1:
raise NotImplementedError(
"currently AscendScheduler doesn't support multi-step.")

View File

@@ -57,8 +57,10 @@ def get_device_ips():
universal_newlines=True)
if npu_info.returncode != 0 or not os.path.exists(HCCN_TOOL_PATH):
raise RuntimeError("No npu-smi/hccn_tool tools provided for NPU.")
npu_start_idx = int(
re.match(r'.*\n\t([0-9]+).*', npu_info.stdout).group(1))
re_result = re.match(r'.*\n\t([0-9]+).*', npu_info.stdout)
if re_result is None:
raise RuntimeError("Can't find npu start index")
npu_start_idx = int(re_result.group(1))
device_ip_list = []
for ip_offset in range(world_size):
cmd = [
@@ -68,7 +70,10 @@ def get_device_ips():
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
universal_newlines=True)
device_ip = re.match(r'ipaddr:(.*)\n', device_ip_info.stdout).group(1)
re_result = re.match(r'ipaddr:(.*)\n', device_ip_info.stdout)
if re_result is None:
raise RuntimeError("Can't find npu ip")
device_ip = re_result.group(1)
device_ip_list.append(device_ip)
return device_ip_list

View File

@@ -7,7 +7,7 @@ from vllm.distributed.parallel_state import (GroupCoordinator, get_world_group,
# vllm-ascend will maintain its own EP GroupCoordinator and ETP GroupCoordinator for
# customize parallel solution
_EP: Optional[GroupCoordinator] = None
_ETP: Optional[list[GroupCoordinator]] = None
_ETP: Optional[GroupCoordinator] = None
def get_ep_group() -> GroupCoordinator:
@@ -69,4 +69,4 @@ def destory_ascend_model_parallel():
global _ETP
if _ETP:
_ETP.destroy()
_ETP = None
_ETP = None

View File

@@ -278,7 +278,7 @@ def fused_experts(
dtype=dtype)
# TODO: npu_grouped_matmul output random values at [num_valid_tokens:, ...]
# This created multiple NaN and index_add_ will mix them up which harms accracy
# This created multiple NaN and index_add_ will mix them up which harms accuracy
# remove this mask and filter after it being fixed
num_valid_tokens = mask.sum()
valid_token_mask = torch.arange(

View File

@@ -227,7 +227,7 @@ def _set_cos_sin_cache(self, seq_len, device, dtype):
persistent=False)
# TODO: Patch when aclnn ops avaiable
# TODO: Patch when aclnn ops available
RotaryEmbedding.forward_oot = rope_forward_oot
DeepseekScalingRotaryEmbedding.forward = native_rope_deepseek_forward
DeepseekScalingRotaryEmbedding._set_cos_sin_cache = _set_cos_sin_cache