[MISC] fix format check error (#654)
This PR makes format.sh work as expected. Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
@@ -1080,7 +1080,7 @@ class AscendMLAAttentionBackendImpl(MLAAttentionImpl):
|
||||
if len(kv_cache) > 0 and kv_cache[0].numel(
|
||||
) > 0 and attn_metadata.num_prefills > 0:
|
||||
slots = attn_metadata.slot_mapping
|
||||
# NOTE: Seperate the kv cache in advance to avoid OOM or other issues
|
||||
# NOTE: Separate the kv cache in advance to avoid OOM or other issues
|
||||
torch_npu._npu_reshape_and_cache(key=kv_c_normed.view(
|
||||
num_tokens, self.num_kv_heads, -1),
|
||||
value=k_pe,
|
||||
|
||||
@@ -60,7 +60,7 @@ class AscendSchedulerConfig(SchedulerConfig):
|
||||
)
|
||||
if self.is_multimodal_model:
|
||||
raise NotImplementedError(
|
||||
"currently AscendScheduler only supports LLM modles.")
|
||||
"currently AscendScheduler only supports LLM models.")
|
||||
if self.num_scheduler_steps > 1:
|
||||
raise NotImplementedError(
|
||||
"currently AscendScheduler doesn't support multi-step.")
|
||||
|
||||
@@ -57,8 +57,10 @@ def get_device_ips():
|
||||
universal_newlines=True)
|
||||
if npu_info.returncode != 0 or not os.path.exists(HCCN_TOOL_PATH):
|
||||
raise RuntimeError("No npu-smi/hccn_tool tools provided for NPU.")
|
||||
npu_start_idx = int(
|
||||
re.match(r'.*\n\t([0-9]+).*', npu_info.stdout).group(1))
|
||||
re_result = re.match(r'.*\n\t([0-9]+).*', npu_info.stdout)
|
||||
if re_result is None:
|
||||
raise RuntimeError("Can't find npu start index")
|
||||
npu_start_idx = int(re_result.group(1))
|
||||
device_ip_list = []
|
||||
for ip_offset in range(world_size):
|
||||
cmd = [
|
||||
@@ -68,7 +70,10 @@ def get_device_ips():
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
universal_newlines=True)
|
||||
device_ip = re.match(r'ipaddr:(.*)\n', device_ip_info.stdout).group(1)
|
||||
re_result = re.match(r'ipaddr:(.*)\n', device_ip_info.stdout)
|
||||
if re_result is None:
|
||||
raise RuntimeError("Can't find npu ip")
|
||||
device_ip = re_result.group(1)
|
||||
device_ip_list.append(device_ip)
|
||||
return device_ip_list
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ from vllm.distributed.parallel_state import (GroupCoordinator, get_world_group,
|
||||
# vllm-ascend will maintain its own EP GroupCoordinator and ETP GroupCoordinator for
|
||||
# customize parallel solution
|
||||
_EP: Optional[GroupCoordinator] = None
|
||||
_ETP: Optional[list[GroupCoordinator]] = None
|
||||
_ETP: Optional[GroupCoordinator] = None
|
||||
|
||||
|
||||
def get_ep_group() -> GroupCoordinator:
|
||||
@@ -69,4 +69,4 @@ def destory_ascend_model_parallel():
|
||||
global _ETP
|
||||
if _ETP:
|
||||
_ETP.destroy()
|
||||
_ETP = None
|
||||
_ETP = None
|
||||
|
||||
@@ -278,7 +278,7 @@ def fused_experts(
|
||||
dtype=dtype)
|
||||
|
||||
# TODO: npu_grouped_matmul output random values at [num_valid_tokens:, ...]
|
||||
# This created multiple NaN and index_add_ will mix them up which harms accracy
|
||||
# This created multiple NaN and index_add_ will mix them up which harms accuracy
|
||||
# remove this mask and filter after it being fixed
|
||||
num_valid_tokens = mask.sum()
|
||||
valid_token_mask = torch.arange(
|
||||
|
||||
@@ -227,7 +227,7 @@ def _set_cos_sin_cache(self, seq_len, device, dtype):
|
||||
persistent=False)
|
||||
|
||||
|
||||
# TODO: Patch when aclnn ops avaiable
|
||||
# TODO: Patch when aclnn ops available
|
||||
RotaryEmbedding.forward_oot = rope_forward_oot
|
||||
DeepseekScalingRotaryEmbedding.forward = native_rope_deepseek_forward
|
||||
DeepseekScalingRotaryEmbedding._set_cos_sin_cache = _set_cos_sin_cache
|
||||
|
||||
Reference in New Issue
Block a user