[Bugfix][DispatchFFNCombine] resolve vec error caused by unaligned UB access (#6707)
### What this PR does / why we need it?
1. Fix a vec error caused by unaligned UB access in
DispatchFFNCombine;
2. Fix expert_token_nums tensor defined on host instead of NPU in
moe_comm_method.py
3. Fix a multi-core copy issue for expert_token_nums in the DispatchFFNCombine
op (a single AIV core is sufficient for the copy)
### Does this PR introduce _any_ user-facing change?
No, this PR does not introduce any user-facing changes. The fix only
addresses internal memory access logic and does not modify any public
APIs, interfaces, or user-visible behaviors.
### How was this patch tested?
`export VLLM_ASCEND_ENABLE_FUSED_MC2=1`
vLLM version: v0.15.0
- vLLM version: v0.15.0
- vLLM main:
9562912cea
Signed-off-by: xulei_ict <xulei292@huawei.com>
Co-authored-by: xulei_ict <xulei292@huawei.com>
This commit is contained in:
@@ -277,6 +277,13 @@ class FusedMC2CommImpl(MoECommMethod):
|
||||
Communication and Computation parallelism on Ascend devices.
|
||||
"""
|
||||
|
||||
def __init__(self, moe_config):
    """Initialize the fused-MC2 MoE communication implementation.

    When the fused MC2 path is enabled (VLLM_ASCEND_ENABLE_FUSED_MC2 == 1),
    a persistent per-local-expert token-count buffer is pre-allocated on the
    NPU; otherwise the attribute is left as None.
    """
    super().__init__(moe_config)
    fused_mc2_enabled = envs_ascend.VLLM_ASCEND_ENABLE_FUSED_MC2 == 1
    # The buffer is created on the NPU because the custom op receives it as
    # an output argument (expert_token_nums=...) and writes into it directly
    # — presumably a host tensor here caused the bug this commit fixes;
    # see the commit message.
    self.expert_token_nums = (
        torch.zeros(
            [self.moe_config.num_local_experts],
            dtype=torch.int32,
            device="npu",
        )
        if fused_mc2_enabled
        else None
    )
||||
def _get_token_dispatcher(self):
    """Return a fresh MC2 token dispatcher for this communication method."""
    dispatcher = TokenDispatcherWithMC2()
    return dispatcher
||||
@@ -325,7 +332,6 @@ class FusedMC2CommImpl(MoECommMethod):
|
||||
expert_tokens = None
|
||||
if envs_ascend.VLLM_ASCEND_ENABLE_FUSED_MC2 == 1:
|
||||
out = torch.empty_like(hidden_states)
|
||||
expert_token_nums = torch.zeros([self.moe_config.num_local_experts], dtype=torch.int32)
|
||||
torch.ops._C_ascend.dispatch_ffn_combine( # type: ignore
|
||||
x=hidden_states,
|
||||
weight1=w1,
|
||||
@@ -337,9 +343,9 @@ class FusedMC2CommImpl(MoECommMethod):
|
||||
group=self.token_dispatcher.moe_all_to_all_group_name,
|
||||
max_output_size=65536,
|
||||
out=out,
|
||||
expert_token_nums=expert_token_nums,
|
||||
expert_token_nums=self.expert_token_nums,
|
||||
)
|
||||
expert_tokens = expert_token_nums
|
||||
expert_tokens = self.expert_token_nums
|
||||
elif envs_ascend.VLLM_ASCEND_ENABLE_FUSED_MC2 == 2:
|
||||
assert expert_map is not None, "expert_map cannot be None."
|
||||
group_list_type = 1
|
||||
|
||||
Reference in New Issue
Block a user