[Kernel] Add moe normal ops (#4810)
### What this PR does / why we need it?
1. Add the implementation of normal Aclnn operators: MoeCombineNormal,
MoeDispatchNormal, NotifyDispatch, and DispatchLayout.
- MoeCombineNormal: Implements the combine logic within MoE operations.
- MoeDispatchNormal: Implements the dispatch logic within MoE
operations.
- NotifyDispatch: Exchanges topk_idx information among different ranks
to calculate the device memory required for the dispatch stage.
- DispatchLayout: Used to calculate information related to the device
memory layout for the dispatch stage.
2. Provide PyTorch interfaces for normal operators—get_dispatch_layout,
dispatch_prefill, and combine_prefill—to be used for MoE communication
during the prefill stage in vLLM.
- get_dispatch_layout: Calculates information related to the device
memory layout for the dispatch operator, and is called before
dispatch_prefill.
- dispatch_prefill: Initiates the dispatch operation.
- combine_prefill: Initiates the combine operation.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
The functionality has already been validated using the local Qwen model.
Test cases will be added after support for multi-NPU use cases in the CI
pipeline is finalized.
- vLLM version: v0.12.0
- vLLM main:
ad32e3e19c
Signed-off-by: shiro-zzzz <zhangdianhao@huawei.com>
This commit is contained in:
@@ -45,7 +45,7 @@ elif [[ "$SOC_VERSION" =~ ^ascend910_93 ]]; then
|
||||
exit 1
|
||||
fi
|
||||
# for dispatch_gmm_combine_decode
|
||||
yes | cp "${HCCL_STRUCT_FILE_PATH}" "${ROOT_DIR}/csrc/dispatch_gmm_combine_decode/op_kernel"
|
||||
yes | cp "${HCCL_STRUCT_FILE_PATH}" "${ROOT_DIR}/csrc/utils/inc/kernel"
|
||||
# for dispatch_ffn_combine
|
||||
SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)
|
||||
TARGET_DIR="$SCRIPT_DIR/dispatch_ffn_combine/op_kernel/utils/"
|
||||
@@ -58,7 +58,19 @@ elif [[ "$SOC_VERSION" =~ ^ascend910_93 ]]; then
|
||||
|
||||
sed -i 's/struct HcclOpResParam {/struct HcclOpResParamCustom {/g' "$TARGET_FILE"
|
||||
sed -i 's/struct HcclRankRelationResV2 {/struct HcclRankRelationResV2Custom {/g' "$TARGET_FILE"
|
||||
CUSTOM_OPS="grouped_matmul_swiglu_quant_weight_nz_tensor_list;lightning_indexer;sparse_flash_attention;dispatch_ffn_combine;dispatch_gmm_combine_decode;"
|
||||
|
||||
CUSTOM_OPS_ARRAY=(
|
||||
"grouped_matmul_swiglu_quant_weight_nz_tensor_list"
|
||||
"lightning_indexer"
|
||||
"sparse_flash_attention"
|
||||
"dispatch_ffn_combine"
|
||||
"dispatch_gmm_combine_decode"
|
||||
"moe_combine_normal"
|
||||
"moe_dispatch_normal"
|
||||
"dispatch_layout"
|
||||
"notify_dispatch"
|
||||
)
|
||||
CUSTOM_OPS=$(IFS=';'; echo "${CUSTOM_OPS_ARRAY[*]}")
|
||||
SOC_ARG="ascend910_93"
|
||||
else
|
||||
# others
|
||||
@@ -71,7 +83,7 @@ fi
|
||||
cd csrc
|
||||
rm -rf build output
|
||||
echo "building custom ops $CUSTOM_OPS for $SOC_VERSION"
|
||||
bash build.sh -n $CUSTOM_OPS -c $SOC_ARG
|
||||
bash build.sh -n "$CUSTOM_OPS" -c "$SOC_ARG"
|
||||
|
||||
# install custom ops to vllm_ascend/_cann_ops_custom
|
||||
./output/CANN-custom_ops*.run --install-path=$ROOT_DIR/vllm_ascend/_cann_ops_custom
|
||||
|
||||
Reference in New Issue
Block a user