Revert "[Kernel] add custom moe ops for prefill" (#4806)
Reverts vllm-project/vllm-ascend#4194 because it broke CI in https://github.com/vllm-project/vllm-ascend/actions/runs/20030369087/job/57437687382?pr=4791

Co-authored-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
@@ -45,19 +45,7 @@ elif [[ "$SOC_VERSION" =~ ^ascend910_93 ]]; then
|
||||
|
||||
sed -i 's/struct HcclOpResParam {/struct HcclOpResParamCustom {/g' "$TARGET_FILE"
|
||||
sed -i 's/struct HcclRankRelationResV2 {/struct HcclRankRelationResV2Custom {/g' "$TARGET_FILE"
|
||||
|
||||
CUSTOM_OPS_ARRAY=(
|
||||
"grouped_matmul_swiglu_quant_weight_nz_tensor_list"
|
||||
"lightning_indexer"
|
||||
"sparse_flash_attention"
|
||||
"dispatch_ffn_combine"
|
||||
"dispatch_gmm_combine_decode"
|
||||
"moe_combine_normal"
|
||||
"moe_dispatch_normal"
|
||||
"dispatch_layout"
|
||||
"notify_dispatch"
|
||||
)
|
||||
CUSTOM_OPS=$(IFS=';'; echo "${CUSTOM_OPS_ARRAY[*]}")
|
||||
CUSTOM_OPS="grouped_matmul_swiglu_quant_weight_nz_tensor_list;lightning_indexer;sparse_flash_attention;dispatch_ffn_combine;dispatch_gmm_combine_decode;"
|
||||
SOC_ARG="ascend910_93"
|
||||
else
|
||||
# others
|
||||
@@ -70,7 +58,7 @@ fi
|
||||
cd csrc
|
||||
rm -rf build output
|
||||
echo "building custom ops $CUSTOM_OPS for $SOC_VERSION"
|
||||
bash build.sh -n "$CUSTOM_OPS" -c "$SOC_ARG"
|
||||
bash build.sh -n $CUSTOM_OPS -c $SOC_ARG
|
||||
|
||||
# install custom ops to vllm_ascend/_cann_ops_custom
|
||||
./output/CANN-custom_ops*.run --install-path=$ROOT_DIR/vllm_ascend/_cann_ops_custom
|
||||
|
||||
Reference in New Issue
Block a user