[Kernel] Add moe_gating_top_k operator support for Ascend NPU (#5579)
### What this PR does / why we need it?
1. Replace the moe_gating_top_k operator from torch_npu with a custom op.
2. Enable the renorm function of moe_gating_top_k in the softmax scenario.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
No testing needed.
- vLLM version: v0.13.0
- vLLM main:
7157596103
---------
Signed-off-by: ZCG12345 <2097562023@qq.com>
This commit is contained in:
89
csrc/moe_gating_top_k/op_kernel/common.h
Normal file
89
csrc/moe_gating_top_k/op_kernel/common.h
Normal file
@@ -0,0 +1,89 @@
|
||||
/**
|
||||
* This program is free software, you can redistribute it and/or modify.
|
||||
* Copyright (c) 2025 Huawei Technologies Co., Ltd.
|
||||
* This file is a part of the CANN Open Software.
|
||||
* Licensed under CANN Open Software License Agreement Version 2.0 (the "License").
|
||||
* Please refer to the License for details. You may not use this file except in compliance with the License.
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* See LICENSE in the root of the software repository for the full text of the License.
|
||||
*/
|
||||
|
||||
/*!
|
||||
* \file common.h
|
||||
* \brief Shared constants and small integer helper functions for the MoeGatingTopK namespace.
|
||||
*/
|
||||
#ifndef MOE_GATING_TOP_K_COMMON_H
|
||||
#define MOE_GATING_TOP_K_COMMON_H
|
||||
|
||||
#include "kernel_operator.h"
|
||||
|
||||
namespace MoeGatingTopK {
|
||||
using namespace AscendC;
|
||||
// Float view of the 32-bit pattern stored in F32_NEG_INF: the pointer cast reinterprets the bits,
// it does not perform a numeric conversion.
// NOTE(review): F32_NEG_INF is not declared in the visible part of this header; presumably it is
// provided by kernel_operator.h - confirm. FLOAT32_NEG_INF (next declaration) carries the -inf bit
// pattern but is declared after this line.
const float MIN_FP32 = *(float *)(&F32_NEG_INF);
|
||||
constexpr int32_t FLOAT32_NEG_INF = 0xFF800000; // IEEE-754 binary32 bit pattern of -inf (sign 1, exponent all ones, mantissa 0); reads as -8388608 when interpreted as int32_t
|
||||
// presumably the number of elements handled by one sort repeat - usage not visible here, confirm
constexpr int64_t ONE_REPEAT_SORT_NUM = 32;
|
||||
// Rounding granularity, in bytes, used by Align() and AlignBytes() in this header.
constexpr int64_t BLOCK_BYTES = 32;
|
||||
// Note: REPEAT_BYTES == REPEAT_BLOCKS * BLOCK_BYTES (8 * 32 = 256).
constexpr int64_t REPEAT_BYTES = 256;
|
||||
constexpr int64_t REPEAT_BLOCKS = 8;
|
||||
|
||||
// Named small integer literals (int32_t); their call sites are outside this header.
constexpr int32_t CONSTANT_TWO = 2;
|
||||
constexpr int32_t CONSTANT_THREE = 3;
|
||||
constexpr int32_t CONSTANT_FOUR = 4;
|
||||
constexpr int32_t CONSTANT_EIGHT = 8;
|
||||
|
||||
// presumably the number of lists taking part in a merge step - usage not visible here, confirm
constexpr int64_t MERGE_LIST_TWO = 2;
|
||||
constexpr int64_t MERGE_LIST_THREE = 3;
|
||||
constexpr int64_t MERGE_LIST_FOUR = 4;
|
||||
|
||||
// presumably zero-based positions of the third and fourth merge lists - confirm against callers
constexpr int64_t MERGE_LIST_IDX_TWO = 2;
|
||||
constexpr int64_t MERGE_LIST_IDX_THREE = 3;
|
||||
|
||||
// Selector values for the normalization type: 0 for softmax, 1 for sigmoid.
constexpr int64_t NORM_TYPE_SOFTMAX = 0;
|
||||
constexpr int64_t NORM_TYPE_SIGMOID = 1;
|
||||
|
||||
/*!
 * \brief Integer division of a by b with the quotient rounded up.
 *
 * Computed as (a + b - 1) / b, which equals ceil(a / b) for non-negative a
 * and positive b.
 *
 * \param a dividend
 * \param b divisor
 * \return (a + b - 1) / b, or 0 when b is 0 (no division is performed)
 */
__aicore__ inline int64_t Ceil(int64_t a, int64_t b)
{
    // A zero divisor short-circuits to 0 rather than dividing.
    return (b != 0) ? ((a + b - 1) / b) : 0;
}
|
||||
|
||||
/*!
 * \brief Element count after padding a buffer up to a whole number of BLOCK_BYTES blocks.
 *
 * The byte size elementNum * bytes is rounded up to the next multiple of
 * BLOCK_BYTES and then converted back to a number of elements of size bytes.
 *
 * \param elementNum number of elements
 * \param bytes size of one element in bytes
 * \return padded element count, or 0 when bytes is 0
 */
__aicore__ inline int64_t Align(int64_t elementNum, int64_t bytes)
{
    if (bytes != 0) {
        const int64_t rawBytes = elementNum * bytes;
        // Number of BLOCK_BYTES-sized blocks needed to hold rawBytes.
        const int64_t blockCount = (rawBytes + BLOCK_BYTES - 1) / BLOCK_BYTES;
        return blockCount * BLOCK_BYTES / bytes;
    }
    // Zero-sized elements: avoid dividing by zero.
    return 0;
}
|
||||
|
||||
/*!
 * \brief Byte size of elementNum elements, rounded up to a multiple of BLOCK_BYTES.
 *
 * \param elementNum number of elements
 * \param bytes size of one element in bytes
 * \return elementNum * bytes rounded up to the next multiple of BLOCK_BYTES
 */
__aicore__ inline int64_t AlignBytes(int64_t elementNum, int64_t bytes)
{
    const int64_t rawBytes = elementNum * bytes;
    // Number of BLOCK_BYTES-sized blocks needed to hold rawBytes.
    const int64_t blockCount = (rawBytes + BLOCK_BYTES - 1) / BLOCK_BYTES;
    return blockCount * BLOCK_BYTES;
}
|
||||
|
||||
template <typename T>
|
||||
__aicore__ inline T Min(T a, T b)
|
||||
{
|
||||
return a > b ? b : a;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__aicore__ inline T Max(T a, T b)
|
||||
{
|
||||
return a < b ? b : a;
|
||||
}
|
||||
|
||||
template <typename T1, typename T2>
|
||||
__aicore__ inline T1 CeilDiv(T1 x, T2 y)
|
||||
{
|
||||
if (y != 0 && x != 0) {
|
||||
const T1 quotient = x / y;
|
||||
return (x % y != 0 && ((x ^ y) >= 0)) ? (quotient + 1) : quotient;
|
||||
}
|
||||
|
||||
return x;
|
||||
}
|
||||
|
||||
} // namespace MoeGatingTopK
|
||||
#endif // MOE_GATING_TOP_K_COMMON_H
|
||||
Reference in New Issue
Block a user