### What this PR does / why we need it?
1. Replace the `moe_gating_top_k` op from torch_npu with a custom op.
2. Enable the renorm function of `moe_gating_top_k` in the softmax scenario.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
No additional testing is needed.
- vLLM version: v0.13.0
- vLLM main:
7157596103
---------
Signed-off-by: ZCG12345 <2097562023@qq.com>
89 lines
2.5 KiB
C++
89 lines
2.5 KiB
C++
/**
|
||
* This program is free software, you can redistribute it and/or modify.
|
||
* Copyright (c) 2025 Huawei Technologies Co., Ltd.
|
||
* This file is a part of the CANN Open Software.
|
||
* Licensed under CANN Open Software License Agreement Version 2.0 (the "License").
|
||
* Please refer to the License for details. You may not use this file except in compliance with the License.
|
||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
|
||
* See LICENSE in the root of the software repository for the full text of the License.
|
||
*/
|
||
|
||
/*!
|
||
* \file common.h
|
||
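* \brief Shared constants and small integer helpers (ceiling division, block alignment, min/max) for the MoeGatingTopK kernel.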
|
||
*/
|
||
#ifndef MOE_GATING_TOP_K_COMMON_H
|
||
#define MOE_GATING_TOP_K_COMMON_H
|
||
|
||
#include "kernel_operator.h"
|
||
|
||
namespace MoeGatingTopK {
|
||
using namespace AscendC;
|
||
// Float obtained by reinterpreting the 32-bit pattern stored in F32_NEG_INF
// through a pointer cast.
// NOTE(review): F32_NEG_INF is not declared in the visible part of this header;
// presumably it comes from kernel_operator.h. The similarly named
// FLOAT32_NEG_INF below is a different identifier - confirm which is intended.
const float MIN_FP32 = *(float *)(&F32_NEG_INF);
|
||
// IEEE-754 binary32 bit pattern of -inf (sign = 1, exponent = 0xFF, mantissa = 0).
constexpr int32_t FLOAT32_NEG_INF = 0xFF800000; // -inf; reads as -8388608 when held in an int32_t
|
||
// NOTE(review): presumably the number of elements handled by one sort repeat -
// confirm against the kernel code that uses it.
constexpr int64_t ONE_REPEAT_SORT_NUM = 32;
|
||
// Byte granularity that Align() and AlignBytes() round sizes up to.
constexpr int64_t BLOCK_BYTES = 32;
|
||
// Numerically REPEAT_BLOCKS * BLOCK_BYTES (8 * 32 = 256).
// NOTE(review): presumably the bytes covered by one vector repeat - confirm.
constexpr int64_t REPEAT_BYTES = 256;
|
||
// NOTE(review): presumably the number of BLOCK_BYTES-sized blocks per repeat - confirm.
constexpr int64_t REPEAT_BLOCKS = 8;
|
||
|
||
// Named small integer literals; they are not used in the visible part of this header.
constexpr int32_t CONSTANT_TWO = 2;
|
||
constexpr int32_t CONSTANT_THREE = 3;
|
||
constexpr int32_t CONSTANT_FOUR = 4;
|
||
constexpr int32_t CONSTANT_EIGHT = 8;
|
||
|
||
// NOTE(review): presumably the number of sorted lists merged in one step -
// confirm at the call sites.
constexpr int64_t MERGE_LIST_TWO = 2;
|
||
constexpr int64_t MERGE_LIST_THREE = 3;
|
||
constexpr int64_t MERGE_LIST_FOUR = 4;
|
||
|
||
// NOTE(review): presumably zero-based positions of the third and fourth merge
// lists - confirm at the call sites.
constexpr int64_t MERGE_LIST_IDX_TWO = 2;
|
||
constexpr int64_t MERGE_LIST_IDX_THREE = 3;
|
||
|
||
// Selector values for the normalization mode (softmax = 0, sigmoid = 1);
// the code that consumes them is outside this header.
constexpr int64_t NORM_TYPE_SOFTMAX = 0;
|
||
constexpr int64_t NORM_TYPE_SIGMOID = 1;
|
||
|
||
/*!
 * \brief Round-up integer division: (a + b - 1) / b.
 * \param a Dividend.
 * \param b Divisor.
 * \return 0 when b is 0; otherwise the quotient of (a + b - 1) by b, which is
 *         the ceiling of a / b for non-negative a and positive b.
 */
__aicore__ inline int64_t Ceil(int64_t a, int64_t b)
{
    // A zero divisor yields 0 instead of performing a division by zero.
    return (b != 0) ? ((a + b - 1) / b) : 0;
}
|
||
|
||
/*!
 * \brief Pad an element count so that its byte size is a multiple of BLOCK_BYTES.
 * \param elementNum Number of elements.
 * \param bytes Size of one element in bytes.
 * \return 0 when bytes is 0; otherwise the padded byte size divided by bytes
 *         (integer division).
 */
__aicore__ inline int64_t Align(int64_t elementNum, int64_t bytes)
{
    if (bytes != 0) {
        // Number of BLOCK_BYTES-sized blocks needed to hold elementNum elements.
        const int64_t blockCount = (elementNum * bytes + BLOCK_BYTES - 1) / BLOCK_BYTES;
        const int64_t paddedBytes = blockCount * BLOCK_BYTES;
        // Convert the padded byte size back into an element count.
        return paddedBytes / bytes;
    }
    return 0;
}
|
||
|
||
/*!
 * \brief Byte size of elementNum elements, rounded up to a multiple of BLOCK_BYTES.
 * \param elementNum Number of elements.
 * \param bytes Size of one element in bytes.
 * \return The rounded-up size in bytes.
 */
__aicore__ inline int64_t AlignBytes(int64_t elementNum, int64_t bytes)
{
    // Whole blocks required, counting a partially filled last block as full.
    const int64_t blockCount = (elementNum * bytes + BLOCK_BYTES - 1) / BLOCK_BYTES;
    return blockCount * BLOCK_BYTES;
}
|
||
|
||
/*!
 * \brief Smaller of two values.
 * \tparam T Type comparable with operator>.
 * \param a First value.
 * \param b Second value.
 * \return b when a > b, otherwise a.
 */
template <typename T>
__aicore__ inline T Min(T a, T b)
{
    if (a > b) {
        return b;
    }
    return a;
}
|
||
|
||
/*!
 * \brief Larger of two values.
 * \tparam T Type comparable with operator<.
 * \param a First value.
 * \param b Second value.
 * \return b when a < b, otherwise a.
 */
template <typename T>
__aicore__ inline T Max(T a, T b)
{
    if (a < b) {
        return b;
    }
    return a;
}
|
||
|
||
/*!
 * \brief Integer division of x by y, rounded towards positive infinity.
 * \tparam T1 Integer type of the dividend and of the result.
 * \tparam T2 Integer type of the divisor.
 * \param x Dividend.
 * \param y Divisor.
 * \return x unchanged when either operand is 0; otherwise x / y, increased by
 *         one when the division is inexact and the operands have the same sign.
 */
template <typename T1, typename T2>
__aicore__ inline T1 CeilDiv(T1 x, T2 y)
{
    // Nothing to divide (x == 0) or division impossible (y == 0): return x as is.
    if (y == 0 || x == 0) {
        return x;
    }

    const T1 truncated = x / y;
    const bool inexact = (x % y) != 0;
    // XOR is non-negative exactly when both sign bits agree. Truncation rounds
    // towards zero, which is already the ceiling for a negative quotient.
    const bool sameSign = (x ^ y) >= 0;
    if (inexact && sameSign) {
        return truncated + 1;
    }
    return truncated;
}
|
||
|
||
} // namespace MoeGatingTopK
|
||
#endif // MOE_GATING_TOP_K_COMMON_H
|