Files
xc-llm-ascend/csrc/moe_gating_top_k/op_kernel/common.h
ZCG12345 3be8e33fe9 [Kernel] Add moe_gating_top_k operator support for Ascend NPU (#5579)
### What this PR does / why we need it?

1. Replace moe_gating_top_k from torch_npu with a custom op.
2. Enable the renorm function of moe_gating_top_k in the softmax scenario.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
No tests needed.

- vLLM version: v0.13.0
- vLLM main:
7157596103

---------

Signed-off-by: ZCG12345 <2097562023@qq.com>
2026-01-07 21:42:31 +08:00

89 lines
2.5 KiB
C++
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
 * This program is free software, you can redistribute it and/or modify.
 * Copyright (c) 2025 Huawei Technologies Co., Ltd.
* This file is a part of the CANN Open Software.
* Licensed under CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
*/
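/*!
* \file common.h
* \brief Shared constants and small integer helpers (ceiling division, block alignment, min/max) used in the MoeGatingTopK namespace.
*/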
#ifndef MOE_GATING_TOP_K_COMMON_H
#define MOE_GATING_TOP_K_COMMON_H
#include "kernel_operator.h"
namespace MoeGatingTopK {
using namespace AscendC;
// Float obtained by reinterpreting the storage of F32_NEG_INF as a float.
// NOTE(review): F32_NEG_INF is not declared in this file; presumably it is provided through
// kernel_operator.h and holds the -inf bit pattern -- confirm.
const float MIN_FP32 = *(float *)(&F32_NEG_INF);
// 0xFF800000 is the IEEE-754 binary32 bit pattern of -inf. Stored in an int32_t it reads as
// -8388608 on two's-complement targets.
constexpr int32_t FLOAT32_NEG_INF = 0xFF800000;
// NOTE(review): presumably the number of elements handled by one sort repeat -- confirm at call sites.
constexpr int64_t ONE_REPEAT_SORT_NUM = 32;
// Byte granularity that Align() and AlignBytes() round sizes up to.
constexpr int64_t BLOCK_BYTES = 32;
// Numerically equal to REPEAT_BLOCKS * BLOCK_BYTES.
// NOTE(review): presumably the byte span of one vector repeat -- confirm.
constexpr int64_t REPEAT_BYTES = 256;
// NOTE(review): presumably the number of BLOCK_BYTES-sized blocks per repeat -- confirm.
constexpr int64_t REPEAT_BLOCKS = 8;
// Named small integer literals.
constexpr int32_t CONSTANT_TWO = 2;
constexpr int32_t CONSTANT_THREE = 3;
constexpr int32_t CONSTANT_FOUR = 4;
constexpr int32_t CONSTANT_EIGHT = 8;
// NOTE(review): presumably the number of lists taking part in a merge step -- confirm at call sites.
constexpr int64_t MERGE_LIST_TWO = 2;
constexpr int64_t MERGE_LIST_THREE = 3;
constexpr int64_t MERGE_LIST_FOUR = 4;
// NOTE(review): presumably zero-based indices of the third and fourth merge lists -- confirm.
constexpr int64_t MERGE_LIST_IDX_TWO = 2;
constexpr int64_t MERGE_LIST_IDX_THREE = 3;
// Normalization-mode selector values.
// NOTE(review): presumably compared against an operator attribute -- confirm.
constexpr int64_t NORM_TYPE_SOFTMAX = 0;
constexpr int64_t NORM_TYPE_SIGMOID = 1;
/*!
 * \brief Integer ceiling division: smallest integer not less than a / b.
 * \param a dividend.
 * \param b divisor.
 * \return ceil(a / b), or 0 when b is 0.
 *
 * Implemented with quotient and remainder instead of (a + b - 1) / b so that the
 * intermediate sum cannot overflow for large a, and so that negative operands are
 * rounded towards +infinity as well.
 */
__aicore__ inline int64_t Ceil(int64_t a, int64_t b)
{
    if (b == 0) {
        return 0;
    }
    const int64_t quotient = a / b;
    const int64_t remainder = a % b;
    // C++ division truncates towards zero, which already is the ceiling when the exact
    // result is negative. Only a positive inexact result needs to be bumped by one; that
    // is the case exactly when the remainder is non-zero and has the sign of the divisor.
    const bool roundUp = (remainder != 0) && ((remainder < 0) == (b < 0));
    return roundUp ? (quotient + 1) : quotient;
}
/*!
 * \brief Pads an element count so that its byte size is a multiple of BLOCK_BYTES.
 * \param elementNum number of elements.
 * \param bytes size of one element in bytes.
 * \return the padded byte size divided by bytes (integer division), or 0 when bytes is 0.
 */
__aicore__ inline int64_t Align(int64_t elementNum, int64_t bytes)
{
    if (bytes != 0) {
        const int64_t rawBytes = elementNum * bytes;
        // Round the byte size up to the next BLOCK_BYTES boundary, then convert back to elements.
        const int64_t paddedBytes = (rawBytes + BLOCK_BYTES - 1) / BLOCK_BYTES * BLOCK_BYTES;
        return paddedBytes / bytes;
    }
    return 0;
}
/*!
 * \brief Byte size of elementNum elements, rounded up to a multiple of BLOCK_BYTES.
 * \param elementNum number of elements.
 * \param bytes size of one element in bytes.
 * \return the padded size in bytes.
 */
__aicore__ inline int64_t AlignBytes(int64_t elementNum, int64_t bytes)
{
    const int64_t rawBytes = elementNum * bytes;
    // Number of BLOCK_BYTES-sized blocks needed to hold rawBytes.
    const int64_t blockCount = (rawBytes + BLOCK_BYTES - 1) / BLOCK_BYTES;
    return blockCount * BLOCK_BYTES;
}
/*!
 * \brief Returns the smaller of two values.
 * \param a first value.
 * \param b second value.
 * \return b when a > b, otherwise a.
 */
template <typename T>
__aicore__ inline T Min(T a, T b)
{
    if (a > b) {
        return b;
    }
    return a;
}
/*!
 * \brief Returns the larger of two values.
 * \param a first value.
 * \param b second value.
 * \return b when a < b, otherwise a.
 */
template <typename T>
__aicore__ inline T Max(T a, T b)
{
    if (a < b) {
        return b;
    }
    return a;
}
/*!
 * \brief Ceiling division of x by y for integer types.
 * \param x dividend; its type is also the return type.
 * \param y divisor.
 * \return x unchanged when x or y is 0; otherwise x / y, incremented by one when the
 *         division is inexact and x ^ y is non-negative.
 */
template <typename T1, typename T2>
__aicore__ inline T1 CeilDiv(T1 x, T2 y)
{
    if (y == 0 || x == 0) {
        return x;
    }
    T1 result = x / y;
    const bool hasRemainder = (x % y != 0);
    // A non-negative XOR means the sign bits agree, i.e. the exact quotient is positive
    // and truncation rounded it down.
    const bool sameSign = ((x ^ y) >= 0);
    if (hasRemainder && sameSign) {
        result = result + 1;
    }
    return result;
}
} // namespace MoeGatingTopK
#endif // MOE_GATING_TOP_K_COMMON_H