[CI][lint] Add rule codespell back (#6236)
### What this PR does / why we need it?
After removing codespell for a while, we discovered that typos has trouble
correctly recognizing certain misspelled words, so I suggest adding
codespell back.
- vLLM version: v0.14.1
- vLLM main:
d68209402d
---------
Signed-off-by: wangli <wangli858794774@gmail.com>
This commit is contained in:
2
.github/CODEOWNERS
vendored
2
.github/CODEOWNERS
vendored
@@ -29,7 +29,7 @@
|
|||||||
/.readthedocs.yaml @wangxiyuan @Yikun
|
/.readthedocs.yaml @wangxiyuan @Yikun
|
||||||
/README* @wangxiyuan @Yikun
|
/README* @wangxiyuan @Yikun
|
||||||
|
|
||||||
# exmaple
|
# example
|
||||||
/examples @wangxiyuan
|
/examples @wangxiyuan
|
||||||
|
|
||||||
# tests
|
# tests
|
||||||
|
|||||||
4
.github/ISSUE_TEMPLATE/110-user-story.yml
vendored
4
.github/ISSUE_TEMPLATE/110-user-story.yml
vendored
@@ -18,7 +18,7 @@ body:
|
|||||||
A brief introduction about the background of your use case, like your scenario, hardware size etc.
|
A brief introduction about the background of your use case, like your scenario, hardware size etc.
|
||||||
- type: textarea
|
- type: textarea
|
||||||
attributes:
|
attributes:
|
||||||
label: Bussiness Challenges
|
label: Business Challenges
|
||||||
description: >
|
description: >
|
||||||
Tell us how what kind of challenge you faced in this user story.
|
Tell us how what kind of challenge you faced in this user story.
|
||||||
- type: textarea
|
- type: textarea
|
||||||
@@ -30,7 +30,7 @@ body:
|
|||||||
attributes:
|
attributes:
|
||||||
label: Extra Info
|
label: Extra Info
|
||||||
description: >
|
description: >
|
||||||
Any extra infomation you want to include in this story
|
Any extra information you want to include in this story
|
||||||
- type: markdown
|
- type: markdown
|
||||||
attributes:
|
attributes:
|
||||||
value: >
|
value: >
|
||||||
|
|||||||
2
.github/workflows/_schedule_image_build.yaml
vendored
2
.github/workflows/_schedule_image_build.yaml
vendored
@@ -139,7 +139,7 @@ jobs:
|
|||||||
quay.io/ascend/vllm-ascend
|
quay.io/ascend/vllm-ascend
|
||||||
# Note for test case
|
# Note for test case
|
||||||
# https://github.com/marketplace/actions/docker-metadata-action#typeref
|
# https://github.com/marketplace/actions/docker-metadata-action#typeref
|
||||||
# 1. branch job pulish per main/*-dev branch commits
|
# 1. branch job publish per main/*-dev branch commits
|
||||||
# 2. main and dev pull_request is build only, so the tag pr-N-openeuler is fine
|
# 2. main and dev pull_request is build only, so the tag pr-N-openeuler is fine
|
||||||
# 3. only pep440 matched tag will be published:
|
# 3. only pep440 matched tag will be published:
|
||||||
# - v0.7.1 --> v0.7.1-openeuler
|
# - v0.7.1 --> v0.7.1-openeuler
|
||||||
|
|||||||
@@ -11,6 +11,17 @@ repos:
|
|||||||
- id: ruff-check
|
- id: ruff-check
|
||||||
args: [--output-format, github, --fix]
|
args: [--output-format, github, --fix]
|
||||||
- id: ruff-format
|
- id: ruff-format
|
||||||
|
- repo: https://github.com/codespell-project/codespell
|
||||||
|
rev: v2.4.1
|
||||||
|
hooks:
|
||||||
|
- id: codespell
|
||||||
|
args: [
|
||||||
|
--toml, pyproject.toml,
|
||||||
|
'--skip', 'tests/prompts/**,./benchmarks/sonnet.txt,*tests/lora/data/**,build/**,./vllm_ascend.egg-info/**,typos.toml',
|
||||||
|
'-L', 'CANN,cann,NNAL,nnal,ASCEND,ascend,EnQue,CopyIn,ArchType,AND,ND,tbe,copyin,alog'
|
||||||
|
]
|
||||||
|
additional_dependencies:
|
||||||
|
- tomli
|
||||||
- repo: https://github.com/crate-ci/typos
|
- repo: https://github.com/crate-ci/typos
|
||||||
rev: v1.32.0
|
rev: v1.32.0
|
||||||
hooks:
|
hooks:
|
||||||
|
|||||||
@@ -144,10 +144,10 @@ static bool CheckInputOutputDim(const gert::TilingContext* context)
|
|||||||
|
|
||||||
OP_CHECK_IF(
|
OP_CHECK_IF(
|
||||||
x1DimNum != x2DimNum,
|
x1DimNum != x2DimNum,
|
||||||
OP_LOGE(context, "Input x2/x1 shape invaild, dim num is not equal x1 dim."), return false);
|
OP_LOGE(context, "Input x2/x1 shape invalid, dim num is not equal x1 dim."), return false);
|
||||||
OP_CHECK_IF(
|
OP_CHECK_IF(
|
||||||
(yDimNum != xDimNum) || (xDimNum != x1DimNum) || (rstdDimNum != x1DimNum),
|
(yDimNum != xDimNum) || (xDimNum != x1DimNum) || (rstdDimNum != x1DimNum),
|
||||||
OP_LOGE(context, "Output y/x/rstd shape invaild, dim num is not equal x1 dim."), return false);
|
OP_LOGE(context, "Output y/x/rstd shape invalid, dim num is not equal x1 dim."), return false);
|
||||||
OP_CHECK_IF(
|
OP_CHECK_IF(
|
||||||
x1DimNum < gammaDimNum, OP_LOGE(context, "X1 dim num should not be smaller than gamma dim num."),
|
x1DimNum < gammaDimNum, OP_LOGE(context, "X1 dim num should not be smaller than gamma dim num."),
|
||||||
return false);
|
return false);
|
||||||
@@ -180,26 +180,26 @@ static bool CheckInputOutputShape(const gert::TilingContext* context)
|
|||||||
return false);
|
return false);
|
||||||
OP_CHECK_IF(
|
OP_CHECK_IF(
|
||||||
x2_shape->GetStorageShape().GetDim(i) != x1_shape->GetStorageShape().GetDim(i),
|
x2_shape->GetStorageShape().GetDim(i) != x1_shape->GetStorageShape().GetDim(i),
|
||||||
OP_LOGE(context, "Input x2/x1 shape invaild, shape is not equal x1 shape."), return false);
|
OP_LOGE(context, "Input x2/x1 shape invalid, shape is not equal x1 shape."), return false);
|
||||||
OP_CHECK_IF(
|
OP_CHECK_IF(
|
||||||
(y_shape->GetStorageShape().GetDim(i) != x1_shape->GetStorageShape().GetDim(i)) ||
|
(y_shape->GetStorageShape().GetDim(i) != x1_shape->GetStorageShape().GetDim(i)) ||
|
||||||
(x_shape->GetStorageShape().GetDim(i) != x1_shape->GetStorageShape().GetDim(i)),
|
(x_shape->GetStorageShape().GetDim(i) != x1_shape->GetStorageShape().GetDim(i)),
|
||||||
OP_LOGE(context, "Input y/x shape invaild, shape is not equal x1 shape."), return false);
|
OP_LOGE(context, "Input y/x shape invalid, shape is not equal x1 shape."), return false);
|
||||||
}
|
}
|
||||||
for (uint32_t i = 0; i < x1DimNum - gammaDimNum; i++) {
|
for (uint32_t i = 0; i < x1DimNum - gammaDimNum; i++) {
|
||||||
OP_CHECK_IF(
|
OP_CHECK_IF(
|
||||||
rstd_shape->GetStorageShape().GetDim(i) != x2_shape->GetStorageShape().GetDim(i),
|
rstd_shape->GetStorageShape().GetDim(i) != x2_shape->GetStorageShape().GetDim(i),
|
||||||
OP_LOGE(context, "Output rstd shape invaild, shape is not equal x1 first few dim."),
|
OP_LOGE(context, "Output rstd shape invalid, shape is not equal x1 first few dim."),
|
||||||
return false);
|
return false);
|
||||||
}
|
}
|
||||||
for (uint32_t i = 0; i < gammaDimNum; i++) {
|
for (uint32_t i = 0; i < gammaDimNum; i++) {
|
||||||
OP_CHECK_IF(
|
OP_CHECK_IF(
|
||||||
gamma_shape->GetStorageShape().GetDim(i) != x1_shape->GetStorageShape().GetDim(x1DimNum - gammaDimNum + i),
|
gamma_shape->GetStorageShape().GetDim(i) != x1_shape->GetStorageShape().GetDim(x1DimNum - gammaDimNum + i),
|
||||||
OP_LOGE(context, "Input gamma shape invaild, gamma shape is not equal x1 last few dim."),
|
OP_LOGE(context, "Input gamma shape invalid, gamma shape is not equal x1 last few dim."),
|
||||||
return false);
|
return false);
|
||||||
OP_CHECK_IF(
|
OP_CHECK_IF(
|
||||||
rstd_shape->GetStorageShape().GetDim(x1DimNum - 1 - i) != 1,
|
rstd_shape->GetStorageShape().GetDim(x1DimNum - 1 - i) != 1,
|
||||||
OP_LOGE(context, "Output rstd shape invaild, last few dim is not equal to 1."),
|
OP_LOGE(context, "Output rstd shape invalid, last few dim is not equal to 1."),
|
||||||
return false);
|
return false);
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
|
|||||||
@@ -11,11 +11,11 @@ if [[ "$SOC_VERSION" =~ ^ascend310 ]]; then
|
|||||||
exit 0
|
exit 0
|
||||||
elif [[ "$SOC_VERSION" =~ ^ascend910b ]]; then
|
elif [[ "$SOC_VERSION" =~ ^ascend910b ]]; then
|
||||||
# ASCEND910B (A2) series
|
# ASCEND910B (A2) series
|
||||||
# depdendency: catlass
|
# dependency: catlass
|
||||||
git config --global --add safe.directory "$ROOT_DIR"
|
git config --global --add safe.directory "$ROOT_DIR"
|
||||||
CATLASS_PATH=${ROOT_DIR}/csrc/third_party/catlass/include
|
CATLASS_PATH=${ROOT_DIR}/csrc/third_party/catlass/include
|
||||||
if [[ ! -d "${CATLASS_PATH}" ]]; then
|
if [[ ! -d "${CATLASS_PATH}" ]]; then
|
||||||
echo "depdendency catlass is missing, try to fetch it..."
|
echo "dependency catlass is missing, try to fetch it..."
|
||||||
if ! git submodule update --init --recursive; then
|
if ! git submodule update --init --recursive; then
|
||||||
echo "fetch failed"
|
echo "fetch failed"
|
||||||
exit 1
|
exit 1
|
||||||
@@ -28,17 +28,17 @@ elif [[ "$SOC_VERSION" =~ ^ascend910b ]]; then
|
|||||||
SOC_ARG="ascend910b"
|
SOC_ARG="ascend910b"
|
||||||
elif [[ "$SOC_VERSION" =~ ^ascend910_93 ]]; then
|
elif [[ "$SOC_VERSION" =~ ^ascend910_93 ]]; then
|
||||||
# ASCEND910C (A3) series
|
# ASCEND910C (A3) series
|
||||||
# depdendency: catlass
|
# dependency: catlass
|
||||||
git config --global --add safe.directory "$ROOT_DIR"
|
git config --global --add safe.directory "$ROOT_DIR"
|
||||||
CATLASS_PATH=${ROOT_DIR}/csrc/third_party/catlass/include
|
CATLASS_PATH=${ROOT_DIR}/csrc/third_party/catlass/include
|
||||||
if [[ ! -d "${CATLASS_PATH}" ]]; then
|
if [[ ! -d "${CATLASS_PATH}" ]]; then
|
||||||
echo "depdendency catlass is missing, try to fetch it..."
|
echo "dependency catlass is missing, try to fetch it..."
|
||||||
if ! git submodule update --init --recursive; then
|
if ! git submodule update --init --recursive; then
|
||||||
echo "fetch failed"
|
echo "fetch failed"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
# depdendency: cann-toolkit file moe_distribute_base.h
|
# dependency: cann-toolkit file moe_distribute_base.h
|
||||||
HCCL_STRUCT_FILE_PATH=$(find -L "${ASCEND_TOOLKIT_HOME}" -name "moe_distribute_base.h" 2>/dev/null | head -n1)
|
HCCL_STRUCT_FILE_PATH=$(find -L "${ASCEND_TOOLKIT_HOME}" -name "moe_distribute_base.h" 2>/dev/null | head -n1)
|
||||||
if [ -z "$HCCL_STRUCT_FILE_PATH" ]; then
|
if [ -z "$HCCL_STRUCT_FILE_PATH" ]; then
|
||||||
echo "cannot find moe_distribute_base.h file in CANN env"
|
echo "cannot find moe_distribute_base.h file in CANN env"
|
||||||
|
|||||||
@@ -162,7 +162,7 @@ __aicore__ inline void MoeV2ExpertTokenOut::CopyOutExpertTokensCumsum(bool isTai
|
|||||||
this->expertTokenValue += this->expertTokenIdxOutLocal.GetValue(i);
|
this->expertTokenValue += this->expertTokenIdxOutLocal.GetValue(i);
|
||||||
this->expertTokenIdxOutLocal.SetValue(i, this->expertTokenValue);
|
this->expertTokenIdxOutLocal.SetValue(i, this->expertTokenValue);
|
||||||
}
|
}
|
||||||
// if the remianing UB is sufficient, use the UB space to copy
|
// if the remaining UB is sufficient, use the UB space to copy
|
||||||
// otherwise, copy the calculated data first, and then copy the last tokenValue to remaining expert position
|
// otherwise, copy the calculated data first, and then copy the last tokenValue to remaining expert position
|
||||||
if (isTail && end <= this->expertNumUbAlign) {
|
if (isTail && end <= this->expertNumUbAlign) {
|
||||||
int64_t startAlign = Min(Align(copyLength, sizeof(int32_t)), end);
|
int64_t startAlign = Min(Align(copyLength, sizeof(int32_t)), end);
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ namespace Catlass::Gemm::Tile {
|
|||||||
|
|
||||||
static constexpr uint32_t ELE_NUM_PER_C0 = BYTE_PER_C0 / sizeof(Element); // int64, 32/8=4
|
static constexpr uint32_t ELE_NUM_PER_C0 = BYTE_PER_C0 / sizeof(Element); // int64, 32/8=4
|
||||||
|
|
||||||
// Mehtods
|
// Methods
|
||||||
|
|
||||||
CATLASS_DEVICE
|
CATLASS_DEVICE
|
||||||
CopyGmToL1() {};
|
CopyGmToL1() {};
|
||||||
|
|||||||
@@ -177,7 +177,7 @@ __aicore__ inline void MoeV2ExpertTokenOut::CopyOutExpertTokensCumsum(bool isTai
|
|||||||
this->expertTokenValue += this->expertTokenIdxOutLocal.GetValue(i);
|
this->expertTokenValue += this->expertTokenIdxOutLocal.GetValue(i);
|
||||||
this->expertTokenIdxOutLocal.SetValue(i, this->expertTokenValue);
|
this->expertTokenIdxOutLocal.SetValue(i, this->expertTokenValue);
|
||||||
}
|
}
|
||||||
// if the remianing UB is sufficient, use the UB space to copy
|
// if the remaining UB is sufficient, use the UB space to copy
|
||||||
// otherwise, copy the calculated data first, and then copy the last tokenValue to remaining expert position
|
// otherwise, copy the calculated data first, and then copy the last tokenValue to remaining expert position
|
||||||
if (isTail && end <= this->expertNumUbAlign) {
|
if (isTail && end <= this->expertNumUbAlign) {
|
||||||
int64_t startAlign = Min(Align(copyLength, sizeof(int32_t)), end);
|
int64_t startAlign = Min(Align(copyLength, sizeof(int32_t)), end);
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ namespace Catlass::Gemm::Tile {
|
|||||||
|
|
||||||
static constexpr uint32_t ELE_NUM_PER_C0 = BYTE_PER_C0 / sizeof(Element); // int64, 32/8=4
|
static constexpr uint32_t ELE_NUM_PER_C0 = BYTE_PER_C0 / sizeof(Element); // int64, 32/8=4
|
||||||
|
|
||||||
// Mehtods
|
// Methods
|
||||||
|
|
||||||
CATLASS_DEVICE
|
CATLASS_DEVICE
|
||||||
CopyGmToL1() {};
|
CopyGmToL1() {};
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* funtion: aclnnDispatchLayoutGetWorkspaceSize
|
/* function: aclnnDispatchLayoutGetWorkspaceSize
|
||||||
* topkIdx : required
|
* topkIdx : required
|
||||||
* numTokens : required
|
* numTokens : required
|
||||||
* numRanks : required
|
* numRanks : required
|
||||||
@@ -31,7 +31,7 @@ __attribute__((visibility("default"))) aclnnStatus aclnnDispatchLayoutGetWorkspa
|
|||||||
uint64_t *workspaceSize,
|
uint64_t *workspaceSize,
|
||||||
aclOpExecutor **executor);
|
aclOpExecutor **executor);
|
||||||
|
|
||||||
/* funtion: aclnnDispatchLayout
|
/* function: aclnnDispatchLayout
|
||||||
* workspace : workspace memory addr(input).
|
* workspace : workspace memory addr(input).
|
||||||
* workspaceSize : size of workspace(input).
|
* workspaceSize : size of workspace(input).
|
||||||
* executor : executor context(input).
|
* executor : executor context(input).
|
||||||
|
|||||||
@@ -17,7 +17,7 @@
|
|||||||
|
|
||||||
#include "grouped_matmul_swiglu_quant_weight_nz_tensor_list_utils.h"
|
#include "grouped_matmul_swiglu_quant_weight_nz_tensor_list_utils.h"
|
||||||
namespace GROUPED_MATMUL_SWIGLU_QUANT_WEIGHT_NZ_TENSOR_LIST {
|
namespace GROUPED_MATMUL_SWIGLU_QUANT_WEIGHT_NZ_TENSOR_LIST {
|
||||||
/** @brief intenal computation class
|
/** @brief internal computation class
|
||||||
*/
|
*/
|
||||||
template <class mmType, bool sync = false, typename CHANNELDTYPE = float>
|
template <class mmType, bool sync = false, typename CHANNELDTYPE = float>
|
||||||
class GMMSwigluCompute{
|
class GMMSwigluCompute{
|
||||||
|
|||||||
@@ -374,7 +374,7 @@ ge::graphStatus LIInfoParser::GetHeadDim()
|
|||||||
dIndex = DIM_IDX_TWO;
|
dIndex = DIM_IDX_TWO;
|
||||||
break;
|
break;
|
||||||
case DataLayout::BSND:
|
case DataLayout::BSND:
|
||||||
// BSND: [Batch, SeqLen, N, D] -> D is the 3nd dimension
|
// BSND: [Batch, SeqLen, N, D] -> D is the 3rd dimension
|
||||||
dIndex = DIM_IDX_THREE;
|
dIndex = DIM_IDX_THREE;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* funtion: aclnnMoeCombineGetWorkspaceSize
|
/* function: aclnnMoeCombineGetWorkspaceSize
|
||||||
* recvX : required
|
* recvX : required
|
||||||
* tokenSrcInfo : required
|
* tokenSrcInfo : required
|
||||||
* epRecvCounts : required
|
* epRecvCounts : required
|
||||||
@@ -43,7 +43,7 @@ __attribute__((visibility("default"))) aclnnStatus aclnnMoeCombineNormalGetWorks
|
|||||||
uint64_t *workspaceSize,
|
uint64_t *workspaceSize,
|
||||||
aclOpExecutor **executor);
|
aclOpExecutor **executor);
|
||||||
|
|
||||||
/* funtion: aclnnMoeCombine
|
/* function: aclnnMoeCombine
|
||||||
* workspace : workspace memory addr(input).
|
* workspace : workspace memory addr(input).
|
||||||
* workspaceSize : size of workspace(input).
|
* workspaceSize : size of workspace(input).
|
||||||
* executor : executor context(input).
|
* executor : executor context(input).
|
||||||
|
|||||||
@@ -419,7 +419,7 @@ static ge::graphStatus SetWorkspace(gert::TilingContext *context, const char *no
|
|||||||
OPS_CHECK(workspace == nullptr, OPS_LOG_E(nodeName, "get workspace failed"),
|
OPS_CHECK(workspace == nullptr, OPS_LOG_E(nodeName, "get workspace failed"),
|
||||||
return ge::GRAPH_FAILED);
|
return ge::GRAPH_FAILED);
|
||||||
workspace[0] = SYSTEM_NEED_WORKSPACE;
|
workspace[0] = SYSTEM_NEED_WORKSPACE;
|
||||||
OPS_LOG_D(nodeName, "workspce[0] size is %ld", workspace[0]);
|
OPS_LOG_D(nodeName, "workspace[0] size is %ld", workspace[0]);
|
||||||
return ge::GRAPH_SUCCESS;
|
return ge::GRAPH_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ namespace ge {
|
|||||||
* @li out: A 2D tensor which is the renorm result of moe gating topk, format supports ND, and data type must be float. The shape must be the same as that of x.
|
* @li out: A 2D tensor which is the renorm result of moe gating topk, format supports ND, and data type must be float. The shape must be the same as that of x.
|
||||||
*
|
*
|
||||||
* @par Attributes:
|
* @par Attributes:
|
||||||
* @li k: A required attribute of type int. The value must greater than 0 and less than or equal to expert_num / group_count * k_group, idicating the topk value.
|
* @li k: A required attribute of type int. The value must greater than 0 and less than or equal to expert_num / group_count * k_group, indicating the topk value.
|
||||||
* @li k_group: An optional attribute of type int. It can not be less than 1, and can not be greater than group_count, indicating the topk group value. The default value is 1.
|
* @li k_group: An optional attribute of type int. It can not be less than 1, and can not be greater than group_count, indicating the topk group value. The default value is 1.
|
||||||
* @li group_count: An optional attribute of type int. It can not be less than 1, indicating the group count. The group_count * align_32(expert_num / group_count) can not be greater than 2048. The default value is 1.
|
* @li group_count: An optional attribute of type int. It can not be less than 1, indicating the group count. The group_count * align_32(expert_num / group_count) can not be greater than 2048. The default value is 1.
|
||||||
* @li group_select_mode: An optional attribute of type int. 0 indicating that sort group by max values, 1 indicating that sort group by sum of top-2 values. The default value is 0.
|
* @li group_select_mode: An optional attribute of type int. 0 indicating that sort group by max values, 1 indicating that sort group by sum of top-2 values. The default value is 0.
|
||||||
|
|||||||
@@ -444,11 +444,11 @@ ge::graphStatus MoeGatingTopKTilingBase::CheckOutShape()
|
|||||||
}
|
}
|
||||||
|
|
||||||
OP_CHECK_IF((yShape_->GetDim(0) != xShape_->GetDim(0)),
|
OP_CHECK_IF((yShape_->GetDim(0) != xShape_->GetDim(0)),
|
||||||
OP_LOGE(context_, "y out dim[0] %ld not euqal x dim[0] %ld, please check.", yShape_->GetDim(0),
|
OP_LOGE(context_, "y out dim[0] %ld not equal x dim[0] %ld, please check.", yShape_->GetDim(0),
|
||||||
xShape_->GetDim(0)),
|
xShape_->GetDim(0)),
|
||||||
return ge::GRAPH_FAILED);
|
return ge::GRAPH_FAILED);
|
||||||
OP_CHECK_IF((expertIdxShape_->GetDim(0) != xShape_->GetDim(0)),
|
OP_CHECK_IF((expertIdxShape_->GetDim(0) != xShape_->GetDim(0)),
|
||||||
OP_LOGE(context_, "expertId out dim[0] %ld not euqal x dim[0] %ld, please check.",
|
OP_LOGE(context_, "expertId out dim[0] %ld not equal x dim[0] %ld, please check.",
|
||||||
expertIdxShape_->GetDim(0), xShape_->GetDim(0)),
|
expertIdxShape_->GetDim(0), xShape_->GetDim(0)),
|
||||||
return ge::GRAPH_FAILED);
|
return ge::GRAPH_FAILED);
|
||||||
if (outFlag_ && outShape_ != nullptr) {
|
if (outFlag_ && outShape_ != nullptr) {
|
||||||
@@ -459,10 +459,10 @@ ge::graphStatus MoeGatingTopKTilingBase::CheckOutShape()
|
|||||||
}
|
}
|
||||||
|
|
||||||
OP_CHECK_IF((yShape_->GetDim(1) != k_),
|
OP_CHECK_IF((yShape_->GetDim(1) != k_),
|
||||||
OP_LOGE(context_, "y dim[1] %ld not euqal k %ld, please check.", yShape_->GetDim(1), k_),
|
OP_LOGE(context_, "y dim[1] %ld not equal k %ld, please check.", yShape_->GetDim(1), k_),
|
||||||
return ge::GRAPH_FAILED);
|
return ge::GRAPH_FAILED);
|
||||||
OP_CHECK_IF((expertIdxShape_->GetDim(1) != k_),
|
OP_CHECK_IF((expertIdxShape_->GetDim(1) != k_),
|
||||||
OP_LOGE(context_, "expertId dim[1] %ld not euqal k %ld, please check.", expertIdxShape_->GetDim(1), k_),
|
OP_LOGE(context_, "expertId dim[1] %ld not equal k %ld, please check.", expertIdxShape_->GetDim(1), k_),
|
||||||
return ge::GRAPH_FAILED);
|
return ge::GRAPH_FAILED);
|
||||||
if (outFlag_ && outShape_ != nullptr) {
|
if (outFlag_ && outShape_ != nullptr) {
|
||||||
OP_CHECK_IF((outShape_->GetDim(1) != xShape_->GetDim(1)),
|
OP_CHECK_IF((outShape_->GetDim(1) != xShape_->GetDim(1)),
|
||||||
|
|||||||
@@ -400,11 +400,11 @@ ge::graphStatus MoeGatingTopKTilingRegbase::CheckOutShape()
|
|||||||
}
|
}
|
||||||
|
|
||||||
OP_CHECK_IF((yShape_->GetDim(0) != xShape_->GetDim(0)),
|
OP_CHECK_IF((yShape_->GetDim(0) != xShape_->GetDim(0)),
|
||||||
OP_LOGE(context_, "y out dim[0] %ld not euqal x dim[0] %ld, please check.", yShape_->GetDim(0),
|
OP_LOGE(context_, "y out dim[0] %ld not equal x dim[0] %ld, please check.", yShape_->GetDim(0),
|
||||||
xShape_->GetDim(0)),
|
xShape_->GetDim(0)),
|
||||||
return ge::GRAPH_FAILED);
|
return ge::GRAPH_FAILED);
|
||||||
OP_CHECK_IF((expertIdxShape_->GetDim(0) != xShape_->GetDim(0)),
|
OP_CHECK_IF((expertIdxShape_->GetDim(0) != xShape_->GetDim(0)),
|
||||||
OP_LOGE(context_, "expertId out dim[0] %ld not euqal x dim[0] %ld, please check.",
|
OP_LOGE(context_, "expertId out dim[0] %ld not equal x dim[0] %ld, please check.",
|
||||||
expertIdxShape_->GetDim(0), xShape_->GetDim(0)),
|
expertIdxShape_->GetDim(0), xShape_->GetDim(0)),
|
||||||
return ge::GRAPH_FAILED);
|
return ge::GRAPH_FAILED);
|
||||||
if (outFlag_ && outShape_ != nullptr) {
|
if (outFlag_ && outShape_ != nullptr) {
|
||||||
@@ -415,10 +415,10 @@ ge::graphStatus MoeGatingTopKTilingRegbase::CheckOutShape()
|
|||||||
}
|
}
|
||||||
|
|
||||||
OP_CHECK_IF((yShape_->GetDim(1) != k_),
|
OP_CHECK_IF((yShape_->GetDim(1) != k_),
|
||||||
OP_LOGE(context_, "y dim[1] %ld not euqal k %ld, please check.", yShape_->GetDim(1), k_),
|
OP_LOGE(context_, "y dim[1] %ld not equal k %ld, please check.", yShape_->GetDim(1), k_),
|
||||||
return ge::GRAPH_FAILED);
|
return ge::GRAPH_FAILED);
|
||||||
OP_CHECK_IF((expertIdxShape_->GetDim(1) != k_),
|
OP_CHECK_IF((expertIdxShape_->GetDim(1) != k_),
|
||||||
OP_LOGE(context_, "expertId dim[1] %ld not euqal k %ld, please check.", expertIdxShape_->GetDim(1), k_),
|
OP_LOGE(context_, "expertId dim[1] %ld not equal k %ld, please check.", expertIdxShape_->GetDim(1), k_),
|
||||||
return ge::GRAPH_FAILED);
|
return ge::GRAPH_FAILED);
|
||||||
if (outFlag_ && outShape_ != nullptr) {
|
if (outFlag_ && outShape_ != nullptr) {
|
||||||
OP_CHECK_IF((outShape_->GetDim(1) != xShape_->GetDim(1)),
|
OP_CHECK_IF((outShape_->GetDim(1) != xShape_->GetDim(1)),
|
||||||
|
|||||||
@@ -41,7 +41,7 @@ template <typename T, typename... Args> constexpr uint64_t RecursiveSum(T templa
|
|||||||
// Format: Represents the Format supported by the current tiling key, using InputLayout enum, occupies one decimal digit
|
// Format: Represents the Format supported by the current tiling key, using InputLayout enum, occupies one decimal digit
|
||||||
// Sparse: Represents whether the current tiling key supports Sparse, using SparseCapability enum, occupies one decimal digit
|
// Sparse: Represents whether the current tiling key supports Sparse, using SparseCapability enum, occupies one decimal digit
|
||||||
// For other specialized scenarios, define your own bit fields and values
|
// For other specialized scenarios, define your own bit fields and values
|
||||||
// usage: get tilingKey from inputed types
|
// usage: get tilingKey from inputted types
|
||||||
// uint64_t tilingKey = GET_FLASHATTENTION_TILINGKEY(AxisEnum::AXIS_S1, AxisEnum::AXIS_S2, AxisEnum::AXIS_N2,
|
// uint64_t tilingKey = GET_FLASHATTENTION_TILINGKEY(AxisEnum::AXIS_S1, AxisEnum::AXIS_S2, AxisEnum::AXIS_N2,
|
||||||
// SupportedDtype::FLOAT32, InputLayout::BSH, SparseCapability::SUPPORT_ALL)
|
// SupportedDtype::FLOAT32, InputLayout::BSH, SparseCapability::SUPPORT_ALL)
|
||||||
|
|
||||||
@@ -51,7 +51,7 @@ template <typename... Args> constexpr uint64_t GET_TILINGKEY(Args... templateIds
|
|||||||
return TILINGKEYOFFSET + RecursiveSum(templateIds...);
|
return TILINGKEYOFFSET + RecursiveSum(templateIds...);
|
||||||
}
|
}
|
||||||
|
|
||||||
// usage: get tilingKey from inputed types
|
// usage: get tilingKey from inputted types
|
||||||
// uint64_t tilingKey = TILINGKEY(S2, S1, N2, FLOAT32, BSND, ALL)
|
// uint64_t tilingKey = TILINGKEY(S2, S1, N2, FLOAT32, BSND, ALL)
|
||||||
|
|
||||||
#define TILINGKEY(ub2, ub1, block, dtype, layout, sparse) \
|
#define TILINGKEY(ub2, ub1, block, dtype, layout, sparse) \
|
||||||
|
|||||||
@@ -119,7 +119,7 @@ template <typename T, typename... Args> constexpr uint64_t RecursiveSum(T templa
|
|||||||
// Format: Represents the Format supported by the current tiling key, using InputLayout enum, occupies one decimal digit
|
// Format: Represents the Format supported by the current tiling key, using InputLayout enum, occupies one decimal digit
|
||||||
// Sparse: Represents whether the current tiling key supports Sparse, using SparseCapability enum, occupies one decimal digit
|
// Sparse: Represents whether the current tiling key supports Sparse, using SparseCapability enum, occupies one decimal digit
|
||||||
// For other specialized scenarios, define your own bit fields and values
|
// For other specialized scenarios, define your own bit fields and values
|
||||||
// usage: get tilingKey from inputed types
|
// usage: get tilingKey from inputted types
|
||||||
// uint64_t tilingKey = GET_FLASHATTENTION_TILINGKEY(AxisEnum::AXIS_S1, AxisEnum::AXIS_S2, AxisEnum::AXIS_N2,
|
// uint64_t tilingKey = GET_FLASHATTENTION_TILINGKEY(AxisEnum::AXIS_S1, AxisEnum::AXIS_S2, AxisEnum::AXIS_N2,
|
||||||
// SupportedDtype::FLOAT32, InputLayout::BSH, SparseCapability::SUPPORT_ALL)
|
// SupportedDtype::FLOAT32, InputLayout::BSH, SparseCapability::SUPPORT_ALL)
|
||||||
|
|
||||||
@@ -129,7 +129,7 @@ template <typename... Args> constexpr uint64_t GET_TILINGKEY(Args... templateIds
|
|||||||
return TILINGKEYOFFSET + RecursiveSum(templateIds...);
|
return TILINGKEYOFFSET + RecursiveSum(templateIds...);
|
||||||
}
|
}
|
||||||
|
|
||||||
// usage: get tilingKey from inputed types
|
// usage: get tilingKey from inputted types
|
||||||
// uint64_t tilingKey = TILINGKEY(S2, S1, N2, FLOAT32, BSND, ALL)
|
// uint64_t tilingKey = TILINGKEY(S2, S1, N2, FLOAT32, BSND, ALL)
|
||||||
|
|
||||||
#define TILINGKEY(ub2, ub1, block, dtype, layout, sparse) \
|
#define TILINGKEY(ub2, ub1, block, dtype, layout, sparse) \
|
||||||
|
|||||||
@@ -903,7 +903,7 @@ void MoeInitRountingCustomTilingBase::Tinlig4VBSMultiCoreCompute(MoeCustomVBSCom
|
|||||||
needCoreNum = std::min(needCoreNum, aivNum);
|
needCoreNum = std::min(needCoreNum, aivNum);
|
||||||
|
|
||||||
if (needCoreNum == 0) {
|
if (needCoreNum == 0) {
|
||||||
OPS_LOG_E(context_->GetNodeName(), "Variale needCoreNum cannot be 0.");
|
OPS_LOG_E(context_->GetNodeName(), "Variate needCoreNum cannot be 0.");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
int64_t perCoreElements = (needCoreNum == 0) ? 0 : (totalLength_ / needCoreNum);
|
int64_t perCoreElements = (needCoreNum == 0) ? 0 : (totalLength_ / needCoreNum);
|
||||||
|
|||||||
@@ -82,7 +82,7 @@ __aicore__ inline void MoeCustomFullLoadDynamicQuant<T, COPYOUTTYPE, SMOOTHTYPE>
|
|||||||
this->CopyIn();
|
this->CopyIn();
|
||||||
this->Compute();
|
this->Compute();
|
||||||
|
|
||||||
// vaild expert equal zero
|
// valid expert equal zero
|
||||||
if (this->needCoreNum_ < 1) {
|
if (this->needCoreNum_ < 1) {
|
||||||
if (this->blockIdx_ == 0) {
|
if (this->blockIdx_ == 0) {
|
||||||
if (this->rowIdxType_ == GATHER) {
|
if (this->rowIdxType_ == GATHER) {
|
||||||
|
|||||||
@@ -85,7 +85,7 @@ __aicore__ inline void MoeCustomFullLoadStaticQuant<T>::Process()
|
|||||||
this->CopyIn();
|
this->CopyIn();
|
||||||
this->Compute();
|
this->Compute();
|
||||||
|
|
||||||
// vaild expert equal zero
|
// valid expert equal zero
|
||||||
if (this->needCoreNum_ < 1) {
|
if (this->needCoreNum_ < 1) {
|
||||||
if (this->blockIdx_ == 0) {
|
if (this->blockIdx_ == 0) {
|
||||||
if (this->rowIdxType_ == GATHER) {
|
if (this->rowIdxType_ == GATHER) {
|
||||||
|
|||||||
@@ -78,7 +78,7 @@ __aicore__ inline void MoeCustomFullLoadUnquantized<T>::Process()
|
|||||||
this->CopyIn();
|
this->CopyIn();
|
||||||
this->Compute();
|
this->Compute();
|
||||||
|
|
||||||
// vaild expert equal zero
|
// valid expert equal zero
|
||||||
if (this->needCoreNum_ < 1) {
|
if (this->needCoreNum_ < 1) {
|
||||||
if (this->blockIdx_ == 0) {
|
if (this->blockIdx_ == 0) {
|
||||||
if (this->rowIdxType_ == GATHER) {
|
if (this->rowIdxType_ == GATHER) {
|
||||||
|
|||||||
@@ -8,7 +8,7 @@
|
|||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* funtion: aclnnNotifyDispatchGetWorkspaceSize
|
/* function: aclnnNotifyDispatchGetWorkspaceSize
|
||||||
* parameters :
|
* parameters :
|
||||||
* sendData : required
|
* sendData : required
|
||||||
* tokenPerExpertData : required
|
* tokenPerExpertData : required
|
||||||
@@ -40,7 +40,7 @@ aclnnStatus aclnnNotifyDispatchGetWorkspaceSize(
|
|||||||
uint64_t *workspaceSize,
|
uint64_t *workspaceSize,
|
||||||
aclOpExecutor **executor);
|
aclOpExecutor **executor);
|
||||||
|
|
||||||
/* funtion: aclnnNotifyDispatch
|
/* function: aclnnNotifyDispatch
|
||||||
* parameters :
|
* parameters :
|
||||||
* workspace : workspace memory addr(input).
|
* workspace : workspace memory addr(input).
|
||||||
* workspaceSize : size of workspace(input).
|
* workspaceSize : size of workspace(input).
|
||||||
|
|||||||
@@ -1095,7 +1095,7 @@ ge::graphStatus SFATilingCheck::CheckActualSeqLens()
|
|||||||
if (std::string(opParamInfo_.layoutKV) == "TND" && opParamInfo_.actualSeqLengths.tensor == nullptr) {
|
if (std::string(opParamInfo_.layoutKV) == "TND" && opParamInfo_.actualSeqLengths.tensor == nullptr) {
|
||||||
OPS_LOG_E(opName_,
|
OPS_LOG_E(opName_,
|
||||||
"when the layout of key and value is TND, "
|
"when the layout of key and value is TND, "
|
||||||
"the actualSeqLengths of key and value shoule not be empty.");
|
"the actualSeqLengths of key and value should not be empty.");
|
||||||
return ge::GRAPH_PARAM_INVALID;
|
return ge::GRAPH_PARAM_INVALID;
|
||||||
}
|
}
|
||||||
if (ge::GRAPH_SUCCESS != CheckActualSeqLensDType() ||
|
if (ge::GRAPH_SUCCESS != CheckActualSeqLensDType() ||
|
||||||
|
|||||||
@@ -116,7 +116,7 @@ template <typename T, typename... Args> constexpr uint64_t RecursiveSum(T templa
|
|||||||
// Format: 表示当前tiling key支持的Format, 使用枚举InputLayout表示,占一个十进制位
|
// Format: 表示当前tiling key支持的Format, 使用枚举InputLayout表示,占一个十进制位
|
||||||
// Sparse: 表示当前tiling key是否支持Sparse,使用枚举SparseCapability表示,占一个十进制位
|
// Sparse: 表示当前tiling key是否支持Sparse,使用枚举SparseCapability表示,占一个十进制位
|
||||||
// 其余特化场景,定义自己的位域和值
|
// 其余特化场景,定义自己的位域和值
|
||||||
// usage: get tilingKey from inputed types
|
// usage: get tilingKey from inputted types
|
||||||
// uint64_t tilingKey = GET_FLASHATTENTION_TILINGKEY(AxisEnum::AXIS_S1, AxisEnum::AXIS_S2, AxisEnum::AXIS_N2,
|
// uint64_t tilingKey = GET_FLASHATTENTION_TILINGKEY(AxisEnum::AXIS_S1, AxisEnum::AXIS_S2, AxisEnum::AXIS_N2,
|
||||||
// SupportedDtype::FLOAT32, InputLayout::BSH, SparseCapability::SUPPORT_ALL)
|
// SupportedDtype::FLOAT32, InputLayout::BSH, SparseCapability::SUPPORT_ALL)
|
||||||
|
|
||||||
@@ -126,7 +126,7 @@ template <typename... Args> constexpr uint64_t GET_TILINGKEY(Args... templateIds
|
|||||||
return TILINGKEYOFFSET + RecursiveSum(templateIds...);
|
return TILINGKEYOFFSET + RecursiveSum(templateIds...);
|
||||||
}
|
}
|
||||||
|
|
||||||
// usage: get tilingKey from inputed types
|
// usage: get tilingKey from inputted types
|
||||||
// uint64_t tilingKey = TILINGKEY(S2, S1, N2, FLOAT32, BSND, ALL)
|
// uint64_t tilingKey = TILINGKEY(S2, S1, N2, FLOAT32, BSND, ALL)
|
||||||
|
|
||||||
#define TILINGKEY(ub2, ub1, block, dtype, layout, sparse) \
|
#define TILINGKEY(ub2, ub1, block, dtype, layout, sparse) \
|
||||||
|
|||||||
@@ -38,7 +38,7 @@ Given that PCP and DCP behave similarly for KV cache sharding, we refer to them
|
|||||||
|
|
||||||
As illustrated, a virtual block is defined in the block table, where blocks within the same CP device group form a virtual block. The virtual block size is `virtual_block_size = block_size * cp_size`.
|
As illustrated, a virtual block is defined in the block table, where blocks within the same CP device group form a virtual block. The virtual block size is `virtual_block_size = block_size * cp_size`.
|
||||||
|
|
||||||
For any token `x`, referencing the folloing figure, its (virtual) block index is `x // virtual_block_size`, and the offset within the virtual block is `offset_within_virtual_block = x % virtual_block_size`.
|
For any token `x`, referencing the following figure, its (virtual) block index is `x // virtual_block_size`, and the offset within the virtual block is `offset_within_virtual_block = x % virtual_block_size`.
|
||||||
The local block index is `local_block_index = offset_within_virtual_block // cp_kv_cache_interleave_size`, and the device number is `target_rank = local_block_index % cp_size`.
|
The local block index is `local_block_index = offset_within_virtual_block // cp_kv_cache_interleave_size`, and the device number is `target_rank = local_block_index % cp_size`.
|
||||||
The offset within the local block is `(local_block_index // cp_size) * cp_kv_cache_interleave_size + offset_within_virtual_block % cp_kv_cache_interleave_size`.
|
The offset within the local block is `(local_block_index // cp_size) * cp_kv_cache_interleave_size + offset_within_virtual_block % cp_kv_cache_interleave_size`.
|
||||||
|
|
||||||
|
|||||||
@@ -699,7 +699,7 @@ The performance result is:
|
|||||||
|
|
||||||
**Input/Output**: 3.5k/1.5k
|
**Input/Output**: 3.5k/1.5k
|
||||||
|
|
||||||
**Performance**: TTFT = 6.16s, TPOT = 48.82ms, Average performance of each card is 478 TPS (Token Per Secon).
|
**Performance**: TTFT = 6.16s, TPOT = 48.82ms, Average performance of each card is 478 TPS (Token Per Second).
|
||||||
|
|
||||||
### Using vLLM Benchmark
|
### Using vLLM Benchmark
|
||||||
|
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
## v0.13.0rc2 - 2026.01.24
|
## v0.13.0rc2 - 2026.01.24
|
||||||
|
|
||||||
This is the second release candidate of v0.13.0 for vLLM Ascend. In this rc relesae, we fixed lots of bugs and improved the performance of many models. Please follow the [official doc](https://docs.vllm.ai/projects/ascend/en/v0.13.0/) to get started. Any feedback is welcome to help us to improve the final version of v0.13.0.
|
This is the second release candidate of v0.13.0 for vLLM Ascend. In this rc release, we fixed lots of bugs and improved the performance of many models. Please follow the [official doc](https://docs.vllm.ai/projects/ascend/en/v0.13.0/) to get started. Any feedback is welcome to help us to improve the final version of v0.13.0.
|
||||||
|
|
||||||
### Highlights
|
### Highlights
|
||||||
|
|
||||||
@@ -19,7 +19,7 @@ We mainly focus on quality and performance improvement in this release. The spec
|
|||||||
|
|
||||||
### Model Support
|
### Model Support
|
||||||
|
|
||||||
- LongCat-Flash is supproted now.[#3833](https://github.com/vllm-project/vllm-ascend/pull/3833)
|
- LongCat-Flash is supported now.[#3833](https://github.com/vllm-project/vllm-ascend/pull/3833)
|
||||||
- minimax_m2 is supported now. [#5624](https://github.com/vllm-project/vllm-ascend/pull/5624)
|
- minimax_m2 is supported now. [#5624](https://github.com/vllm-project/vllm-ascend/pull/5624)
|
||||||
- Support for cross-attention and whisper models [#5592](https://github.com/vllm-project/vllm-ascend/pull/5592)
|
- Support for cross-attention and whisper models [#5592](https://github.com/vllm-project/vllm-ascend/pull/5592)
|
||||||
|
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ Here is an example guiding how to use `launch_online_dp.py` to launch external d
|
|||||||
`run_dp_template.sh` is a template script used to launch each dp vllm instance separately. It will be called by `launch_online_dp.py` in multiple threads and most of its configurations are set by `launch_online_dp.py`. Parameters you need to set manually include:
|
`run_dp_template.sh` is a template script used to launch each dp vllm instance separately. It will be called by `launch_online_dp.py` in multiple threads and most of its configurations are set by `launch_online_dp.py`. Parameters you need to set manually include:
|
||||||
|
|
||||||
1. The IP and socket_ifname of your machine. If running on multi-nodes, please make sure the script on each node has been set with the correct IP and socket_ifname of that node.
|
1. The IP and socket_ifname of your machine. If running on multi-nodes, please make sure the script on each node has been set with the correct IP and socket_ifname of that node.
|
||||||
2. vLLM serving related parameters including model_path and other configurations. Note that port, dp-related parammeters and tp_size are set by `launch_online_dp.py`; all the other vLLM parameters in this file only serve as an example and you are free to modify them according to your purpose.
|
2. vLLM serving related parameters including model_path and other configurations. Note that port, dp-related parameters and tp_size are set by `launch_online_dp.py`; all the other vLLM parameters in this file only serve as an example and you are free to modify them according to your purpose.
|
||||||
|
|
||||||
### Run `launch_online_dp.py` with CL arguments
|
### Run `launch_online_dp.py` with CL arguments
|
||||||
|
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ locale = "en"
|
|||||||
extend-ignore-identifiers-re = [".*Unc.*", ".*_thw",
|
extend-ignore-identifiers-re = [".*Unc.*", ".*_thw",
|
||||||
".*UE8M0.*", ".*[UE4M3|ue4m3].*", ".*eles.*", ".*fo.*", ".*ba.*",
|
".*UE8M0.*", ".*[UE4M3|ue4m3].*", ".*eles.*", ".*fo.*", ".*ba.*",
|
||||||
".*ot.*", ".*[Tt]h[rR].*"]
|
".*ot.*", ".*[Tt]h[rR].*"]
|
||||||
extend-ignore-words-re = ["CANN", "cann","ND"]
|
extend-ignore-words-re = ["CANN", "cann","ND","alog"]
|
||||||
extend-ignore-re = []
|
extend-ignore-re = []
|
||||||
|
|
||||||
[default.extend-identifiers]
|
[default.extend-identifiers]
|
||||||
|
|||||||
Reference in New Issue
Block a user