[CI][lint] Add rule codespell back (#6236)

### What this PR does / why we need it?
After removing codespell a while ago, we discovered that the `typos` checker had a problem
correctly recognizing certain misspelled words, so I suggest adding codespell
back.

- vLLM version: v0.14.1
- vLLM main:
d68209402d

---------

Signed-off-by: wangli <wangli858794774@gmail.com>
This commit is contained in:
Li Wang
2026-01-26 14:12:33 +08:00
committed by GitHub
parent f4abd9b7b5
commit c26ad78f86
33 changed files with 67 additions and 56 deletions

2
.github/CODEOWNERS vendored
View File

@@ -29,7 +29,7 @@
/.readthedocs.yaml @wangxiyuan @Yikun
/README* @wangxiyuan @Yikun
# exmaple
# example
/examples @wangxiyuan
# tests

View File

@@ -18,7 +18,7 @@ body:
A brief introduction about the background of your use case, like your scenario, hardware size etc.
- type: textarea
attributes:
label: Bussiness Challenges
label: Business Challenges
description: >
Tell us how what kind of challenge you faced in this user story.
- type: textarea
@@ -30,7 +30,7 @@ body:
attributes:
label: Extra Info
description: >
Any extra infomation you want to include in this story
Any extra information you want to include in this story
- type: markdown
attributes:
value: >

View File

@@ -139,7 +139,7 @@ jobs:
quay.io/ascend/vllm-ascend
# Note for test case
# https://github.com/marketplace/actions/docker-metadata-action#typeref
# 1. branch job pulish per main/*-dev branch commits
# 1. branch job publish per main/*-dev branch commits
# 2. main and dev pull_request is build only, so the tag pr-N-openeuler is fine
# 3. only pep440 matched tag will be published:
# - v0.7.1 --> v0.7.1-openeuler

View File

@@ -11,6 +11,17 @@ repos:
- id: ruff-check
args: [--output-format, github, --fix]
- id: ruff-format
- repo: https://github.com/codespell-project/codespell
rev: v2.4.1
hooks:
- id: codespell
args: [
--toml, pyproject.toml,
'--skip', 'tests/prompts/**,./benchmarks/sonnet.txt,*tests/lora/data/**,build/**,./vllm_ascend.egg-info/**,typos.toml',
'-L', 'CANN,cann,NNAL,nnal,ASCEND,ascend,EnQue,CopyIn,ArchType,AND,ND,tbe,copyin,alog'
]
additional_dependencies:
- tomli
- repo: https://github.com/crate-ci/typos
rev: v1.32.0
hooks:

View File

@@ -144,10 +144,10 @@ static bool CheckInputOutputDim(const gert::TilingContext* context)
OP_CHECK_IF(
x1DimNum != x2DimNum,
OP_LOGE(context, "Input x2/x1 shape invaild, dim num is not equal x1 dim."), return false);
OP_LOGE(context, "Input x2/x1 shape invalid, dim num is not equal x1 dim."), return false);
OP_CHECK_IF(
(yDimNum != xDimNum) || (xDimNum != x1DimNum) || (rstdDimNum != x1DimNum),
OP_LOGE(context, "Output y/x/rstd shape invaild, dim num is not equal x1 dim."), return false);
OP_LOGE(context, "Output y/x/rstd shape invalid, dim num is not equal x1 dim."), return false);
OP_CHECK_IF(
x1DimNum < gammaDimNum, OP_LOGE(context, "X1 dim num should not be smaller than gamma dim num."),
return false);
@@ -180,26 +180,26 @@ static bool CheckInputOutputShape(const gert::TilingContext* context)
return false);
OP_CHECK_IF(
x2_shape->GetStorageShape().GetDim(i) != x1_shape->GetStorageShape().GetDim(i),
OP_LOGE(context, "Input x2/x1 shape invaild, shape is not equal x1 shape."), return false);
OP_LOGE(context, "Input x2/x1 shape invalid, shape is not equal x1 shape."), return false);
OP_CHECK_IF(
(y_shape->GetStorageShape().GetDim(i) != x1_shape->GetStorageShape().GetDim(i)) ||
(x_shape->GetStorageShape().GetDim(i) != x1_shape->GetStorageShape().GetDim(i)),
OP_LOGE(context, "Input y/x shape invaild, shape is not equal x1 shape."), return false);
OP_LOGE(context, "Input y/x shape invalid, shape is not equal x1 shape."), return false);
}
for (uint32_t i = 0; i < x1DimNum - gammaDimNum; i++) {
OP_CHECK_IF(
rstd_shape->GetStorageShape().GetDim(i) != x2_shape->GetStorageShape().GetDim(i),
OP_LOGE(context, "Output rstd shape invaild, shape is not equal x1 first few dim."),
OP_LOGE(context, "Output rstd shape invalid, shape is not equal x1 first few dim."),
return false);
}
for (uint32_t i = 0; i < gammaDimNum; i++) {
OP_CHECK_IF(
gamma_shape->GetStorageShape().GetDim(i) != x1_shape->GetStorageShape().GetDim(x1DimNum - gammaDimNum + i),
OP_LOGE(context, "Input gamma shape invaild, gamma shape is not equal x1 last few dim."),
OP_LOGE(context, "Input gamma shape invalid, gamma shape is not equal x1 last few dim."),
return false);
OP_CHECK_IF(
rstd_shape->GetStorageShape().GetDim(x1DimNum - 1 - i) != 1,
OP_LOGE(context, "Output rstd shape invaild, last few dim is not equal to 1."),
OP_LOGE(context, "Output rstd shape invalid, last few dim is not equal to 1."),
return false);
}
return true;

View File

@@ -11,11 +11,11 @@ if [[ "$SOC_VERSION" =~ ^ascend310 ]]; then
exit 0
elif [[ "$SOC_VERSION" =~ ^ascend910b ]]; then
# ASCEND910B (A2) series
# depdendency: catlass
# dependency: catlass
git config --global --add safe.directory "$ROOT_DIR"
CATLASS_PATH=${ROOT_DIR}/csrc/third_party/catlass/include
if [[ ! -d "${CATLASS_PATH}" ]]; then
echo "depdendency catlass is missing, try to fetch it..."
echo "dependency catlass is missing, try to fetch it..."
if ! git submodule update --init --recursive; then
echo "fetch failed"
exit 1
@@ -28,17 +28,17 @@ elif [[ "$SOC_VERSION" =~ ^ascend910b ]]; then
SOC_ARG="ascend910b"
elif [[ "$SOC_VERSION" =~ ^ascend910_93 ]]; then
# ASCEND910C (A3) series
# depdendency: catlass
# dependency: catlass
git config --global --add safe.directory "$ROOT_DIR"
CATLASS_PATH=${ROOT_DIR}/csrc/third_party/catlass/include
if [[ ! -d "${CATLASS_PATH}" ]]; then
echo "depdendency catlass is missing, try to fetch it..."
echo "dependency catlass is missing, try to fetch it..."
if ! git submodule update --init --recursive; then
echo "fetch failed"
exit 1
fi
fi
# depdendency: cann-toolkit file moe_distribute_base.h
# dependency: cann-toolkit file moe_distribute_base.h
HCCL_STRUCT_FILE_PATH=$(find -L "${ASCEND_TOOLKIT_HOME}" -name "moe_distribute_base.h" 2>/dev/null | head -n1)
if [ -z "$HCCL_STRUCT_FILE_PATH" ]; then
echo "cannot find moe_distribute_base.h file in CANN env"

View File

@@ -162,7 +162,7 @@ __aicore__ inline void MoeV2ExpertTokenOut::CopyOutExpertTokensCumsum(bool isTai
this->expertTokenValue += this->expertTokenIdxOutLocal.GetValue(i);
this->expertTokenIdxOutLocal.SetValue(i, this->expertTokenValue);
}
// if the remianing UB is sufficient, use the UB space to copy
// if the remaining UB is sufficient, use the UB space to copy
// otherwise, copy the calculated data first, and then copy the last tokenValue to remaining expert position
if (isTail && end <= this->expertNumUbAlign) {
int64_t startAlign = Min(Align(copyLength, sizeof(int32_t)), end);

View File

@@ -13,7 +13,7 @@ namespace Catlass::Gemm::Tile {
static constexpr uint32_t ELE_NUM_PER_C0 = BYTE_PER_C0 / sizeof(Element); // int64, 32/8=4
// Mehtods
// Methods
CATLASS_DEVICE
CopyGmToL1() {};

View File

@@ -177,7 +177,7 @@ __aicore__ inline void MoeV2ExpertTokenOut::CopyOutExpertTokensCumsum(bool isTai
this->expertTokenValue += this->expertTokenIdxOutLocal.GetValue(i);
this->expertTokenIdxOutLocal.SetValue(i, this->expertTokenValue);
}
// if the remianing UB is sufficient, use the UB space to copy
// if the remaining UB is sufficient, use the UB space to copy
// otherwise, copy the calculated data first, and then copy the last tokenValue to remaining expert position
if (isTail && end <= this->expertNumUbAlign) {
int64_t startAlign = Min(Align(copyLength, sizeof(int32_t)), end);

View File

@@ -13,7 +13,7 @@ namespace Catlass::Gemm::Tile {
static constexpr uint32_t ELE_NUM_PER_C0 = BYTE_PER_C0 / sizeof(Element); // int64, 32/8=4
// Mehtods
// Methods
CATLASS_DEVICE
CopyGmToL1() {};

View File

@@ -7,7 +7,7 @@
extern "C" {
#endif
/* funtion: aclnnDispatchLayoutGetWorkspaceSize
/* function: aclnnDispatchLayoutGetWorkspaceSize
* topkIdx : required
* numTokens : required
* numRanks : required
@@ -31,7 +31,7 @@ __attribute__((visibility("default"))) aclnnStatus aclnnDispatchLayoutGetWorkspa
uint64_t *workspaceSize,
aclOpExecutor **executor);
/* funtion: aclnnDispatchLayout
/* function: aclnnDispatchLayout
* workspace : workspace memory addr(input).
* workspaceSize : size of workspace(input).
* executor : executor context(input).

View File

@@ -17,7 +17,7 @@
#include "grouped_matmul_swiglu_quant_weight_nz_tensor_list_utils.h"
namespace GROUPED_MATMUL_SWIGLU_QUANT_WEIGHT_NZ_TENSOR_LIST {
/** @brief intenal computation class
/** @brief internal computation class
*/
template <class mmType, bool sync = false, typename CHANNELDTYPE = float>
class GMMSwigluCompute{

View File

@@ -374,7 +374,7 @@ ge::graphStatus LIInfoParser::GetHeadDim()
dIndex = DIM_IDX_TWO;
break;
case DataLayout::BSND:
// BSND: [Batch, SeqLen, N, D] -> D is the 3nd dimension
// BSND: [Batch, SeqLen, N, D] -> D is the 3rd dimension
dIndex = DIM_IDX_THREE;
break;
default:

View File

@@ -7,7 +7,7 @@
extern "C" {
#endif
/* funtion: aclnnMoeCombineGetWorkspaceSize
/* function: aclnnMoeCombineGetWorkspaceSize
* recvX : required
* tokenSrcInfo : required
* epRecvCounts : required
@@ -43,7 +43,7 @@ __attribute__((visibility("default"))) aclnnStatus aclnnMoeCombineNormalGetWorks
uint64_t *workspaceSize,
aclOpExecutor **executor);
/* funtion: aclnnMoeCombine
/* function: aclnnMoeCombine
* workspace : workspace memory addr(input).
* workspaceSize : size of workspace(input).
* executor : executor context(input).

View File

@@ -419,7 +419,7 @@ static ge::graphStatus SetWorkspace(gert::TilingContext *context, const char *no
OPS_CHECK(workspace == nullptr, OPS_LOG_E(nodeName, "get workspace failed"),
return ge::GRAPH_FAILED);
workspace[0] = SYSTEM_NEED_WORKSPACE;
OPS_LOG_D(nodeName, "workspce[0] size is %ld", workspace[0]);
OPS_LOG_D(nodeName, "workspace[0] size is %ld", workspace[0]);
return ge::GRAPH_SUCCESS;
}

View File

@@ -34,7 +34,7 @@ namespace ge {
* @li out: A 2D tensor which is the renorm result of moe gating topk, format supports ND, and data type must be float. The shape must be the same as that of x.
*
* @par Attributes:
* @li k: A required attribute of type int. The value must greater than 0 and less than or equal to expert_num / group_count * k_group, idicating the topk value.
* @li k: A required attribute of type int. The value must greater than 0 and less than or equal to expert_num / group_count * k_group, indicating the topk value.
* @li k_group: An optional attribute of type int. It can not be less than 1, and can not be greater than group_count, indicating the topk group value. The default value is 1.
* @li group_count: An optional attribute of type int. It can not be less than 1, indicating the group count. The group_count * align_32(expert_num / group_count) can not be greater than 2048. The default value is 1.
* @li group_select_mode: An optional attribute of type int. 0 indicating that sort group by max values, 1 indicating that sort group by sum of top-2 values. The default value is 0.

View File

@@ -444,11 +444,11 @@ ge::graphStatus MoeGatingTopKTilingBase::CheckOutShape()
}
OP_CHECK_IF((yShape_->GetDim(0) != xShape_->GetDim(0)),
OP_LOGE(context_, "y out dim[0] %ld not euqal x dim[0] %ld, please check.", yShape_->GetDim(0),
OP_LOGE(context_, "y out dim[0] %ld not equal x dim[0] %ld, please check.", yShape_->GetDim(0),
xShape_->GetDim(0)),
return ge::GRAPH_FAILED);
OP_CHECK_IF((expertIdxShape_->GetDim(0) != xShape_->GetDim(0)),
OP_LOGE(context_, "expertId out dim[0] %ld not euqal x dim[0] %ld, please check.",
OP_LOGE(context_, "expertId out dim[0] %ld not equal x dim[0] %ld, please check.",
expertIdxShape_->GetDim(0), xShape_->GetDim(0)),
return ge::GRAPH_FAILED);
if (outFlag_ && outShape_ != nullptr) {
@@ -459,10 +459,10 @@ ge::graphStatus MoeGatingTopKTilingBase::CheckOutShape()
}
OP_CHECK_IF((yShape_->GetDim(1) != k_),
OP_LOGE(context_, "y dim[1] %ld not euqal k %ld, please check.", yShape_->GetDim(1), k_),
OP_LOGE(context_, "y dim[1] %ld not equal k %ld, please check.", yShape_->GetDim(1), k_),
return ge::GRAPH_FAILED);
OP_CHECK_IF((expertIdxShape_->GetDim(1) != k_),
OP_LOGE(context_, "expertId dim[1] %ld not euqal k %ld, please check.", expertIdxShape_->GetDim(1), k_),
OP_LOGE(context_, "expertId dim[1] %ld not equal k %ld, please check.", expertIdxShape_->GetDim(1), k_),
return ge::GRAPH_FAILED);
if (outFlag_ && outShape_ != nullptr) {
OP_CHECK_IF((outShape_->GetDim(1) != xShape_->GetDim(1)),

View File

@@ -400,11 +400,11 @@ ge::graphStatus MoeGatingTopKTilingRegbase::CheckOutShape()
}
OP_CHECK_IF((yShape_->GetDim(0) != xShape_->GetDim(0)),
OP_LOGE(context_, "y out dim[0] %ld not euqal x dim[0] %ld, please check.", yShape_->GetDim(0),
OP_LOGE(context_, "y out dim[0] %ld not equal x dim[0] %ld, please check.", yShape_->GetDim(0),
xShape_->GetDim(0)),
return ge::GRAPH_FAILED);
OP_CHECK_IF((expertIdxShape_->GetDim(0) != xShape_->GetDim(0)),
OP_LOGE(context_, "expertId out dim[0] %ld not euqal x dim[0] %ld, please check.",
OP_LOGE(context_, "expertId out dim[0] %ld not equal x dim[0] %ld, please check.",
expertIdxShape_->GetDim(0), xShape_->GetDim(0)),
return ge::GRAPH_FAILED);
if (outFlag_ && outShape_ != nullptr) {
@@ -415,10 +415,10 @@ ge::graphStatus MoeGatingTopKTilingRegbase::CheckOutShape()
}
OP_CHECK_IF((yShape_->GetDim(1) != k_),
OP_LOGE(context_, "y dim[1] %ld not euqal k %ld, please check.", yShape_->GetDim(1), k_),
OP_LOGE(context_, "y dim[1] %ld not equal k %ld, please check.", yShape_->GetDim(1), k_),
return ge::GRAPH_FAILED);
OP_CHECK_IF((expertIdxShape_->GetDim(1) != k_),
OP_LOGE(context_, "expertId dim[1] %ld not euqal k %ld, please check.", expertIdxShape_->GetDim(1), k_),
OP_LOGE(context_, "expertId dim[1] %ld not equal k %ld, please check.", expertIdxShape_->GetDim(1), k_),
return ge::GRAPH_FAILED);
if (outFlag_ && outShape_ != nullptr) {
OP_CHECK_IF((outShape_->GetDim(1) != xShape_->GetDim(1)),

View File

@@ -41,7 +41,7 @@ template <typename T, typename... Args> constexpr uint64_t RecursiveSum(T templa
// Format: Represents the Format supported by the current tiling key, using InputLayout enum, occupies one decimal digit
// Sparse: Represents whether the current tiling key supports Sparse, using SparseCapability enum, occupies one decimal digit
// For other specialized scenarios, define your own bit fields and values
// usage: get tilingKey from inputed types
// usage: get tilingKey from inputted types
// uint64_t tilingKey = GET_FLASHATTENTION_TILINGKEY(AxisEnum::AXIS_S1, AxisEnum::AXIS_S2, AxisEnum::AXIS_N2,
// SupportedDtype::FLOAT32, InputLayout::BSH, SparseCapability::SUPPORT_ALL)
@@ -51,7 +51,7 @@ template <typename... Args> constexpr uint64_t GET_TILINGKEY(Args... templateIds
return TILINGKEYOFFSET + RecursiveSum(templateIds...);
}
// usage: get tilingKey from inputed types
// usage: get tilingKey from inputted types
// uint64_t tilingKey = TILINGKEY(S2, S1, N2, FLOAT32, BSND, ALL)
#define TILINGKEY(ub2, ub1, block, dtype, layout, sparse) \

View File

@@ -119,7 +119,7 @@ template <typename T, typename... Args> constexpr uint64_t RecursiveSum(T templa
// Format: Represents the Format supported by the current tiling key, using InputLayout enum, occupies one decimal digit
// Sparse: Represents whether the current tiling key supports Sparse, using SparseCapability enum, occupies one decimal digit
// For other specialized scenarios, define your own bit fields and values
// usage: get tilingKey from inputed types
// usage: get tilingKey from inputted types
// uint64_t tilingKey = GET_FLASHATTENTION_TILINGKEY(AxisEnum::AXIS_S1, AxisEnum::AXIS_S2, AxisEnum::AXIS_N2,
// SupportedDtype::FLOAT32, InputLayout::BSH, SparseCapability::SUPPORT_ALL)
@@ -129,7 +129,7 @@ template <typename... Args> constexpr uint64_t GET_TILINGKEY(Args... templateIds
return TILINGKEYOFFSET + RecursiveSum(templateIds...);
}
// usage: get tilingKey from inputed types
// usage: get tilingKey from inputted types
// uint64_t tilingKey = TILINGKEY(S2, S1, N2, FLOAT32, BSND, ALL)
#define TILINGKEY(ub2, ub1, block, dtype, layout, sparse) \

View File

@@ -903,7 +903,7 @@ void MoeInitRountingCustomTilingBase::Tinlig4VBSMultiCoreCompute(MoeCustomVBSCom
needCoreNum = std::min(needCoreNum, aivNum);
if (needCoreNum == 0) {
OPS_LOG_E(context_->GetNodeName(), "Variale needCoreNum cannot be 0.");
OPS_LOG_E(context_->GetNodeName(), "Variate needCoreNum cannot be 0.");
return;
}
int64_t perCoreElements = (needCoreNum == 0) ? 0 : (totalLength_ / needCoreNum);

View File

@@ -82,7 +82,7 @@ __aicore__ inline void MoeCustomFullLoadDynamicQuant<T, COPYOUTTYPE, SMOOTHTYPE>
this->CopyIn();
this->Compute();
// vaild expert equal zero
// valid expert equal zero
if (this->needCoreNum_ < 1) {
if (this->blockIdx_ == 0) {
if (this->rowIdxType_ == GATHER) {

View File

@@ -85,7 +85,7 @@ __aicore__ inline void MoeCustomFullLoadStaticQuant<T>::Process()
this->CopyIn();
this->Compute();
// vaild expert equal zero
// valid expert equal zero
if (this->needCoreNum_ < 1) {
if (this->blockIdx_ == 0) {
if (this->rowIdxType_ == GATHER) {

View File

@@ -78,7 +78,7 @@ __aicore__ inline void MoeCustomFullLoadUnquantized<T>::Process()
this->CopyIn();
this->Compute();
// vaild expert equal zero
// valid expert equal zero
if (this->needCoreNum_ < 1) {
if (this->blockIdx_ == 0) {
if (this->rowIdxType_ == GATHER) {

View File

@@ -8,7 +8,7 @@
extern "C" {
#endif
/* funtion: aclnnNotifyDispatchGetWorkspaceSize
/* function: aclnnNotifyDispatchGetWorkspaceSize
* parameters :
* sendData : required
* tokenPerExpertData : required
@@ -40,7 +40,7 @@ aclnnStatus aclnnNotifyDispatchGetWorkspaceSize(
uint64_t *workspaceSize,
aclOpExecutor **executor);
/* funtion: aclnnNotifyDispatch
/* function: aclnnNotifyDispatch
* parameters :
* workspace : workspace memory addr(input).
* workspaceSize : size of workspace(input).

View File

@@ -1095,7 +1095,7 @@ ge::graphStatus SFATilingCheck::CheckActualSeqLens()
if (std::string(opParamInfo_.layoutKV) == "TND" && opParamInfo_.actualSeqLengths.tensor == nullptr) {
OPS_LOG_E(opName_,
"when the layout of key and value is TND, "
"the actualSeqLengths of key and value shoule not be empty.");
"the actualSeqLengths of key and value should not be empty.");
return ge::GRAPH_PARAM_INVALID;
}
if (ge::GRAPH_SUCCESS != CheckActualSeqLensDType() ||

View File

@@ -116,7 +116,7 @@ template <typename T, typename... Args> constexpr uint64_t RecursiveSum(T templa
// Format: 表示当前tiling key支持的Format, 使用枚举InputLayout表示占一个十进制位
// Sparse: 表示当前tiling key是否支持Sparse使用枚举SparseCapability表示占一个十进制位
// 其余特化场景,定义自己的位域和值
// usage: get tilingKey from inputed types
// usage: get tilingKey from inputted types
// uint64_t tilingKey = GET_FLASHATTENTION_TILINGKEY(AxisEnum::AXIS_S1, AxisEnum::AXIS_S2, AxisEnum::AXIS_N2,
// SupportedDtype::FLOAT32, InputLayout::BSH, SparseCapability::SUPPORT_ALL)
@@ -126,7 +126,7 @@ template <typename... Args> constexpr uint64_t GET_TILINGKEY(Args... templateIds
return TILINGKEYOFFSET + RecursiveSum(templateIds...);
}
// usage: get tilingKey from inputed types
// usage: get tilingKey from inputted types
// uint64_t tilingKey = TILINGKEY(S2, S1, N2, FLOAT32, BSND, ALL)
#define TILINGKEY(ub2, ub1, block, dtype, layout, sparse) \

View File

@@ -38,7 +38,7 @@ Given that PCP and DCP behave similarly for KV cache sharding, we refer to them
As illustrated, a virtual block is defined in the block table, where blocks within the same CP device group form a virtual block. The virtual block size is `virtual_block_size = block_size * cp_size`.
For any token `x`, referencing the folloing figure, its (virtual) block index is `x // virtual_block_size`, and the offset within the virtual block is `offset_within_virtual_block = x % virtual_block_size`.
For any token `x`, referencing the following figure, its (virtual) block index is `x // virtual_block_size`, and the offset within the virtual block is `offset_within_virtual_block = x % virtual_block_size`.
The local block index is `local_block_index = offset_within_virtual_block // cp_kv_cache_interleave_size`, and the device number is `target_rank = local_block_index % cp_size`.
The offset within the local block is `(local_block_index // cp_size) * cp_kv_cache_interleave_size + offset_within_virtual_block % cp_kv_cache_interleave_size`.

View File

@@ -699,7 +699,7 @@ The performance result is:
**Input/Output**: 3.5k/1.5k
**Performance**: TTFT = 6.16s, TPOT = 48.82ms, Average performance of each card is 478 TPS (Token Per Secon).
**Performance**: TTFT = 6.16s, TPOT = 48.82ms, Average performance of each card is 478 TPS (Token Per Second).
### Using vLLM Benchmark

View File

@@ -2,7 +2,7 @@
## v0.13.0rc2 - 2026.01.24
This is the second release candidate of v0.13.0 for vLLM Ascend. In this rc relesae, we fixed lots of bugs and improved the performance of many models. Please follow the [official doc](https://docs.vllm.ai/projects/ascend/en/v0.13.0/) to get started. Any feedback is welcome to help us to improve the final version of v0.13.0.
This is the second release candidate of v0.13.0 for vLLM Ascend. In this rc release, we fixed lots of bugs and improved the performance of many models. Please follow the [official doc](https://docs.vllm.ai/projects/ascend/en/v0.13.0/) to get started. Any feedback is welcome to help us to improve the final version of v0.13.0.
### Highlights
@@ -19,7 +19,7 @@ We mainly focus on quality and performance improvement in this release. The spec
### Model Support
- LongCat-Flash is supproted now.[#3833](https://github.com/vllm-project/vllm-ascend/pull/3833)
- LongCat-Flash is supported now.[#3833](https://github.com/vllm-project/vllm-ascend/pull/3833)
- minimax_m2 is supported now. [#5624](https://github.com/vllm-project/vllm-ascend/pull/5624)
- Support for cross-attention and whisper models [#5592](https://github.com/vllm-project/vllm-ascend/pull/5592)

View File

@@ -5,7 +5,7 @@ Here is an example guiding how to use `launch_online_dp.py` to launch external d
`run_dp_template.sh` is an template script used to launch each dp vllm instance separately. It will be called by `launch_online_dp.py` in multi threads and most of its configurations are set by `launch_online_dp.py`. Parameters you need to set manually include:
1. The IP and socket_ifname of your machine. If running on multi-nodes, please make sure the scripts on each node has been set with correct IP and socket_ifname of that node.
2. vLLM serving related parameters including model_path and other configurations. Note that port, dp-related parammeters and tp_size is set by `launch_online_dp.py`, all the other vLLM parameters in this file only serve as an example and you are free to modify them according to your purpose.
2. vLLM serving related parameters including model_path and other configurations. Note that port, dp-related parameters and tp_size is set by `launch_online_dp.py`, all the other vLLM parameters in this file only serve as an example and you are free to modify them according to your purpose.
### Run `launch_online_dp.py` with CL arguments

View File

@@ -19,7 +19,7 @@ locale = "en"
extend-ignore-identifiers-re = [".*Unc.*", ".*_thw",
".*UE8M0.*", ".*[UE4M3|ue4m3].*", ".*eles.*", ".*fo.*", ".*ba.*",
".*ot.*", ".*[Tt]h[rR].*"]
extend-ignore-words-re = ["CANN", "cann","ND"]
extend-ignore-words-re = ["CANN", "cann","ND","alog"]
extend-ignore-re = []
[default.extend-identifiers]

View File

@@ -33,5 +33,5 @@ class NPUWorker310(NPUWorker):
self.model_runner = NPUModelRunner310(self.vllm_config, self.device)
def _warm_up_atb(self):
# 310p device donot support torch_npu._npu_matmul_add_fp32 atb ops
# 310p device do not support torch_npu._npu_matmul_add_fp32 atb ops
logger.info("Skip warm-up atb ops for 310P device")