diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 259fbee6..332e2039 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -29,7 +29,7 @@ /.readthedocs.yaml @wangxiyuan @Yikun /README* @wangxiyuan @Yikun -# exmaple +# example /examples @wangxiyuan # tests diff --git a/.github/ISSUE_TEMPLATE/110-user-story.yml b/.github/ISSUE_TEMPLATE/110-user-story.yml index d9928e05..e8899066 100644 --- a/.github/ISSUE_TEMPLATE/110-user-story.yml +++ b/.github/ISSUE_TEMPLATE/110-user-story.yml @@ -18,7 +18,7 @@ body: A brief introduction about the background of your use case, like your scenario, hardware size etc. - type: textarea attributes: - label: Bussiness Challenges + label: Business Challenges description: > Tell us how what kind of challenge you faced in this user story. - type: textarea @@ -30,7 +30,7 @@ body: attributes: label: Extra Info description: > - Any extra infomation you want to include in this story + Any extra information you want to include in this story - type: markdown attributes: value: > diff --git a/.github/workflows/_schedule_image_build.yaml b/.github/workflows/_schedule_image_build.yaml index e7656d0e..09c6bb6f 100644 --- a/.github/workflows/_schedule_image_build.yaml +++ b/.github/workflows/_schedule_image_build.yaml @@ -139,7 +139,7 @@ jobs: quay.io/ascend/vllm-ascend # Note for test case # https://github.com/marketplace/actions/docker-metadata-action#typeref - # 1. branch job pulish per main/*-dev branch commits + # 1. branch job publish per main/*-dev branch commits # 2. main and dev pull_request is build only, so the tag pr-N-openeuler is fine # 3. 
only pep440 matched tag will be published: # - v0.7.1 --> v0.7.1-openeuler diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e18a67be..3ccc1806 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -11,6 +11,17 @@ repos: - id: ruff-check args: [--output-format, github, --fix] - id: ruff-format +- repo: https://github.com/codespell-project/codespell + rev: v2.4.1 + hooks: + - id: codespell + args: [ + --toml, pyproject.toml, + '--skip', 'tests/prompts/**,./benchmarks/sonnet.txt,*tests/lora/data/**,build/**,./vllm_ascend.egg-info/**,typos.toml', + '-L', 'CANN,cann,NNAL,nnal,ASCEND,ascend,EnQue,CopyIn,ArchType,AND,ND,tbe,copyin,alog' + ] + additional_dependencies: + - tomli - repo: https://github.com/crate-ci/typos rev: v1.32.0 hooks: diff --git a/csrc/add_rms_norm_bias/op_host/add_rms_norm_bias_tiling.cpp b/csrc/add_rms_norm_bias/op_host/add_rms_norm_bias_tiling.cpp index a5b67b45..6db36066 100644 --- a/csrc/add_rms_norm_bias/op_host/add_rms_norm_bias_tiling.cpp +++ b/csrc/add_rms_norm_bias/op_host/add_rms_norm_bias_tiling.cpp @@ -144,10 +144,10 @@ static bool CheckInputOutputDim(const gert::TilingContext* context) OP_CHECK_IF( x1DimNum != x2DimNum, - OP_LOGE(context, "Input x2/x1 shape invaild, dim num is not equal x1 dim."), return false); + OP_LOGE(context, "Input x2/x1 shape invalid, dim num is not equal x1 dim."), return false); OP_CHECK_IF( (yDimNum != xDimNum) || (xDimNum != x1DimNum) || (rstdDimNum != x1DimNum), - OP_LOGE(context, "Output y/x/rstd shape invaild, dim num is not equal x1 dim."), return false); + OP_LOGE(context, "Output y/x/rstd shape invalid, dim num is not equal x1 dim."), return false); OP_CHECK_IF( x1DimNum < gammaDimNum, OP_LOGE(context, "X1 dim num should not be smaller than gamma dim num."), return false); @@ -180,26 +180,26 @@ static bool CheckInputOutputShape(const gert::TilingContext* context) return false); OP_CHECK_IF( x2_shape->GetStorageShape().GetDim(i) != x1_shape->GetStorageShape().GetDim(i), 
- OP_LOGE(context, "Input x2/x1 shape invaild, shape is not equal x1 shape."), return false); + OP_LOGE(context, "Input x2/x1 shape invalid, shape is not equal x1 shape."), return false); OP_CHECK_IF( (y_shape->GetStorageShape().GetDim(i) != x1_shape->GetStorageShape().GetDim(i)) || (x_shape->GetStorageShape().GetDim(i) != x1_shape->GetStorageShape().GetDim(i)), - OP_LOGE(context, "Input y/x shape invaild, shape is not equal x1 shape."), return false); + OP_LOGE(context, "Output y/x shape invalid, shape is not equal x1 shape."), return false); } for (uint32_t i = 0; i < x1DimNum - gammaDimNum; i++) { OP_CHECK_IF( rstd_shape->GetStorageShape().GetDim(i) != x2_shape->GetStorageShape().GetDim(i), - OP_LOGE(context, "Output rstd shape invaild, shape is not equal x1 first few dim."), + OP_LOGE(context, "Output rstd shape invalid, shape is not equal x1 first few dim."), return false); } for (uint32_t i = 0; i < gammaDimNum; i++) { OP_CHECK_IF( gamma_shape->GetStorageShape().GetDim(i) != x1_shape->GetStorageShape().GetDim(x1DimNum - gammaDimNum + i), - OP_LOGE(context, "Input gamma shape invaild, gamma shape is not equal x1 last few dim."), + OP_LOGE(context, "Input gamma shape invalid, gamma shape is not equal x1 last few dim."), return false); OP_CHECK_IF( rstd_shape->GetStorageShape().GetDim(x1DimNum - 1 - i) != 1, - OP_LOGE(context, "Output rstd shape invaild, last few dim is not equal to 1."), + OP_LOGE(context, "Output rstd shape invalid, last few dim is not equal to 1."), return false); } return true; diff --git a/csrc/build_aclnn.sh b/csrc/build_aclnn.sh index 9ed497f0..938d56a0 100644 --- a/csrc/build_aclnn.sh +++ b/csrc/build_aclnn.sh @@ -11,11 +11,11 @@ if [[ "$SOC_VERSION" =~ ^ascend310 ]]; then exit 0 elif [[ "$SOC_VERSION" =~ ^ascend910b ]]; then # ASCEND910B (A2) series - # depdendency: catlass + # dependency: catlass git config --global --add safe.directory "$ROOT_DIR" CATLASS_PATH=${ROOT_DIR}/csrc/third_party/catlass/include if [[ !
-d "${CATLASS_PATH}" ]]; then - echo "depdendency catlass is missing, try to fetch it..." + echo "dependency catlass is missing, try to fetch it..." if ! git submodule update --init --recursive; then echo "fetch failed" exit 1 @@ -28,17 +28,17 @@ elif [[ "$SOC_VERSION" =~ ^ascend910b ]]; then SOC_ARG="ascend910b" elif [[ "$SOC_VERSION" =~ ^ascend910_93 ]]; then # ASCEND910C (A3) series - # depdendency: catlass + # dependency: catlass git config --global --add safe.directory "$ROOT_DIR" CATLASS_PATH=${ROOT_DIR}/csrc/third_party/catlass/include if [[ ! -d "${CATLASS_PATH}" ]]; then - echo "depdendency catlass is missing, try to fetch it..." + echo "dependency catlass is missing, try to fetch it..." if ! git submodule update --init --recursive; then echo "fetch failed" exit 1 fi fi - # depdendency: cann-toolkit file moe_distribute_base.h + # dependency: cann-toolkit file moe_distribute_base.h HCCL_STRUCT_FILE_PATH=$(find -L "${ASCEND_TOOLKIT_HOME}" -name "moe_distribute_base.h" 2>/dev/null | head -n1) if [ -z "$HCCL_STRUCT_FILE_PATH" ]; then echo "cannot find moe_distribute_base.h file in CANN env" diff --git a/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_expert_token_out.h b/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_expert_token_out.h index ac4c1f95..ab14c240 100644 --- a/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_expert_token_out.h +++ b/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_expert_token_out.h @@ -162,7 +162,7 @@ __aicore__ inline void MoeV2ExpertTokenOut::CopyOutExpertTokensCumsum(bool isTai this->expertTokenValue += this->expertTokenIdxOutLocal.GetValue(i); this->expertTokenIdxOutLocal.SetValue(i, this->expertTokenValue); } - // if the remianing UB is sufficient, use the UB space to copy + // if the remaining UB is sufficient, use the UB space to copy // otherwise, copy the calculated data first, and then copy the last tokenValue to remaining expert position 
if (isTail && end <= this->expertNumUbAlign) { int64_t startAlign = Min(Align(copyLength, sizeof(int32_t)), end); diff --git a/csrc/dispatch_ffn_combine/op_kernel/utils/copy_gm_to_l1_custom.hpp b/csrc/dispatch_ffn_combine/op_kernel/utils/copy_gm_to_l1_custom.hpp index 84789073..e2954f43 100644 --- a/csrc/dispatch_ffn_combine/op_kernel/utils/copy_gm_to_l1_custom.hpp +++ b/csrc/dispatch_ffn_combine/op_kernel/utils/copy_gm_to_l1_custom.hpp @@ -13,7 +13,7 @@ namespace Catlass::Gemm::Tile { static constexpr uint32_t ELE_NUM_PER_C0 = BYTE_PER_C0 / sizeof(Element); // int64, 32/8=4 - // Mehtods + // Methods CATLASS_DEVICE CopyGmToL1() {}; diff --git a/csrc/dispatch_ffn_combine_bf16/op_kernel/moe_init_routing_v2/moe_v2_expert_token_out.h b/csrc/dispatch_ffn_combine_bf16/op_kernel/moe_init_routing_v2/moe_v2_expert_token_out.h index 6a022e97..43a67050 100644 --- a/csrc/dispatch_ffn_combine_bf16/op_kernel/moe_init_routing_v2/moe_v2_expert_token_out.h +++ b/csrc/dispatch_ffn_combine_bf16/op_kernel/moe_init_routing_v2/moe_v2_expert_token_out.h @@ -177,7 +177,7 @@ __aicore__ inline void MoeV2ExpertTokenOut::CopyOutExpertTokensCumsum(bool isTai this->expertTokenValue += this->expertTokenIdxOutLocal.GetValue(i); this->expertTokenIdxOutLocal.SetValue(i, this->expertTokenValue); } - // if the remianing UB is sufficient, use the UB space to copy + // if the remaining UB is sufficient, use the UB space to copy // otherwise, copy the calculated data first, and then copy the last tokenValue to remaining expert position if (isTail && end <= this->expertNumUbAlign) { int64_t startAlign = Min(Align(copyLength, sizeof(int32_t)), end); diff --git a/csrc/dispatch_ffn_combine_bf16/op_kernel/utils/copy_gm_to_l1_custom.hpp b/csrc/dispatch_ffn_combine_bf16/op_kernel/utils/copy_gm_to_l1_custom.hpp index 84789073..e2954f43 100644 --- a/csrc/dispatch_ffn_combine_bf16/op_kernel/utils/copy_gm_to_l1_custom.hpp +++ b/csrc/dispatch_ffn_combine_bf16/op_kernel/utils/copy_gm_to_l1_custom.hpp @@ -13,7 +13,7 
@@ namespace Catlass::Gemm::Tile { static constexpr uint32_t ELE_NUM_PER_C0 = BYTE_PER_C0 / sizeof(Element); // int64, 32/8=4 - // Mehtods + // Methods CATLASS_DEVICE CopyGmToL1() {}; diff --git a/csrc/dispatch_layout/op_host/aclnn_dispatch_layout.h b/csrc/dispatch_layout/op_host/aclnn_dispatch_layout.h index 20926bab..5ad9417a 100644 --- a/csrc/dispatch_layout/op_host/aclnn_dispatch_layout.h +++ b/csrc/dispatch_layout/op_host/aclnn_dispatch_layout.h @@ -7,7 +7,7 @@ extern "C" { #endif -/* funtion: aclnnDispatchLayoutGetWorkspaceSize +/* function: aclnnDispatchLayoutGetWorkspaceSize * topkIdx : required * numTokens : required * numRanks : required @@ -31,7 +31,7 @@ __attribute__((visibility("default"))) aclnnStatus aclnnDispatchLayoutGetWorkspa uint64_t *workspaceSize, aclOpExecutor **executor); -/* funtion: aclnnDispatchLayout +/* function: aclnnDispatchLayout * workspace : workspace memory addr(input). * workspaceSize : size of workspace(input). * executor : executor context(input). diff --git a/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_kernel/grouped_matmul_swiglu_quant_weight_nz_tensor_list.h b/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_kernel/grouped_matmul_swiglu_quant_weight_nz_tensor_list.h index 45d95488..6e9d371f 100644 --- a/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_kernel/grouped_matmul_swiglu_quant_weight_nz_tensor_list.h +++ b/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_kernel/grouped_matmul_swiglu_quant_weight_nz_tensor_list.h @@ -17,7 +17,7 @@ #include "grouped_matmul_swiglu_quant_weight_nz_tensor_list_utils.h" namespace GROUPED_MATMUL_SWIGLU_QUANT_WEIGHT_NZ_TENSOR_LIST { -/** @brief intenal computation class +/** @brief internal computation class */ template class GMMSwigluCompute{ diff --git a/csrc/lightning_indexer/op_host/lightning_indexer_tiling.cpp b/csrc/lightning_indexer/op_host/lightning_indexer_tiling.cpp index ff7f7843..ae49996b 100644 --- 
a/csrc/lightning_indexer/op_host/lightning_indexer_tiling.cpp +++ b/csrc/lightning_indexer/op_host/lightning_indexer_tiling.cpp @@ -374,7 +374,7 @@ ge::graphStatus LIInfoParser::GetHeadDim() dIndex = DIM_IDX_TWO; break; case DataLayout::BSND: - // BSND: [Batch, SeqLen, N, D] -> D is the 3nd dimension + // BSND: [Batch, SeqLen, N, D] -> D is the 3rd dimension dIndex = DIM_IDX_THREE; break; default: diff --git a/csrc/moe_combine_normal/op_host/aclnn_moe_combine_normal.h b/csrc/moe_combine_normal/op_host/aclnn_moe_combine_normal.h index 50ba7122..a8f775b3 100644 --- a/csrc/moe_combine_normal/op_host/aclnn_moe_combine_normal.h +++ b/csrc/moe_combine_normal/op_host/aclnn_moe_combine_normal.h @@ -7,7 +7,7 @@ extern "C" { #endif -/* funtion: aclnnMoeCombineGetWorkspaceSize +/* function: aclnnMoeCombineGetWorkspaceSize * recvX : required * tokenSrcInfo : required * epRecvCounts : required @@ -43,7 +43,7 @@ __attribute__((visibility("default"))) aclnnStatus aclnnMoeCombineNormalGetWorks uint64_t *workspaceSize, aclOpExecutor **executor); -/* funtion: aclnnMoeCombine +/* function: aclnnMoeCombine * workspace : workspace memory addr(input). * workspaceSize : size of workspace(input). * executor : executor context(input). 
diff --git a/csrc/moe_combine_normal/op_host/moe_combine_normal_tiling.cpp b/csrc/moe_combine_normal/op_host/moe_combine_normal_tiling.cpp index 66b3ab3d..ba7a76fe 100644 --- a/csrc/moe_combine_normal/op_host/moe_combine_normal_tiling.cpp +++ b/csrc/moe_combine_normal/op_host/moe_combine_normal_tiling.cpp @@ -419,7 +419,7 @@ static ge::graphStatus SetWorkspace(gert::TilingContext *context, const char *no OPS_CHECK(workspace == nullptr, OPS_LOG_E(nodeName, "get workspace failed"), return ge::GRAPH_FAILED); workspace[0] = SYSTEM_NEED_WORKSPACE; - OPS_LOG_D(nodeName, "workspce[0] size is %ld", workspace[0]); + OPS_LOG_D(nodeName, "workspace[0] size is %ld", workspace[0]); return ge::GRAPH_SUCCESS; } diff --git a/csrc/moe_gating_top_k/op_host/moe_gating_top_k_proto.h b/csrc/moe_gating_top_k/op_host/moe_gating_top_k_proto.h index 068acdfc..ed50d344 100644 --- a/csrc/moe_gating_top_k/op_host/moe_gating_top_k_proto.h +++ b/csrc/moe_gating_top_k/op_host/moe_gating_top_k_proto.h @@ -34,7 +34,7 @@ namespace ge { * @li out: A 2D tensor which is the renorm result of moe gating topk, format supports ND, and data type must be float. The shape must be the same as that of x. * * @par Attributes: - * @li k: A required attribute of type int. The value must greater than 0 and less than or equal to expert_num / group_count * k_group, idicating the topk value. + * @li k: A required attribute of type int. The value must be greater than 0 and less than or equal to expert_num / group_count * k_group, indicating the topk value. * @li k_group: An optional attribute of type int. It can not be less than 1, and can not be greater than group_count, indicating the topk group value. The default value is 1. * @li group_count: An optional attribute of type int. It can not be less than 1, indicating the group count. The group_count * align_32(expert_num / group_count) can not be greater than 2048. The default value is 1. * @li group_select_mode: An optional attribute of type int.
0 indicating that sort group by max values, 1 indicating that sort group by sum of top-2 values. The default value is 0. diff --git a/csrc/moe_gating_top_k/op_host/moe_gating_top_k_tiling.cpp b/csrc/moe_gating_top_k/op_host/moe_gating_top_k_tiling.cpp index 1e17b616..735fac0d 100644 --- a/csrc/moe_gating_top_k/op_host/moe_gating_top_k_tiling.cpp +++ b/csrc/moe_gating_top_k/op_host/moe_gating_top_k_tiling.cpp @@ -444,11 +444,11 @@ ge::graphStatus MoeGatingTopKTilingBase::CheckOutShape() } OP_CHECK_IF((yShape_->GetDim(0) != xShape_->GetDim(0)), - OP_LOGE(context_, "y out dim[0] %ld not euqal x dim[0] %ld, please check.", yShape_->GetDim(0), + OP_LOGE(context_, "y out dim[0] %ld not equal x dim[0] %ld, please check.", yShape_->GetDim(0), xShape_->GetDim(0)), return ge::GRAPH_FAILED); OP_CHECK_IF((expertIdxShape_->GetDim(0) != xShape_->GetDim(0)), - OP_LOGE(context_, "expertId out dim[0] %ld not euqal x dim[0] %ld, please check.", + OP_LOGE(context_, "expertId out dim[0] %ld not equal x dim[0] %ld, please check.", expertIdxShape_->GetDim(0), xShape_->GetDim(0)), return ge::GRAPH_FAILED); if (outFlag_ && outShape_ != nullptr) { @@ -459,10 +459,10 @@ ge::graphStatus MoeGatingTopKTilingBase::CheckOutShape() } OP_CHECK_IF((yShape_->GetDim(1) != k_), - OP_LOGE(context_, "y dim[1] %ld not euqal k %ld, please check.", yShape_->GetDim(1), k_), + OP_LOGE(context_, "y dim[1] %ld not equal k %ld, please check.", yShape_->GetDim(1), k_), return ge::GRAPH_FAILED); OP_CHECK_IF((expertIdxShape_->GetDim(1) != k_), - OP_LOGE(context_, "expertId dim[1] %ld not euqal k %ld, please check.", expertIdxShape_->GetDim(1), k_), + OP_LOGE(context_, "expertId dim[1] %ld not equal k %ld, please check.", expertIdxShape_->GetDim(1), k_), return ge::GRAPH_FAILED); if (outFlag_ && outShape_ != nullptr) { OP_CHECK_IF((outShape_->GetDim(1) != xShape_->GetDim(1)), diff --git a/csrc/moe_gating_top_k/op_host/moe_gating_top_k_tiling_arch35.cpp 
b/csrc/moe_gating_top_k/op_host/moe_gating_top_k_tiling_arch35.cpp index f170d3dd..803b3746 100644 --- a/csrc/moe_gating_top_k/op_host/moe_gating_top_k_tiling_arch35.cpp +++ b/csrc/moe_gating_top_k/op_host/moe_gating_top_k_tiling_arch35.cpp @@ -400,11 +400,11 @@ ge::graphStatus MoeGatingTopKTilingRegbase::CheckOutShape() } OP_CHECK_IF((yShape_->GetDim(0) != xShape_->GetDim(0)), - OP_LOGE(context_, "y out dim[0] %ld not euqal x dim[0] %ld, please check.", yShape_->GetDim(0), + OP_LOGE(context_, "y out dim[0] %ld not equal x dim[0] %ld, please check.", yShape_->GetDim(0), xShape_->GetDim(0)), return ge::GRAPH_FAILED); OP_CHECK_IF((expertIdxShape_->GetDim(0) != xShape_->GetDim(0)), - OP_LOGE(context_, "expertId out dim[0] %ld not euqal x dim[0] %ld, please check.", + OP_LOGE(context_, "expertId out dim[0] %ld not equal x dim[0] %ld, please check.", expertIdxShape_->GetDim(0), xShape_->GetDim(0)), return ge::GRAPH_FAILED); if (outFlag_ && outShape_ != nullptr) { @@ -415,10 +415,10 @@ ge::graphStatus MoeGatingTopKTilingRegbase::CheckOutShape() } OP_CHECK_IF((yShape_->GetDim(1) != k_), - OP_LOGE(context_, "y dim[1] %ld not euqal k %ld, please check.", yShape_->GetDim(1), k_), + OP_LOGE(context_, "y dim[1] %ld not equal k %ld, please check.", yShape_->GetDim(1), k_), return ge::GRAPH_FAILED); OP_CHECK_IF((expertIdxShape_->GetDim(1) != k_), - OP_LOGE(context_, "expertId dim[1] %ld not euqal k %ld, please check.", expertIdxShape_->GetDim(1), k_), + OP_LOGE(context_, "expertId dim[1] %ld not equal k %ld, please check.", expertIdxShape_->GetDim(1), k_), return ge::GRAPH_FAILED); if (outFlag_ && outShape_ != nullptr) { OP_CHECK_IF((outShape_->GetDim(1) != xShape_->GetDim(1)), diff --git a/csrc/moe_gating_top_k/tiling_base/tiling_key.h b/csrc/moe_gating_top_k/tiling_base/tiling_key.h index ddc105cf..607f965b 100644 --- a/csrc/moe_gating_top_k/tiling_base/tiling_key.h +++ b/csrc/moe_gating_top_k/tiling_base/tiling_key.h @@ -41,7 +41,7 @@ template constexpr uint64_t 
RecursiveSum(T templa // Format: Represents the Format supported by the current tiling key, using InputLayout enum, occupies one decimal digit // Sparse: Represents whether the current tiling key supports Sparse, using SparseCapability enum, occupies one decimal digit // For other specialized scenarios, define your own bit fields and values -// usage: get tilingKey from inputed types +// usage: get tilingKey from inputted types // uint64_t tilingKey = GET_FLASHATTENTION_TILINGKEY(AxisEnum::AXIS_S1, AxisEnum::AXIS_S2, AxisEnum::AXIS_N2, // SupportedDtype::FLOAT32, InputLayout::BSH, SparseCapability::SUPPORT_ALL) @@ -51,7 +51,7 @@ template constexpr uint64_t GET_TILINGKEY(Args... templateIds return TILINGKEYOFFSET + RecursiveSum(templateIds...); } -// usage: get tilingKey from inputed types +// usage: get tilingKey from inputted types // uint64_t tilingKey = TILINGKEY(S2, S1, N2, FLOAT32, BSND, ALL) #define TILINGKEY(ub2, ub1, block, dtype, layout, sparse) \ diff --git a/csrc/moe_gating_top_k/tiling_base/tiling_type.h b/csrc/moe_gating_top_k/tiling_base/tiling_type.h index 1f4b29c7..7c781d19 100644 --- a/csrc/moe_gating_top_k/tiling_base/tiling_type.h +++ b/csrc/moe_gating_top_k/tiling_base/tiling_type.h @@ -119,7 +119,7 @@ template constexpr uint64_t RecursiveSum(T templa // Format: Represents the Format supported by the current tiling key, using InputLayout enum, occupies one decimal digit // Sparse: Represents whether the current tiling key supports Sparse, using SparseCapability enum, occupies one decimal digit // For other specialized scenarios, define your own bit fields and values -// usage: get tilingKey from inputed types +// usage: get tilingKey from inputted types // uint64_t tilingKey = GET_FLASHATTENTION_TILINGKEY(AxisEnum::AXIS_S1, AxisEnum::AXIS_S2, AxisEnum::AXIS_N2, // SupportedDtype::FLOAT32, InputLayout::BSH, SparseCapability::SUPPORT_ALL) @@ -129,7 +129,7 @@ template constexpr uint64_t GET_TILINGKEY(Args... 
templateIds return TILINGKEYOFFSET + RecursiveSum(templateIds...); } -// usage: get tilingKey from inputed types +// usage: get tilingKey from inputted types // uint64_t tilingKey = TILINGKEY(S2, S1, N2, FLOAT32, BSND, ALL) #define TILINGKEY(ub2, ub1, block, dtype, layout, sparse) \ diff --git a/csrc/moe_init_routing_custom/op_host/moe_init_routing_custom_tiling.cpp b/csrc/moe_init_routing_custom/op_host/moe_init_routing_custom_tiling.cpp index 411ec97f..608ee253 100644 --- a/csrc/moe_init_routing_custom/op_host/moe_init_routing_custom_tiling.cpp +++ b/csrc/moe_init_routing_custom/op_host/moe_init_routing_custom_tiling.cpp @@ -903,7 +903,7 @@ void MoeInitRountingCustomTilingBase::Tinlig4VBSMultiCoreCompute(MoeCustomVBSCom needCoreNum = std::min(needCoreNum, aivNum); if (needCoreNum == 0) { - OPS_LOG_E(context_->GetNodeName(), "Variale needCoreNum cannot be 0."); + OPS_LOG_E(context_->GetNodeName(), "Variable needCoreNum cannot be 0."); return; } int64_t perCoreElements = (needCoreNum == 0) ?
0 : (totalLength_ / needCoreNum); diff --git a/csrc/moe_init_routing_custom/op_kernel/moe_custom_full_load_dynamic_quant.h b/csrc/moe_init_routing_custom/op_kernel/moe_custom_full_load_dynamic_quant.h index 5d7010f3..3919445e 100644 --- a/csrc/moe_init_routing_custom/op_kernel/moe_custom_full_load_dynamic_quant.h +++ b/csrc/moe_init_routing_custom/op_kernel/moe_custom_full_load_dynamic_quant.h @@ -82,7 +82,7 @@ __aicore__ inline void MoeCustomFullLoadDynamicQuant this->CopyIn(); this->Compute(); - // vaild expert equal zero + // valid expert equal zero if (this->needCoreNum_ < 1) { if (this->blockIdx_ == 0) { if (this->rowIdxType_ == GATHER) { diff --git a/csrc/moe_init_routing_custom/op_kernel/moe_custom_full_load_static_quant.h b/csrc/moe_init_routing_custom/op_kernel/moe_custom_full_load_static_quant.h index e2c074d3..6a8991b0 100644 --- a/csrc/moe_init_routing_custom/op_kernel/moe_custom_full_load_static_quant.h +++ b/csrc/moe_init_routing_custom/op_kernel/moe_custom_full_load_static_quant.h @@ -85,7 +85,7 @@ __aicore__ inline void MoeCustomFullLoadStaticQuant::Process() this->CopyIn(); this->Compute(); - // vaild expert equal zero + // valid expert equal zero if (this->needCoreNum_ < 1) { if (this->blockIdx_ == 0) { if (this->rowIdxType_ == GATHER) { diff --git a/csrc/moe_init_routing_custom/op_kernel/moe_custom_full_load_unquantized.h b/csrc/moe_init_routing_custom/op_kernel/moe_custom_full_load_unquantized.h index 2fbced98..99cd28ac 100644 --- a/csrc/moe_init_routing_custom/op_kernel/moe_custom_full_load_unquantized.h +++ b/csrc/moe_init_routing_custom/op_kernel/moe_custom_full_load_unquantized.h @@ -78,7 +78,7 @@ __aicore__ inline void MoeCustomFullLoadUnquantized::Process() this->CopyIn(); this->Compute(); - // vaild expert equal zero + // valid expert equal zero if (this->needCoreNum_ < 1) { if (this->blockIdx_ == 0) { if (this->rowIdxType_ == GATHER) { diff --git a/csrc/notify_dispatch/op_host/aclnn_notify_dispatch.h 
b/csrc/notify_dispatch/op_host/aclnn_notify_dispatch.h index be9ae04f..1264af35 100644 --- a/csrc/notify_dispatch/op_host/aclnn_notify_dispatch.h +++ b/csrc/notify_dispatch/op_host/aclnn_notify_dispatch.h @@ -8,7 +8,7 @@ extern "C" { #endif -/* funtion: aclnnNotifyDispatchGetWorkspaceSize +/* function: aclnnNotifyDispatchGetWorkspaceSize * parameters : * sendData : required * tokenPerExpertData : required @@ -40,7 +40,7 @@ aclnnStatus aclnnNotifyDispatchGetWorkspaceSize( uint64_t *workspaceSize, aclOpExecutor **executor); -/* funtion: aclnnNotifyDispatch +/* function: aclnnNotifyDispatch * parameters : * workspace : workspace memory addr(input). * workspaceSize : size of workspace(input). diff --git a/csrc/sparse_flash_attention/op_host/sparse_flash_attention_tiling.cpp b/csrc/sparse_flash_attention/op_host/sparse_flash_attention_tiling.cpp index 11a0ddee..65040919 100644 --- a/csrc/sparse_flash_attention/op_host/sparse_flash_attention_tiling.cpp +++ b/csrc/sparse_flash_attention/op_host/sparse_flash_attention_tiling.cpp @@ -1095,7 +1095,7 @@ ge::graphStatus SFATilingCheck::CheckActualSeqLens() if (std::string(opParamInfo_.layoutKV) == "TND" && opParamInfo_.actualSeqLengths.tensor == nullptr) { OPS_LOG_E(opName_, "when the layout of key and value is TND, " - "the actualSeqLengths of key and value shoule not be empty."); + "the actualSeqLengths of key and value should not be empty."); return ge::GRAPH_PARAM_INVALID; } if (ge::GRAPH_SUCCESS != CheckActualSeqLensDType() || diff --git a/csrc/utils/inc/tiling/tiling_type.h b/csrc/utils/inc/tiling/tiling_type.h index d417b0b6..dfdd67f6 100644 --- a/csrc/utils/inc/tiling/tiling_type.h +++ b/csrc/utils/inc/tiling/tiling_type.h @@ -116,7 +116,7 @@ template constexpr uint64_t RecursiveSum(T templa // Format: 表示当前tiling key支持的Format, 使用枚举InputLayout表示,占一个十进制位 // Sparse: 表示当前tiling key是否支持Sparse,使用枚举SparseCapability表示,占一个十进制位 // 其余特化场景,定义自己的位域和值 -// usage: get tilingKey from inputed types +// usage: get tilingKey from inputted 
types // uint64_t tilingKey = GET_FLASHATTENTION_TILINGKEY(AxisEnum::AXIS_S1, AxisEnum::AXIS_S2, AxisEnum::AXIS_N2, // SupportedDtype::FLOAT32, InputLayout::BSH, SparseCapability::SUPPORT_ALL) @@ -126,7 +126,7 @@ template constexpr uint64_t GET_TILINGKEY(Args... templateIds return TILINGKEYOFFSET + RecursiveSum(templateIds...); } -// usage: get tilingKey from inputed types +// usage: get tilingKey from inputted types // uint64_t tilingKey = TILINGKEY(S2, S1, N2, FLOAT32, BSND, ALL) #define TILINGKEY(ub2, ub1, block, dtype, layout, sparse) \ diff --git a/docs/source/developer_guide/feature_guide/context_parallel.md b/docs/source/developer_guide/feature_guide/context_parallel.md index 0de85507..0a4d423d 100644 --- a/docs/source/developer_guide/feature_guide/context_parallel.md +++ b/docs/source/developer_guide/feature_guide/context_parallel.md @@ -38,7 +38,7 @@ Given that PCP and DCP behave similarly for KV cache sharding, we refer to them As illustrated, a virtual block is defined in the block table, where blocks within the same CP device group form a virtual block. The virtual block size is `virtual_block_size = block_size * cp_size`. -For any token `x`, referencing the folloing figure, its (virtual) block index is `x // virtual_block_size`, and the offset within the virtual block is `offset_within_virtual_block = x % virtual_block_size`. +For any token `x`, referencing the following figure, its (virtual) block index is `x // virtual_block_size`, and the offset within the virtual block is `offset_within_virtual_block = x % virtual_block_size`. The local block index is `local_block_index = offset_within_virtual_block // cp_kv_cache_interleave_size`, and the device number is `target_rank = local_block_index % cp_size`. The offset within the local block is `(local_block_index // cp_size) * cp_kv_cache_interleave_size + offset_within_virtual_block % cp_kv_cache_interleave_size`. 
diff --git a/docs/source/tutorials/DeepSeek-V3.1.md b/docs/source/tutorials/DeepSeek-V3.1.md index 250cafab..7781ce75 100644 --- a/docs/source/tutorials/DeepSeek-V3.1.md +++ b/docs/source/tutorials/DeepSeek-V3.1.md @@ -699,7 +699,7 @@ The performance result is: **Input/Output**: 3.5k/1.5k -**Performance**: TTFT = 6.16s, TPOT = 48.82ms, Average performance of each card is 478 TPS (Token Per Secon). +**Performance**: TTFT = 6.16s, TPOT = 48.82ms, Average performance of each card is 478 TPS (Token Per Second). ### Using vLLM Benchmark diff --git a/docs/source/user_guide/release_notes.md b/docs/source/user_guide/release_notes.md index 5a5cf6c2..def66dc9 100644 --- a/docs/source/user_guide/release_notes.md +++ b/docs/source/user_guide/release_notes.md @@ -2,7 +2,7 @@ ## v0.13.0rc2 - 2026.01.24 -This is the second release candidate of v0.13.0 for vLLM Ascend. In this rc relesae, we fixed lots of bugs and improved the performance of many models. Please follow the [official doc](https://docs.vllm.ai/projects/ascend/en/v0.13.0/) to get started. Any feedback is welcome to help us to improve the final version of v0.13.0. +This is the second release candidate of v0.13.0 for vLLM Ascend. In this rc release, we fixed lots of bugs and improved the performance of many models. Please follow the [official doc](https://docs.vllm.ai/projects/ascend/en/v0.13.0/) to get started. Any feedback is welcome to help us to improve the final version of v0.13.0. ### Highlights @@ -19,7 +19,7 @@ We mainly focus on quality and performance improvement in this release. The spec ### Model Support -- LongCat-Flash is supproted now.[#3833](https://github.com/vllm-project/vllm-ascend/pull/3833) +- LongCat-Flash is supported now.[#3833](https://github.com/vllm-project/vllm-ascend/pull/3833) - minimax_m2 is supported now. 
[#5624](https://github.com/vllm-project/vllm-ascend/pull/5624) - Support for cross-attention and whisper models [#5592](https://github.com/vllm-project/vllm-ascend/pull/5592) diff --git a/examples/external_online_dp/README.md b/examples/external_online_dp/README.md index 4844dc08..a02ab75c 100644 --- a/examples/external_online_dp/README.md +++ b/examples/external_online_dp/README.md @@ -5,7 +5,7 @@ Here is an example guiding how to use `launch_online_dp.py` to launch external d `run_dp_template.sh` is an template script used to launch each dp vllm instance separately. It will be called by `launch_online_dp.py` in multi threads and most of its configurations are set by `launch_online_dp.py`. Parameters you need to set manually include: 1. The IP and socket_ifname of your machine. If running on multi-nodes, please make sure the scripts on each node has been set with correct IP and socket_ifname of that node. -2. vLLM serving related parameters including model_path and other configurations. Note that port, dp-related parammeters and tp_size is set by `launch_online_dp.py`, all the other vLLM parameters in this file only serve as an example and you are free to modify them according to your purpose. +2. vLLM serving related parameters including model_path and other configurations. Note that port, dp-related parameters and tp_size is set by `launch_online_dp.py`, all the other vLLM parameters in this file only serve as an example and you are free to modify them according to your purpose. 
### Run `launch_online_dp.py` with CL arguments diff --git a/typos.toml b/typos.toml index d15e1137..2055450b 100644 --- a/typos.toml +++ b/typos.toml @@ -19,7 +19,7 @@ locale = "en" extend-ignore-identifiers-re = [".*Unc.*", ".*_thw", ".*UE8M0.*", ".*[UE4M3|ue4m3].*", ".*eles.*", ".*fo.*", ".*ba.*", ".*ot.*", ".*[Tt]h[rR].*"] -extend-ignore-words-re = ["CANN", "cann","ND"] +extend-ignore-words-re = ["CANN", "cann","ND","alog"] extend-ignore-re = [] [default.extend-identifiers] diff --git a/vllm_ascend/_310p/worker_310p.py b/vllm_ascend/_310p/worker_310p.py index 9565a5ec..acb75a2b 100644 --- a/vllm_ascend/_310p/worker_310p.py +++ b/vllm_ascend/_310p/worker_310p.py @@ -33,5 +33,5 @@ class NPUWorker310(NPUWorker): self.model_runner = NPUModelRunner310(self.vllm_config, self.device) def _warm_up_atb(self): - # 310p device donot support torch_npu._npu_matmul_add_fp32 atb ops + # 310p device do not support torch_npu._npu_matmul_add_fp32 atb ops logger.info("Skip warm-up atb ops for 310P device")