From 9c1d58f4d241eb5c64e4d84144cfc36a2f6c6355 Mon Sep 17 00:00:00 2001 From: vllm-ascend-ci Date: Fri, 17 Apr 2026 16:29:30 +0800 Subject: [PATCH] [v0.18.0][Doc] Translated Doc files 2026-04-15 (#8309) ## Auto-Translation Summary Translated **19** file(s): - docs/source/locale/zh_CN/LC_MESSAGES/community/contributors.po - docs/source/locale/zh_CN/LC_MESSAGES/community/versioning_policy.po - docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/KV_Cache_Pool_Guide.po - docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/ModelRunner_prepare_inputs.po - docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/cpu_binding.po - docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/long_sequence_context_parallel_multi_node.po - docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/long_sequence_context_parallel_single_node.po - docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/pd_disaggregation_mooncake_multi_node.po - docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/pd_disaggregation_mooncake_single_node.po - docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Kimi-K2.5.po - docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen2.5-Omni.po - docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-Dense.po - docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.po - docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3.5-397B-A17B.po - docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/Fine_grained_TP.po - docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/epd_disaggregation.po - docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/external_dp.po - docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/large_scale_ep.po - docs/source/locale/zh_CN/LC_MESSAGES/user_guide/release_notes.po --- [Workflow run](https://github.com/vllm-project/vllm-ascend/actions/runs/24447109402) Signed-off-by: vllm-ascend-ci Co-authored-by: vllm-ascend-ci --- .../LC_MESSAGES/community/contributors.po | 38 +- .../community/versioning_policy.po | 112 +- .../Design_Documents/KV_Cache_Pool_Guide.po | 95 +- .../ModelRunner_prepare_inputs.po | 78 +- .../Design_Documents/cpu_binding.po | 69 +- ...ng_sequence_context_parallel_multi_node.po | 115 +- ...g_sequence_context_parallel_single_node.po | 179 +- .../pd_disaggregation_mooncake_multi_node.po | 76 +- .../pd_disaggregation_mooncake_single_node.po | 8 +- .../LC_MESSAGES/tutorials/models/Kimi-K2.5.po | 88 +- .../tutorials/models/Qwen2.5-Omni.po | 43 +- .../tutorials/models/Qwen3-Dense.po | 202 +- .../models/Qwen3-Omni-30B-A3B-Thinking.po | 64 +- .../tutorials/models/Qwen3.5-397B-A17B.po | 243 +- .../feature_guide/Fine_grained_TP.po | 43 +- .../feature_guide/epd_disaggregation.po | 39 +- .../user_guide/feature_guide/external_dp.po | 31 +- .../feature_guide/large_scale_ep.po | 38 +- .../LC_MESSAGES/user_guide/release_notes.po | 2606 +++++++++++------ 19 files changed, 2586 insertions(+), 1581 deletions(-) diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/community/contributors.po b/docs/source/locale/zh_CN/LC_MESSAGES/community/contributors.po index 212e4553..f6d6937d 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/community/contributors.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/community/contributors.po @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: vllm-ascend\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"POT-Creation-Date: 2026-04-15 09:41+0000\n" "PO-Revision-Date: YEAR-MO-DA 
HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language: zh_CN\n" @@ -32,7 +32,7 @@ msgid "Name" msgstr "姓名" #: ../../source/community/contributors.md -msgid "Github ID" +msgid "GitHub ID" msgstr "GitHub ID" #: ../../source/community/contributors.md @@ -917,6 +917,14 @@ msgstr "306" msgid "[@mengchengTang](https://github.com/mengchengTang)" msgstr "[@mengchengTang](https://github.com/mengchengTang)" +#: ../../source/community/contributors.md +msgid "" +"[41eb71d](https://github.com/vllm-project/vllm-" +"ascend/commit/41eb71d665ab9f0b72b6d3bc15d41dee7fcc0f5f)" +msgstr "" +"[41eb71d](https://github.com/vllm-project/vllm-" +"ascend/commit/41eb71d665ab9f0b72b6d3bc15d41dee7fcc0f5f)" + #: ../../source/community/contributors.md msgid "305" msgstr "305" @@ -2611,7 +2619,7 @@ msgstr "[@wangxiaochao6](https://github.com/wangxiaochao6)" #: ../../source/community/contributors.md msgid "2025/11/18" -msgstr "2025/11/18" +msgstr "2025年11月18日" #: ../../source/community/contributors.md msgid "" @@ -2631,7 +2639,7 @@ msgstr "[@845473182](https://github.com/845473182)" #: ../../source/community/contributors.md msgid "2025/11/14" -msgstr "2025/11/14" +msgstr "2025年11月14日" #: ../../source/community/contributors.md msgid "" @@ -2651,7 +2659,7 @@ msgstr "[@thonean](https://github.com/thonean)" #: ../../source/community/contributors.md msgid "2025/11/12" -msgstr "2025/11/12" +msgstr "2025年11月12日" #: ../../source/community/contributors.md msgid "" @@ -2671,7 +2679,7 @@ msgstr "[@zhaomingyu13](https://github.com/zhaomingyu13)" #: ../../source/community/contributors.md msgid "2025/11/11" -msgstr "2025/11/11" +msgstr "2025年11月11日" #: ../../source/community/contributors.md msgid "" @@ -3043,7 +3051,7 @@ msgstr "[@yzy1996](https://github.com/yzy1996)" #: ../../source/community/contributors.md msgid "2025/10/23" -msgstr "2025/10/23" +msgstr "2025年10月23日" #: ../../source/community/contributors.md msgid "" @@ -3111,7 +3119,7 @@ msgstr "[@KyrieDrewWang](https://github.com/KyrieDrewWang)" #: ../../source/community/contributors.md msgid "2025/10/22" -msgstr "2025/10/22" +msgstr "2025年10月22日" #: ../../source/community/contributors.md msgid "" @@ -3147,7 +3155,7 @@ msgstr "[@drslark](https://github.com/drslark)" #: ../../source/community/contributors.md msgid "2025/10/21" -msgstr "2025/10/21" +msgstr "2025年10月21日" #: ../../source/community/contributors.md msgid "" @@ -3183,7 +3191,7 @@ msgstr "[@leijie-ww](https://github.com/leijie-ww)" #: ../../source/community/contributors.md msgid "2025/10/20" -msgstr "2025/10/20" +msgstr "2025年10月20日" #: ../../source/community/contributors.md msgid "" @@ -3203,7 +3211,7 @@ msgstr "[@ZYang6263](https://github.com/ZYang6263)" #: ../../source/community/contributors.md msgid "2025/10/19" -msgstr "2025/10/19" +msgstr "2025年10月19日" #: ../../source/community/contributors.md msgid "" @@ -3223,7 +3231,7 @@ msgstr "[@yechao237](https://github.com/yechao237)" #: ../../source/community/contributors.md msgid "2025/10/18" -msgstr "2025/10/18" +msgstr "2025年10月18日" #: ../../source/community/contributors.md msgid "" @@ -3259,7 +3267,7 @@ msgstr "[@DreamerLeader](https://github.com/DreamerLeader)" #: ../../source/community/contributors.md msgid "2025/10/15" -msgstr "2025/10/15" +msgstr "2025年10月15日" #: ../../source/community/contributors.md msgid "" @@ -3279,7 +3287,7 @@ msgstr "[@yuzhup](https://github.com/yuzhup)" #: ../../source/community/contributors.md msgid "2025/10/14" -msgstr "2025/10/14" +msgstr "2025年10月14日" #: ../../source/community/contributors.md msgid "" @@ -3331,7 +3339,7 @@ msgstr 
"[@dsxsteven](https://github.com/dsxsteven)" #: ../../source/community/contributors.md msgid "2025/10/13" -msgstr "2025/10/13" +msgstr "2025年10月13日" #: ../../source/community/contributors.md msgid "" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/community/versioning_policy.po b/docs/source/locale/zh_CN/LC_MESSAGES/community/versioning_policy.po index 6f939d5f..9d9047f5 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/community/versioning_policy.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/community/versioning_policy.po @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: vllm-ascend\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"POT-Creation-Date: 2026-04-15 09:41+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language: zh_CN\n" @@ -72,8 +72,9 @@ msgid "" "(`v[major].[minor].[micro]`). Any post version must be published as a " "patch version of the final release." msgstr "" -"**后续版本**:通常**按需发布**,用于解决正式版本中的小错误。与 [PEP-440 后续版本说明](https://peps.python.org/pep-" -"0440/#post-releases) 的惯例不同,这些版本包含实际的错误修复,因为正式发布版本必须严格与 vLLM 的正式发布格式 " +"**后续版本**:通常**按需发布**,用于解决正式版本中的小错误。与 [PEP-440 " +"后续版本说明](https://peps.python.org/pep-0440/#post-releases) " +"的惯例不同,这些版本包含实际的错误修复,因为正式发布版本必须严格与 vLLM 的正式发布格式 " "(`v[major].[minor].[micro]`) 对齐。任何后续版本都必须作为正式版本的补丁版本发布。" #: ../../source/community/versioning_policy.md:14 @@ -379,14 +380,17 @@ msgstr "v0.7.3" msgid "" "If you're using v0.7.3, don't forget to install [mindie-" "turbo](https://pypi.org/project/mindie-turbo) as well." -msgstr "如果您正在使用 v0.7.3,请别忘了同时安装 [mindie-turbo](https://pypi.org/project/mindie-turbo)。" +msgstr "" +"如果您正在使用 v0.7.3,请别忘了同时安装 [mindie-turbo](https://pypi.org/project/mindie-turbo)。" #: ../../source/community/versioning_policy.md:58 msgid "" "For main branch of vLLM Ascend, we usually make it compatible with the " "latest vLLM release and a newer commit hash of vLLM. Please note that " "this table is usually updated. Please check it regularly." -msgstr "对于 vLLM Ascend 的 main 分支,我们通常会使其与最新的 vLLM 发布版本以及更新的 vLLM 提交哈希兼容。请注意,此表格会经常更新,请定期查看。" +msgstr "" +"对于 vLLM Ascend 的 main 分支,我们通常会使其与最新的 vLLM 发布版本以及更新的 vLLM " +"提交哈希兼容。请注意,此表格会经常更新,请定期查看。" #: ../../source/community/versioning_policy.md:54 msgid "main" @@ -683,7 +687,9 @@ msgid "" "**releases/vX.Y.Z**: development branch, created with part of new " "releases of vLLM. For example, `releases/v0.13.0` is the dev branch for " "vLLM `v0.13.0` version." -msgstr "**releases/vX.Y.Z**:开发分支,随 vLLM 新版本的一部分创建。例如,`releases/v0.13.0` 是 vLLM `v0.13.0` 版本的开发分支。" +msgstr "" +"**releases/vX.Y.Z**:开发分支,随 vLLM 新版本的一部分创建。例如,`releases/v0.13.0` 是 vLLM " +"`v0.13.0` 版本的开发分支。" #: ../../source/community/versioning_policy.md:109 msgid "" @@ -760,7 +766,10 @@ msgid "" "do not (e.g. `releases/v0.12.0`). The vLLM Ascend release branch now " "follows the `releases/vX.Y.Z` naming convention, replacing the previous " "`vX.Y.Z-dev` format to align with vLLM's branch naming standards." 
-msgstr "请注意,vLLM Ascend 仅针对特定的 vLLM 发布版本进行发布,而非每个版本。因此,您可能会注意到某些版本有对应的开发分支(例如 `releases/v0.13.0`),而其他版本则没有(例如 `releases/v0.12.0`)。vLLM Ascend 的发布分支现在遵循 `releases/vX.Y.Z` 命名约定,取代了之前的 `vX.Y.Z-dev` 格式,以与 vLLM 的分支命名标准保持一致。" +msgstr "" +"请注意,vLLM Ascend 仅针对特定的 vLLM 发布版本进行发布,而非每个版本。因此,您可能会注意到某些版本有对应的开发分支(例如 " +"`releases/v0.13.0`),而其他版本则没有(例如 `releases/v0.12.0`)。vLLM Ascend 的发布分支现在遵循" +" `releases/vX.Y.Z` 命名约定,取代了之前的 `vX.Y.Z-dev` 格式,以与 vLLM 的分支命名标准保持一致。" #: ../../source/community/versioning_policy.md:125 msgid "" @@ -910,7 +919,10 @@ msgid "" " indicate that they have installed a dev or editable version of vLLM " "package. In this case, we provide the env variable `VLLM_VERSION` to let " "users specify the version of vLLM package to use." -msgstr "为确保代码更改与最新的 1 或 2 个 vLLM 发布版本兼容,vLLM Ascend 在代码中引入了版本检查机制。它首先检查已安装的 vLLM 包的版本,以决定使用哪段代码逻辑。如果用户遇到 `InvalidVersion` 错误,可能表明他们安装了开发版或可编辑版本的 vLLM 包。在这种情况下,我们提供了环境变量 `VLLM_VERSION`,允许用户指定要使用的 vLLM 包版本。" +msgstr "" +"为确保代码更改与最新的 1 或 2 个 vLLM 发布版本兼容,vLLM Ascend 在代码中引入了版本检查机制。它首先检查已安装的 vLLM " +"包的版本,以决定使用哪段代码逻辑。如果用户遇到 `InvalidVersion` 错误,可能表明他们安装了开发版或可编辑版本的 vLLM " +"包。在这种情况下,我们提供了环境变量 `VLLM_VERSION`,允许用户指定要使用的 vLLM 包版本。" #: ../../source/community/versioning_policy.md:154 msgid "" @@ -929,7 +941,10 @@ msgid "" "variables in [docs/source/conf.py](https://github.com/vllm-project/vllm-" "ascend/blob/main/docs/source/conf.py)**. While this is not a simple task," " it is a principle we should strive to follow." -msgstr "为降低维护成本,**所有分支的文档内容应保持一致,版本差异可通过 [docs/source/conf.py](https://github.com/vllm-project/vllm-ascend/blob/main/docs/source/conf.py) 中的变量进行控制**。虽然这并非易事,但这是我们应努力遵循的原则。" +msgstr "" +"为降低维护成本,**所有分支的文档内容应保持一致,版本差异可通过 [docs/source/conf.py](https://github.com" +"/vllm-project/vllm-ascend/blob/main/docs/source/conf.py) " +"中的变量进行控制**。虽然这并非易事,但这是我们应努力遵循的原则。" #: ../../source/community/versioning_policy.md:54 msgid "Version" @@ -945,7 +960,7 @@ msgstr "代码分支" #: ../../source/community/versioning_policy.md:54 msgid "latest" -msgstr "最新" +msgstr "latest" #: ../../source/community/versioning_policy.md:54 msgid "Doc for the latest rc release of main branch" @@ -957,7 +972,7 @@ msgstr "`main` 分支" #: ../../source/community/versioning_policy.md:54 msgid "rc version" -msgstr "候选版本" +msgstr "rc version" #: ../../source/community/versioning_policy.md:54 msgid "Doc for RC released versions" @@ -969,7 +984,7 @@ msgstr "`vX.Y.ZrcN` --> `vX.Y.ZrcN` 标签" #: ../../source/community/versioning_policy.md:54 msgid "version" -msgstr "版本" +msgstr "version" #: ../../source/community/versioning_policy.md:54 msgid "Doc for historical released versions" @@ -1004,77 +1019,14 @@ msgstr "软件依赖管理" #: ../../source/community/versioning_policy.md:174 msgid "" "`torch-npu`: Ascend Extension for PyTorch (torch-npu) releases a stable " -"version to [PyPi](https://pypi.org/project/torch-npu) every 3 months, a " +"version to [PyPI](https://pypi.org/project/torch-npu) every 3 months, a " "development version (aka the POC version) every month, and a nightly " -"version every day. The PyPi stable version **CAN** be used in vLLM Ascend" +"version every day. The PyPI stable version **CAN** be used in vLLM Ascend" " final version, the monthly dev version **ONLY CAN** be used in vLLM " "Ascend RC version for rapid iteration, and the nightly version **CANNOT**" " be used in vLLM Ascend any version or branch." 
msgstr "" "`torch-npu`:Ascend Extension for PyTorch(torch-npu)每 3 个月在 " -"[PyPi](https://pypi.org/project/torch-npu) 发布一个稳定版本,每月发布一个开发版本(亦称 POC 版本),每日发布一个 " -"nightly 版本。PyPi 稳定版本**可以**用于 vLLM Ascend 正式版,月度开发版本**仅能**用于 vLLM Ascend RC " -"版本以进行快速迭代,nightly 版本**不能**用于 vLLM Ascend 的任何版本或分支。" - -#~ msgid "MindIE Turbo" -#~ msgstr "MindIE Turbo" - -#~ msgid "2.0rc1" -#~ msgstr "2.0候选版本1" - -#~ msgid "The branch status will be in one of the following states:" -#~ msgstr "分支状态将处于以下几种状态之一:" - -#~ msgid "" -#~ "Note that vLLM Ascend will only be" -#~ " released for a certain vLLM release" -#~ " version rather than all versions. " -#~ "Hence, You might see only part of" -#~ " versions have dev branches (such as" -#~ " only `0.7.1-dev` / `0.7.3-dev` but " -#~ "no `0.7.2-dev`), this is as expected." -#~ msgstr "" -#~ "请注意,vLLM Ascend 仅会针对特定的 vLLM " -#~ "发布版本进行发布,而非所有版本。因此,您可能只会看到部分版本拥有开发分支(例如仅有 `0.7.1-dev` / `0.7.3-dev`,而没有 " -#~ "`0.7.2-dev`),这是正常现象。" - -#~ msgid "Doc for the latest dev branch" -#~ msgstr "最新开发分支的文档" - -#~ msgid "vX.Y.Z-dev (Will be `main` after the first final release)" -#~ msgstr "vX.Y.Z-dev(在首次正式发布后将成为 `main`)" - -#~ msgid "Git tags, like vX.Y.Z[rcN]" -#~ msgstr "Git 标签,如 vX.Y.Z[rcN]" - -#~ msgid "stable(not yet released)" -#~ msgstr "稳定版(尚未发布)" - -#~ msgid "Will be `vX.Y.Z-dev` after the first official release" -#~ msgstr "首次正式发布后将会是 `vX.Y.Z-dev`" - -#~ msgid "As shown above:" -#~ msgstr "如上所示:" - -#~ msgid "" -#~ "`latest` documentation: Matches the current" -#~ " maintenance branch `vX.Y.Z-dev` (Will be" -#~ " `main` after the first final " -#~ "release). Continuously updated to ensure " -#~ "usability for the latest release." -#~ msgstr "`latest` 文档:匹配当前维护分支 `vX.Y.Z-dev`(在首次正式发布后将成为 `main`)。持续更新以确保适用于最新发布版本。" - -#~ msgid "" -#~ "`stable` documentation (**not yet released**):" -#~ " Official release documentation. Updates " -#~ "are allowed in real-time after " -#~ "release, typically based on vX.Y.Z-dev. " -#~ "Once stable documentation is available, " -#~ "non-stable versions should display a " -#~ "header warning: `You are viewing the " -#~ "latest developer preview docs. Click " -#~ "here to view docs for the latest" -#~ " stable release.`." -#~ msgstr "" -#~ "`stable` 文档(**尚未发布**):官方发布版文档。发布后允许实时更新,通常基于 " -#~ "vX.Y.Z-dev。一旦稳定版文档可用,非稳定版本应显示一个顶部警告:`您正在查看最新的开发预览文档。点击此处查看最新稳定版本文档。`" +"[PyPI](https://pypi.org/project/torch-npu) 发布一个稳定版本,每月发布一个开发版本(亦称 POC " +"版本),每日发布一个 nightly 版本。PyPI 稳定版本**可以**用于 vLLM Ascend 正式版,月度开发版本**仅能**用于 " +"vLLM Ascend RC 版本以进行快速迭代,nightly 版本**不能**用于 vLLM Ascend 的任何版本或分支。" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/KV_Cache_Pool_Guide.po b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/KV_Cache_Pool_Guide.po index 48fc20c5..058b6a75 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/KV_Cache_Pool_Guide.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/KV_Cache_Pool_Guide.po @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: vllm-ascend \n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"POT-Creation-Date: 2026-04-15 09:41+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language: zh_CN\n" @@ -46,32 +46,42 @@ msgid "" "including HBM, DRAM, and SSD, making a pool for KV Cache storage while " "making the prefix of requests visible across all nodes, increasing the " "cache hit rate for all requests." 
-msgstr "因此,我们提出了 KV 缓存池,旨在利用包括 HBM、DRAM 和 SSD 在内的多种存储类型,构建一个 KV 缓存存储池,同时使请求的前缀在所有节点间可见,从而提高所有请求的缓存命中率。" +msgstr "" +"因此,我们提出了 KV 缓存池,旨在利用包括 HBM、DRAM 和 SSD 在内的多种存储类型,构建一个 KV " +"缓存存储池,同时使请求的前缀在所有节点间可见,从而提高所有请求的缓存命中率。" #: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:11 msgid "" "vLLM Ascend currently supports [MooncakeStore](https://github.com" "/kvcache-ai/Mooncake), one of the most recognized KV Cache storage " "engines." -msgstr "vLLM Ascend 目前支持 [MooncakeStore](https://github.com/kvcache-ai/Mooncake),这是最受认可的 KV 缓存存储引擎之一。" +msgstr "" +"vLLM Ascend 目前支持 [MooncakeStore](https://github.com/kvcache-" +"ai/Mooncake),这是最受认可的 KV 缓存存储引擎之一。" #: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:13 msgid "" -"While one can utilize Mooncake Store in vLLM V1 engine by setting it as a" -" remote backend of LMCache with GPU (see " +"While one can utilize MooncakeStore in vLLM V1 engine by setting it as a " +"remote backend of LMCache with GPU (see " "[Tutorial](https://github.com/LMCache/LMCache/blob/dev/examples/kv_cache_reuse/remote_backends/mooncakestore/README.md))," " we find it would be better to integrate a connector that directly " -"supports Mooncake Store and can utilize the data transfer strategy that " +"supports MooncakeStore and can utilize the data transfer strategy that " "best fits Huawei NPU hardware." -msgstr "虽然可以通过将 Mooncake Store 设置为 GPU 上 LMCache 的远程后端来在 vLLM V1 引擎中使用它(参见[教程](https://github.com/LMCache/LMCache/blob/dev/examples/kv_cache_reuse/remote_backends/mooncakestore/README.md)),但我们认为集成一个直接支持 Mooncake Store 并能利用最适合华为 NPU 硬件的数据传输策略的连接器会更好。" +msgstr "" +"虽然可以通过将 MooncakeStore 设置为 GPU 上 LMCache 的远程后端来在 vLLM V1 " +"引擎中使用它(参见[教程](https://github.com/LMCache/LMCache/blob/dev/examples/kv_cache_reuse/remote_backends/mooncakestore/README.md)),但我们认为集成一个直接支持" +" MooncakeStore 并能利用最适合华为 NPU 硬件的数据传输策略的连接器会更好。" #: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:15 msgid "" -"Hence, we propose to integrate Mooncake Store with a brand new " +"Hence, we propose to integrate MooncakeStore with a brand new " "**MooncakeStoreConnectorV1**, which is indeed largely inspired by " "**LMCacheConnectorV1** (see the `How is MooncakeStoreConnectorV1 " "Implemented?` section)." -msgstr "因此,我们提议将 Mooncake Store 与全新的 **MooncakeStoreConnectorV1** 集成,该连接器的设计在很大程度上受到了 **LMCacheConnectorV1** 的启发(参见 `MooncakeStoreConnectorV1 是如何实现的?` 部分)。" +msgstr "" +"因此,我们提议将 MooncakeStore 与全新的 **MooncakeStoreConnectorV1** " +"集成,该连接器的设计在很大程度上受到了 **LMCacheConnectorV1** 的启发(参见 " +"`MooncakeStoreConnectorV1 是如何实现的?` 部分)。" #: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:17 msgid "Usage" @@ -79,17 +89,21 @@ msgstr "使用方法" #: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:19 msgid "" -"vLLM Ascend currently supports Mooncake Store for KV Cache Pool. To " -"enable Mooncake Store, one needs to configure `kv-transfer-config` and " -"choose `MooncakeStoreConnector` as the KV Connector." -msgstr "vLLM Ascend 目前支持使用 Mooncake Store 作为 KV 缓存池。要启用 Mooncake Store,需要配置 `kv-transfer-config` 并选择 `MooncakeStoreConnector` 作为 KV 连接器。" +"vLLM Ascend currently supports MooncakeStore for KV Cache Pool. To enable" +" MooncakeStore, one needs to configure `kv-transfer-config` and choose " +"`MooncakeStoreConnector` as the KV Connector." 
+msgstr "" +"vLLM Ascend 目前支持使用 MooncakeStore 作为 KV 缓存池。要启用 MooncakeStore,需要配置 `kv-" +"transfer-config` 并选择 `MooncakeStoreConnector` 作为 KV 连接器。" #: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:21 msgid "" "For step-by-step deployment and configuration, please refer to the [KV " "Pool User " "Guide](https://docs.vllm.ai/projects/ascend/en/latest/user_guide/feature_guide/kv_pool.html)." -msgstr "关于逐步部署和配置,请参考 [KV 池用户指南](https://docs.vllm.ai/projects/ascend/en/latest/user_guide/feature_guide/kv_pool.html)。" +msgstr "" +"关于逐步部署和配置,请参考 [KV " +"池用户指南](https://docs.vllm.ai/projects/ascend/en/latest/user_guide/feature_guide/kv_pool.html)。" #: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:23 msgid "How it works?" @@ -114,7 +128,9 @@ msgid "" "efficient caching both locally (in HBM) and globally (via Mooncake), " "ensuring that frequently used prefixes remain hot while less frequently " "accessed KV data can spill over to lower-cost memory." -msgstr "当与 vLLM 的前缀缓存机制结合时,该池能够实现本地(HBM 中)和全局(通过 Mooncake)的高效缓存,确保常用前缀保持热状态,而访问频率较低的 KV 数据则可以溢出到成本更低的内存中。" +msgstr "" +"当与 vLLM 的前缀缓存机制结合时,该池能够实现本地(HBM 中)和全局(通过 " +"Mooncake)的高效缓存,确保常用前缀保持热状态,而访问频率较低的 KV 数据则可以溢出到成本更低的内存中。" #: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:31 msgid "1. Combining KV Cache Pool with HBM Prefix Caching" @@ -125,7 +141,9 @@ msgid "" "Prefix Caching with HBM is already supported by the vLLM V1 Engine. By " "introducing KV Connector V1, users can seamlessly combine HBM-based " "Prefix Caching with Mooncake-backed KV Pool." -msgstr "vLLM V1 引擎已支持基于 HBM 的前缀缓存。通过引入 KV Connector V1,用户可以无缝地将基于 HBM 的前缀缓存与 Mooncake 支持的 KV 池结合起来。" +msgstr "" +"vLLM V1 引擎已支持基于 HBM 的前缀缓存。通过引入 KV Connector V1,用户可以无缝地将基于 HBM 的前缀缓存与 " +"Mooncake 支持的 KV 池结合起来。" #: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:36 msgid "" @@ -133,7 +151,9 @@ msgid "" "which is enabled by default in vLLM V1 unless the " "`--no_enable_prefix_caching` flag is set, and setting up the KV Connector" " for KV Pool (e.g., the MooncakeStoreConnector)." -msgstr "用户只需启用前缀缓存(在 vLLM V1 中默认启用,除非设置了 `--no_enable_prefix_caching` 标志)并为 KV 池设置 KV 连接器(例如 MooncakeStoreConnector),即可同时启用这两个功能。" +msgstr "" +"用户只需启用前缀缓存(在 vLLM V1 中默认启用,除非设置了 `--no_enable_prefix_caching` 标志)并为 KV " +"池设置 KV 连接器(例如 MooncakeStoreConnector),即可同时启用这两个功能。" #: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:38 msgid "**Workflow**:" @@ -149,7 +169,9 @@ msgid "" " the connector. If there are additional hits in the KV Pool, we get the " "**additional blocks only** from the KV Pool, and get the rest of the " "blocks directly from HBM to minimize the data transfer latency." -msgstr "获取 HBM 上的命中令牌数量后,引擎通过连接器查询 KV 池。如果在 KV 池中有额外的命中,我们**仅从 KV 池获取额外的块**,其余块则直接从 HBM 获取,以最小化数据传输延迟。" +msgstr "" +"获取 HBM 上的命中令牌数量后,引擎通过连接器查询 KV 池。如果在 KV 池中有额外的命中,我们**仅从 KV " +"池获取额外的块**,其余块则直接从 HBM 获取,以最小化数据传输延迟。" #: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:44 msgid "" @@ -173,7 +195,9 @@ msgid "" "Currently, we only perform put and get operations of KV Pool for " "**Prefill Nodes**, and Decode Nodes get their KV Cache from Mooncake P2P " "KV Connector, i.e., MooncakeConnector." 
-msgstr "目前,我们仅对**预填充节点**执行 KV 池的 put 和 get 操作,解码节点则通过 Mooncake P2P KV 连接器(即 MooncakeConnector)获取其 KV 缓存。" +msgstr "" +"目前,我们仅对**预填充节点**执行 KV 池的 put 和 get 操作,解码节点则通过 Mooncake P2P KV 连接器(即 " +"MooncakeConnector)获取其 KV 缓存。" #: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:52 msgid "" @@ -182,15 +206,20 @@ msgid "" "Nodes, while not sacrificing the data transfer efficiency between Prefill" " and Decode nodes with P2P KV Connector that transfers KV Caches between " "NPU devices directly." -msgstr "这样做的主要好处是,我们可以通过为预填充节点使用来自 HBM 和 KV 池的前缀缓存来减少计算量,从而保持性能增益,同时又不牺牲预填充节点与解码节点之间的数据传输效率,因为 P2P KV 连接器直接在 NPU 设备间传输 KV 缓存。" +msgstr "" +"这样做的主要好处是,我们可以通过为预填充节点使用来自 HBM 和 KV " +"池的前缀缓存来减少计算量,从而保持性能增益,同时又不牺牲预填充节点与解码节点之间的数据传输效率,因为 P2P KV 连接器直接在 NPU " +"设备间传输 KV 缓存。" #: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:54 msgid "" "To enable this feature, we need to set up both Mooncake Connector and " -"Mooncake Store Connector with a Multi Connector, which is a KV Connector " +"MooncakeStore Connector with a Multi Connector, which is a KV Connector " "class provided by vLLM that can call multiple KV Connectors in a specific" " order." -msgstr "要启用此功能,我们需要使用 Multi Connector 来设置 Mooncake Connector 和 Mooncake Store Connector。Multi Connector 是 vLLM 提供的一个 KV 连接器类,可以按特定顺序调用多个 KV 连接器。" +msgstr "" +"要启用此功能,我们需要使用 Multi Connector 来设置 Mooncake Connector 和 MooncakeStore " +"Connector。Multi Connector 是 vLLM 提供的一个 KV 连接器类,可以按特定顺序调用多个 KV 连接器。" #: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:56 msgid "" @@ -208,7 +237,9 @@ msgid "" "V1: through implementing the required methods defined in the KV connector" " V1 base class, one can integrate a third-party KV cache transfer/storage" " backend into the vLLM framework." -msgstr "**MooncakeStoreConnectorV1** 继承自 vLLM V1 中的 KV Connector V1 类:通过实现 KV 连接器 V1 基类中定义的必要方法,可以将第三方 KV 缓存传输/存储后端集成到 vLLM 框架中。" +msgstr "" +"**MooncakeStoreConnectorV1** 继承自 vLLM V1 中的 KV Connector V1 类:通过实现 KV 连接器" +" V1 基类中定义的必要方法,可以将第三方 KV 缓存传输/存储后端集成到 vLLM 框架中。" #: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:62 msgid "" @@ -220,7 +251,12 @@ msgid "" "that allows async `get` and `put` of KV caches with multi-threading, and " "NPU-related data transfer optimization such as removing the `LocalBuffer`" " in LMCache to remove redundant data transfer." 
-msgstr "MooncakeStoreConnectorV1 也在很大程度上借鉴了 LMCacheConnectorV1,包括用于查找 KV 缓存键的 `Lookup Engine`/`Lookup Client` 设计,以及用于将令牌处理为前缀感知哈希的 `ChunkedTokenDatabase` 类和其他哈希相关设计。在此基础上,我们还添加了自己的设计,包括允许通过多线程异步 `get` 和 `put` KV 缓存的 `KVTransferThread`,以及与 NPU 相关的数据传输优化,例如移除 LMCache 中的 `LocalBuffer` 以消除冗余数据传输。" +msgstr "" +"MooncakeStoreConnectorV1 也在很大程度上借鉴了 LMCacheConnectorV1,包括用于查找 KV 缓存键的 " +"`Lookup Engine`/`Lookup Client` 设计,以及用于将令牌处理为前缀感知哈希的 " +"`ChunkedTokenDatabase` 类和其他哈希相关设计。在此基础上,我们还添加了自己的设计,包括允许通过多线程异步 `get` 和 " +"`put` KV 缓存的 `KVTransferThread`,以及与 NPU 相关的数据传输优化,例如移除 LMCache 中的 " +"`LocalBuffer` 以消除冗余数据传输。" #: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:64 msgid "" @@ -268,7 +304,8 @@ msgstr "" "`wait_for_layer_load`:可选;在分层 + 异步 KV 加载场景中等待层加载。\n" "`save_kv_layer`:可选;执行分层 KV 缓存放入 KV 池的操作。\n" "`wait_for_save`:如果异步保存/放入 KV 缓存,则等待 KV 保存完成。\n" -"`get_finished`:获取已完成 KV 传输的请求,如果 `put` 完成则为 `done_sending`,如果 `get` 完成则为 `done_receiving`。" +"`get_finished`:获取已完成 KV 传输的请求,如果 `put` 完成则为 `done_sending`,如果 `get` 完成则为 " +"`done_receiving`。" #: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:82 msgid "DFX" @@ -293,9 +330,9 @@ msgstr "限制" #: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:89 msgid "" -"Currently, Mooncake Store for vLLM-Ascend only supports DRAM as the " +"Currently, MooncakeStore for vLLM-Ascend only supports DRAM as the " "storage for KV Cache pool." -msgstr "目前,vLLM-Ascend 的 Mooncake Store 仅支持 DRAM 作为 KV 缓存池的存储。" +msgstr "目前,vLLM-Ascend 的 MooncakeStore 仅支持 DRAM 作为 KV 缓存池的存储。" #: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:91 msgid "" @@ -306,4 +343,6 @@ msgid "" "situation by falling back the request and re-compute everything assuming " "there's no prefix cache hit (or even better, revert only one block and " "keep using the Prefix Caches before that)." -msgstr "目前,如果我们成功查找到一个键并发现它存在,但在调用 KV 池的 get 函数时失败,我们仅输出一条日志表明 get 操作失败并继续执行;因此,该特定请求的准确性可能会受到影响。我们将通过回退请求并假设没有前缀缓存命中来重新计算所有内容(或者更好的是,仅回退一个块并继续使用该块之前的前缀缓存)来处理这种情况。" \ No newline at end of file +msgstr "" +"目前,如果我们成功查找到一个键并发现其存在,但在调用 KV 池的 get 函数时获取失败,我们仅输出一条日志表明 get " +"操作失败并继续执行;因此,该特定请求的准确性可能会受到影响。我们将通过回退请求并假设没有前缀缓存命中来重新计算所有内容(或者更优的方案是,仅回退一个块并继续使用该块之前的前缀缓存)来处理这种情况。" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/ModelRunner_prepare_inputs.po b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/ModelRunner_prepare_inputs.po index ddfff542..2cd26be9 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/ModelRunner_prepare_inputs.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/ModelRunner_prepare_inputs.po @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: vllm-ascend \n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"POT-Creation-Date: 2026-04-15 09:41+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language: zh_CN\n" @@ -88,13 +88,15 @@ msgid "" "At last, these `Token IDs` are required to be fed into a model, and " "`positions` should also be sent into the model to create `Rope` (Rotary " "positional embedding). Both of them are the inputs of the model." 
-msgstr "最后,这些 `Token IDs` 需要输入到模型中,`positions` 也需要送入模型以创建 `Rope`(旋转位置编码)。两者共同构成模型的输入。" +msgstr "" +"最后,这些 `Token IDs` 需要输入到模型中,`positions` 也需要送入模型以创建 " +"`Rope`(旋转位置编码)。两者共同构成模型的输入。" #: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:38 msgid "" "**Note**: The `Token IDs` are the inputs of a model, so we also call them" -" `Inputs IDs`." -msgstr "**注意**:`Token IDs` 是模型的输入,因此我们也称它们为 `Inputs IDs`。" +" `Input IDs`." +msgstr "**注意**:`Token IDs` 是模型的输入,因此我们也称它们为 `Input IDs`。" #: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:40 msgid "2. Build inputs attention metadata" @@ -185,14 +187,19 @@ msgid "" "len)`. Here, `max num request` is the maximum count of concurrent " "requests allowed in a forward batch and `max model len` is the maximum " "token count that can be handled at one request sequence in this model." -msgstr "**Token IDs table**:存储每个请求的 token IDs(即模型的输入)。此表的形状为 `(max num request, max model len)`。其中,`max num request` 是前向批次中允许的最大并发请求数,`max model len` 是该模型中单个请求序列可以处理的最大 token 数量。" +msgstr "" +"**Token IDs table**:存储每个请求的 token IDs(即模型的输入)。此表的形状为 `(max num request, " +"max model len)`。其中,`max num request` 是前向批次中允许的最大并发请求数,`max model len` " +"是该模型中单个请求序列可以处理的最大 token 数量。" #: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:62 msgid "" "**Block table**: translates the logical address (within its sequence) of " "each block to its global physical address in the device's memory. The " "shape of this table is `(max num request, max model len / block size)`" -msgstr "**Block table**:将每个块在其序列内的逻辑地址转换为其在设备内存中的全局物理地址。此表的形状为 `(max num request, max model len / block size)`" +msgstr "" +"**Block table**:将每个块在其序列内的逻辑地址转换为其在设备内存中的全局物理地址。此表的形状为 `(max num request," +" max model len / block size)`" #: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:64 msgid "" @@ -255,13 +262,14 @@ msgid "Obtain inputs" msgstr "获取输入" #: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:103 -#, python-brace-format msgid "" "As the maximum number of tokens that can be scheduled is 10, the " "scheduled tokens of each request can be represented as `{'0': 3, '1': 2, " "'2': 5}`. Note that `request_2` uses chunked prefill, leaving 3 prompt " "tokens unscheduled." -msgstr "由于一次可调度的最大 token 数为 10,每个请求的已调度 token 可以表示为 `{'0': 3, '1': 2, '2': 5}`。注意 `request_2` 使用了分块预填充,留下了 3 个提示 token 未调度。" +msgstr "" +"由于一次可调度的最大 token 数为 10,每个请求的已调度 token 可以表示为 `{'0': 3, '1': 2, '2': 5}`。注意" +" `request_2` 使用了分块预填充,留下了 3 个提示 token 未调度。" #: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:105 msgid "1. Get token positions" @@ -273,7 +281,10 @@ msgid "" "assigned to **request_0**, tokens 3–4 to **request_1**, and tokens 5–9 to" " **request_2**. To represent this mapping, we use `request indices`, for " "example, `request indices`: `[0, 0, 0, 1, 1, 2, 2, 2, 2, 2]`." 
-msgstr "首先,确定每个 token 属于哪个请求:token 0–2 分配给 **request_0**,token 3–4 分配给 **request_1**,token 5–9 分配给 **request_2**。为了表示这种映射,我们使用 `request indices`,例如,`request indices`:`[0, 0, 0, 1, 1, 2, 2, 2, 2, 2]`。" +msgstr "" +"首先,确定每个 token 属于哪个请求:token 0–2 分配给 **request_0**,token 3–4 分配给 " +"**request_1**,token 5–9 分配给 **request_2**。为了表示这种映射,我们使用 `request " +"indices`,例如,`request indices`:`[0, 0, 0, 1, 1, 2, 2, 2, 2, 2]`。" #: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:109 msgid "" @@ -281,7 +292,10 @@ msgid "" "position of current scheduled tokens** (`request_0: [0 + 0, 0 + 1, 0 + " "2]`, `request_1: [0 + 0, 0 + 1]`, `request_2: [0 + 0, 0 + 1,..., 0 + 4]`)" " and then concatenate them together (`[0, 1, 2, 0, 1, 0, 1, 2, 3, 4]`)." -msgstr "对于每个请求,使用 **已计算 token 的数量** + **当前调度 token 的相对位置**(`request_0: [0 + 0, 0 + 1, 0 + 2]`,`request_1: [0 + 0, 0 + 1]`,`request_2: [0 + 0, 0 + 1,..., 0 + 4]`),然后将它们连接在一起(`[0, 1, 2, 0, 1, 0, 1, 2, 3, 4]`)。" +msgstr "" +"对于每个请求,使用 **已计算 token 的数量** + **当前调度 token 的相对位置**(`request_0: [0 + 0, 0 " +"+ 1, 0 + 2]`,`request_1: [0 + 0, 0 + 1]`,`request_2: [0 + 0, 0 + 1,..., 0" +" + 4]`),然后将它们连接在一起(`[0, 1, 2, 0, 1, 0, 1, 2, 3, 4]`)。" #: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:111 msgid "" @@ -293,7 +307,9 @@ msgstr "注意:在实际代码中,有一种更高效的方法(使用 `requ msgid "" "Finally, `token positions` can be obtained as `[0, 1, 2, 0, 1, 0, 1, 2, " "3, 4]`. This variable is **token level**." -msgstr "最后,`token positions` 可以获取为 `[0, 1, 2, 0, 1, 0, 1, 2, 3, 4]`。此变量是 **token 级别** 的。" +msgstr "" +"最后,`token positions` 可以获取为 `[0, 1, 2, 0, 1, 0, 1, 2, 3, 4]`。此变量是 **token " +"级别** 的。" #: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:115 msgid "2. Get token indices" @@ -326,14 +342,19 @@ msgstr "注意 `T_x_x` 是一个 `int32`。" msgid "" "Let's say `M = max model len`. Then we can use `token positions` together" " with `request indices` of each token to construct `token indices`." -msgstr "假设 `M = max model len`。那么我们可以使用 `token positions` 以及每个 token 的 `request indices` 来构造 `token indices`。" +msgstr "" +"假设 `M = max model len`。那么我们可以使用 `token positions` 以及每个 token 的 `request " +"indices` 来构造 `token indices`。" #: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:137 msgid "" "So `token indices` = `[0 + 0 * M, 1 + 0 * M, 2 + 0 * M, 0 + 1 * M, 1 + 1 " "* M, 0 + 2 * M, 1 + 2 * M, 2 + 2 * M, 3 + 2 * M, 4 + 2 * M]` = `[0, 1, 2," " 12, 13, 24, 25, 26, 27, 28]`" -msgstr "所以 `token indices` = `[0 + 0 * M, 1 + 0 * M, 2 + 0 * M, 0 + 1 * M, 1 + 1 * M, 0 + 2 * M, 1 + 2 * M, 2 + 2 * M, 3 + 2 * M, 4 + 2 * M]` = `[0, 1, 2, 12, 13, 24, 25, 26, 27, 28]`" +msgstr "" +"所以 `token indices` = `[0 + 0 * M, 1 + 0 * M, 2 + 0 * M, 0 + 1 * M, 1 + 1 " +"* M, 0 + 2 * M, 1 + 2 * M, 2 + 2 * M, 3 + 2 * M, 4 + 2 * M]` = `[0, 1, 2," +" 12, 13, 24, 25, 26, 27, 28]`" #: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:139 msgid "3. 
Retrieve the Token IDs" @@ -353,7 +374,9 @@ msgstr "如前所述,我们将这些 `Token IDs` 称为 `Input IDs`。" msgid "" "`Input IDs` = `[T_0_0, T_0_1, T_0_2, T_1_0, T_1_1, T_2_0, T_2_1, T_3_2, " "T_3_3, T_3_4]`" -msgstr "`Input IDs` = `[T_0_0, T_0_1, T_0_2, T_1_0, T_1_1, T_2_0, T_2_1, T_3_2, T_3_3, T_3_4]`" +msgstr "" +"`Input IDs` = `[T_0_0, T_0_1, T_0_2, T_1_0, T_1_1, T_2_0, T_2_1, T_3_2, " +"T_3_3, T_3_4]`" #: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:151 #: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:237 @@ -367,7 +390,8 @@ msgid "" "model len / block size)`, where `max model len / block size = 12 / 2 = " "6`." msgstr "" -"在当前的**块表**中,我们使用第一个块(即 block_0)来标记未使用的块。块的形状为 `(最大请求数, 最大模型长度 / 块大小)`,其中 `最大模型长度 / 块大小 = 12 / 2 = 6`。" +"在当前的**块表**中,我们使用第一个块(即 block_0)来标记未使用的块。块的形状为 `(最大请求数, 最大模型长度 / 块大小)`,其中 " +"`最大模型长度 / 块大小 = 12 / 2 = 6`。" #: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:165 msgid "The KV cache block in the device memory is like:" @@ -434,7 +458,11 @@ msgid "" " / 2] = [0, 0, 1, 6, 6, 12, 12, 13, 13, 14]`. This could be used to " "select `device block number` from `block table`." msgstr "" -"(**令牌级别**) 使用一个简单的公式计算`块表索引`:`request indices * K + positions / block size`。因此它等于 `[0 * 6 + 0 / 2, 0 * 6 + 1 / 2, 0 * 6 + 2 / 2, 1 * 6 + 0 / 2, 1 * 6 + 1 / 2, 2 * 6 + 0 / 2, 2 * 6 + 1 / 2, 2 * 6 + 2 / 2, 2 * 6 + 3 / 2, 2 * 6 + 4 / 2] = [0, 0, 1, 6, 6, 12, 12, 13, 13, 14]`。这可用于从`块表`中选择`设备块编号`。" +"(**令牌级别**) 使用一个简单的公式计算`块表索引`:`request indices * K + positions / block " +"size`。因此它等于 `[0 * 6 + 0 / 2, 0 * 6 + 1 / 2, 0 * 6 + 2 / 2, 1 * 6 + 0 / 2," +" 1 * 6 + 1 / 2, 2 * 6 + 0 / 2, 2 * 6 + 1 / 2, 2 * 6 + 2 / 2, 2 * 6 + 3 / " +"2, 2 * 6 + 4 / 2] = [0, 0, 1, 6, 6, 12, 12, 13, 13, " +"14]`。这可用于从`块表`中选择`设备块编号`。" #: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:194 msgid "" @@ -443,14 +471,17 @@ msgid "" "block_table[block_table_indices]`. So `device block number=[1, 1, 2, 3, " "3, 4, 4, 5, 5, 6]`" msgstr "" -"(**令牌级别**) 使用`块表索引`为每个已调度的令牌选择出`设备块编号`。伪代码为 `block_numbers = block_table[block_table_indices]`。因此 `设备块编号=[1, 1, 2, 3, 3, 4, 4, 5, 5, 6]`" +"(**令牌级别**) 使用`块表索引`为每个已调度的令牌选择出`设备块编号`。伪代码为 `block_numbers = " +"block_table[block_table_indices]`。因此 `设备块编号=[1, 1, 2, 3, 3, 4, 4, 5, 5, " +"6]`" #: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:195 msgid "" "(**Token level**) `block offsets` could be computed by `block offsets = " "positions % block size = [0, 1, 0, 0, 1, 0, 1, 0, 1, 0]`." msgstr "" -"(**令牌级别**) `块内偏移`可以通过 `block offsets = positions % block size = [0, 1, 0, 0, 1, 0, 1, 0, 1, 0]` 计算得出。" +"(**令牌级别**) `块内偏移`可以通过 `block offsets = positions % block size = [0, 1, 0," +" 0, 1, 0, 1, 0, 1, 0]` 计算得出。" #: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:196 msgid "" @@ -458,7 +489,8 @@ msgid "" "mapping`: `device block number * block size + block_offsets = [2, 3, 4, " "6, 7, 8, 9, 10, 11, 12]`" msgstr "" -"最后,使用`块内偏移`和`设备块编号`创建`槽映射`:`设备块编号 * 块大小 + 块内偏移 = [2, 3, 4, 6, 7, 8, 9, 10, 11, 12]`" +"最后,使用`块内偏移`和`设备块编号`创建`槽映射`:`设备块编号 * 块大小 + 块内偏移 = [2, 3, 4, 6, 7, 8, 9, " +"10, 11, 12]`" #: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:198 msgid "(**Request level**) As we know the scheduled token count is `[3, 2, 5]`:" @@ -538,7 +570,9 @@ msgid "" "**Note**: **T_0_3**, **T_1_2** are new Token IDs of **request_0** and " "**request_1** respectively. They are sampled from the output of the " "model." 
-msgstr "**注意**:**T_0_3**、**T_1_2** 分别是 **request_0** 和 **request_1** 的新令牌 ID。它们是从模型输出中采样得到的。" +msgstr "" +"**注意**:**T_0_3**、**T_1_2** 分别是 **request_0** 和 **request_1** 的新令牌 " +"ID。它们是从模型输出中采样得到的。" #: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:234 msgid "`token indices`: `[3, 14, 29, 30, 31]`" @@ -553,7 +587,9 @@ msgid "" "We allocate the blocks `7` and `8` to `request_1` and `request_2` " "respectively, as they need more space in device to store KV cache " "following token generation or chunked prefill." -msgstr "我们将块 `7` 和 `8` 分别分配给 `request_1` 和 `request_2`,因为它们在令牌生成或分块预填充后需要更多设备空间来存储 KV 缓存。" +msgstr "" +"我们将块 `7` 和 `8` 分别分配给 `request_1` 和 " +"`request_2`,因为它们在令牌生成或分块预填充后需要更多设备空间来存储 KV 缓存。" #: ../../source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md:241 msgid "Current **Block Table**:" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/cpu_binding.po b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/cpu_binding.po index 98a91297..70d06242 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/cpu_binding.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/cpu_binding.po @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: vllm-ascend \n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"POT-Creation-Date: 2026-04-15 09:41+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language: zh_CN\n" @@ -35,9 +35,8 @@ msgid "" "Ascend NPUs and is automatically executed during worker initialization " "when enabled." msgstr "" -"CPU 绑定将 vLLM Ascend 工作进程和关键线程固定到特定的 CPU 核心,以减少 CPU-" -"NPU 跨 NUMA 流量,并在多进程工作负载下稳定延迟。它专为运行 Ascend NPU 的 ARM " -"服务器设计,启用后会在工作进程初始化期间自动执行。" +"CPU 绑定将 vLLM Ascend 工作进程和关键线程固定到特定的 CPU 核心,以减少 CPU-NPU 跨 NUMA " +"流量,并在多进程工作负载下稳定延迟。它专为运行 Ascend NPU 的 ARM 服务器设计,启用后会在工作进程初始化期间自动执行。" #: ../../source/developer_guide/Design_Documents/cpu_binding.md:7 msgid "Background" @@ -53,10 +52,9 @@ msgid "" "purely a host‑side affinity policy and does not change model execution " "logic." msgstr "" -"在多插槽 ARM 系统上,操作系统调度器可能会将 vLLM 线程放置在远离本地 NPU 的 " -"CPU 上,从而导致 NUMA 跨域流量和延迟抖动。CPU 绑定强制执行一种确定性的 CPU " -"放置策略,并可选地将 NPU IRQ 绑定到同一个 CPU 池。这与其他性能特性(如图模式" -"或动态批处理)不同,因为它纯粹是主机端的亲和性策略,不改变模型执行逻辑。" +"在多插槽 ARM 系统上,操作系统调度器可能会将 vLLM 线程放置在远离本地 NPU 的 CPU 上,从而导致 NUMA " +"跨域流量和延迟抖动。CPU 绑定强制执行一种确定性的 CPU 放置策略,并可选地将 NPU IRQ 绑定到同一个 CPU " +"池。这与其他性能特性(如图模式或动态批处理)不同,因为它纯粹是主机端的亲和性策略,不改变模型执行逻辑。" #: ../../source/developer_guide/Design_Documents/cpu_binding.md:11 msgid "Design & How it works" @@ -71,8 +69,8 @@ msgid "" "**Allowed CPU list**: The cpuset from /proc/self/status " "(Cpus_allowed_list). All allocations are constrained to this list." msgstr "" -"**允许的 CPU 列表**:来自 /proc/self/status (Cpus_allowed_list) 的 cpuset。" -"所有分配都受限于此列表。" +"**允许的 CPU 列表**:来自 /proc/self/status (Cpus_allowed_list) 的 " +"cpuset。所有分配都受限于此列表。" #: ../../source/developer_guide/Design_Documents/cpu_binding.md:16 msgid "" @@ -86,8 +84,7 @@ msgstr "" msgid "" "**CPU pool per NPU**: The CPU list assigned to each logical NPU ID based " "on the binding mode." -msgstr "" -"**每个 NPU 的 CPU 池**:根据绑定模式分配给每个逻辑 NPU ID 的 CPU 列表。" +msgstr "**每个 NPU 的 CPU 池**:根据绑定模式分配给每个逻辑 NPU ID 的 CPU 列表。" #: ../../source/developer_guide/Design_Documents/cpu_binding.md:18 msgid "**Binding modes & Device behavior**:" @@ -119,8 +116,8 @@ msgid "" "logical NPUs**, ensuring each NPU is assigned a contiguous segment of CPU" " cores. 
This prevents CPU core overlap across multiple process groups." msgstr "" -"根据**全局逻辑 NPU 总数**均匀分割允许的 CPU 列表,确保每个 NPU 被分配一个连" -"续的 CPU 核心段。这可以防止多个进程组之间的 CPU 核心重叠。" +"根据**全局逻辑 NPU 总数**均匀分割允许的 CPU 列表,确保每个 NPU 被分配一个连续的 CPU 核心段。这可以防止多个进程组之间的 " +"CPU 核心重叠。" #: ../../source/developer_guide/Design_Documents/cpu_binding.md msgid "A2 / 310P / Others" @@ -136,8 +133,8 @@ msgid "" "If multiple NPUs are assigned to a single NUMA node (which may cause " "bandwidth contention), the CPU allocation extends to adjacent NUMA nodes." msgstr "" -"基于 NPU 拓扑亲和性 (`npu-smi info -t topo`) 分配 CPU。如果多个 NPU 被分配" -"到单个 NUMA 节点(可能导致带宽争用),则 CPU 分配会扩展到相邻的 NUMA 节点。" +"基于 NPU 拓扑亲和性 (`npu-smi info -t topo`) 分配 CPU。如果多个 NPU 被分配到单个 NUMA " +"节点(可能导致带宽争用),则 CPU 分配会扩展到相邻的 NUMA 节点。" #: ../../source/developer_guide/Design_Documents/cpu_binding.md:25 msgid "**Default**: enabled (enable_cpu_binding = true)." @@ -151,8 +148,7 @@ msgstr "**回退**:如果 NPU 拓扑亲和性不可用,则使用 global_slic msgid "" "**Failure handling**: Any exception in binding is logged as a warning and" " **binding is skipped for that rank**." -msgstr "" -"**故障处理**:绑定过程中的任何异常都会记录为警告,并且**跳过该等级的绑定**。" +msgstr "**故障处理**:绑定过程中的任何异常都会记录为警告,并且**跳过该等级的绑定**。" #: ../../source/developer_guide/Design_Documents/cpu_binding.md:29 msgid "Execution flow (simplified)" @@ -373,9 +369,7 @@ msgstr "`IRQ`: 600-601, `Main`: 602-637, `ACL`: 638, `Release`: 639" msgid "" "This layout remains deterministic even when multiple processes share the " "same cpuset, because slicing is based on the global logical NPU ID." -msgstr "" -"即使多个进程共享同一个 cpuset,此布局也保持确定性,因为切片是基于全局逻辑 " -"NPU ID 的。" +msgstr "即使多个进程共享同一个 cpuset,此布局也保持确定性,因为切片是基于全局逻辑 NPU ID 的。" #: ../../source/developer_guide/Design_Documents/cpu_binding.md:86 msgid "Example 2: A3 global_slice, even split" @@ -389,6 +383,10 @@ msgstr "示例 2:A3 global_slice,均匀分割" msgid "**Inputs**:" msgstr "**输入**:" +#: ../../source/developer_guide/Design_Documents/cpu_binding.md:90 +msgid "allowed_cpus = [0..23] (24 CPUs)" +msgstr "allowed_cpus = [0..23] (24个CPU)" + #: ../../source/developer_guide/Design_Documents/cpu_binding.md:91 msgid "" "NUMA nodes = 0..1 (2 NUMA nodes, symmetric layout; NUMA0 = 0..11, NUMA1 =" @@ -520,7 +518,10 @@ msgid "" "(6,7) and NUMA1 (8..11). This is a direct consequence of global slicing " "over the ordered cpuset; the remainder distribution does not enforce NUMA" " boundaries." -msgstr "在上述对称NUMA布局中 (NUMA0 = 0..7, NUMA1 = 8..16),NPU0保持在NUMA0内,NPU2保持在NUMA1内,但NPU1跨越了NUMA0 (6,7) 和 NUMA1 (8..11)。这是对有序cpuset进行全局切片的直接结果;余数分配不强制NUMA边界。" +msgstr "" +"在上述对称NUMA布局中 (NUMA0 = 0..7, NUMA1 = " +"8..16),NPU0保持在NUMA0内,NPU2保持在NUMA1内,但NPU1跨越了NUMA0 (6,7) 和 NUMA1 " +"(8..11)。这是对有序cpuset进行全局切片的直接结果;余数分配不强制NUMA边界。" #: ../../source/developer_guide/Design_Documents/cpu_binding.md:134 msgid "" @@ -539,7 +540,9 @@ msgid "" "avoid cross‑NUMA pools. A future enhancement should incorporate NUMA node" " boundaries into the slicing logic so that pools remain within a single " "NUMA node whenever possible." 
-msgstr "使用当前的 `global_slice` 策略,某些CPU/NPU布局无法避免跨NUMA池。未来的增强应将NUMA节点边界纳入切片逻辑,以便池尽可能保持在单个NUMA节点内。" +msgstr "" +"使用当前的 `global_slice` " +"策略,某些CPU/NPU布局无法避免跨NUMA池。未来的增强应将NUMA节点边界纳入切片逻辑,以便池尽可能保持在单个NUMA节点内。" #: ../../source/developer_guide/Design_Documents/cpu_binding.md:140 msgid "Example 4: global_slice with visible subset of NPUs" @@ -594,7 +597,6 @@ msgid "Example 5: A2/310P topo_affinity with NUMA extension" msgstr "示例 5: 具有NUMA扩展的 A2/310P topo_affinity" #: ../../source/developer_guide/Design_Documents/cpu_binding.md:163 -#, python-brace-format msgid "npu_affinity = {0: [0..7], 1: [0..7]} (from `npu-smi info -t topo`)" msgstr "npu_affinity = {0: [0..7], 1: [0..7]} (来自 `npu-smi info -t topo`)" @@ -745,11 +747,12 @@ msgid "" "0–31, NUMA1 = CPUs 32–63, and the cpuset is 0–63. With 4 logical NPUs, " "global slicing yields 16 CPUs per NPU (0–15, 16–31, 32–47, 48–63), so " "each NPU’s pool stays within a single NUMA node." -msgstr "示例(对称布局):2个NUMA节点,总共64个CPU。NUMA0 = CPU 0–31,NUMA1 = CPU 32–63,cpuset为0–63。对于4个逻辑NPU,全局切片每个NPU产生16个CPU (0–15, 16–31, 32–47, 48–63),因此每个NPU的池保持在单个NUMA节点内。" +msgstr "" +"示例(对称布局):2个NUMA节点,共64个CPU。NUMA0 = CPU 0–31,NUMA1 = CPU 32–63,cpuset为0–63。对于4个逻辑NPU,全局切片为每个NPU分配16个CPU (0–15, 16–31, 32–47, 48–63),因此每个NPU的CPU池都保持在单个NUMA节点内。" #: ../../source/developer_guide/Design_Documents/cpu_binding.md:212 msgid "**Runtime dependencies**:" -msgstr "**运行时依赖**:" +msgstr "**运行时依赖项**:" #: ../../source/developer_guide/Design_Documents/cpu_binding.md:213 msgid "Requires npu‑smi and lscpu commands." @@ -761,13 +764,13 @@ msgstr "IRQ绑定需要对 /proc/irq 的写访问权限。" #: ../../source/developer_guide/Design_Documents/cpu_binding.md:215 msgid "Memory binding requires migratepages; otherwise it is skipped." -msgstr "内存绑定需要 migratepages;否则将被跳过。" +msgstr "内存绑定需要 migratepages;否则将跳过此步骤。" #: ../../source/developer_guide/Design_Documents/cpu_binding.md:216 msgid "" "**IRQ side effects**: irqbalance may be stopped to avoid overriding " "bindings." 
-msgstr "**IRQ副作用**:可能会停止 irqbalance 以避免覆盖绑定。" +msgstr "**IRQ副作用**:可能会停止 irqbalance 服务以避免覆盖绑定。" #: ../../source/developer_guide/Design_Documents/cpu_binding.md:217 msgid "" @@ -788,13 +791,15 @@ msgstr "使用标准的 vLLM 日志配置来启用调试日志。当启用调试 #: ../../source/developer_guide/Design_Documents/cpu_binding.md:223 msgid "References" -msgstr "参考" +msgstr "参考资料" #: ../../source/developer_guide/Design_Documents/cpu_binding.md:225 msgid "" "CPU binding implementation: vllm_ascend/cpu_binding.py (`DeviceInfo`, " "`CpuAlloc`, `bind_cpus`)" -msgstr "CPU 绑定实现:vllm_ascend/cpu_binding.py (`DeviceInfo`, `CpuAlloc`, `bind_cpus`)" +msgstr "" +"CPU 绑定实现:vllm_ascend/cpu_binding.py (`DeviceInfo`, `CpuAlloc`, " +"`bind_cpus`)" #: ../../source/developer_guide/Design_Documents/cpu_binding.md:226 msgid "" @@ -807,7 +812,9 @@ msgid "" "Additional config option: " "docs/source/user_guide/configuration/additional_config.md " "(`enable_cpu_binding`)" -msgstr "附加配置选项:docs/source/user_guide/configuration/additional_config.md (`enable_cpu_binding`)" +msgstr "" +"附加配置选项:docs/source/user_guide/configuration/additional_config.md " +"(`enable_cpu_binding`)" #: ../../source/developer_guide/Design_Documents/cpu_binding.md:228 msgid "Tests: tests/ut/device_allocator/test_cpu_binding.py" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/long_sequence_context_parallel_multi_node.po b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/long_sequence_context_parallel_multi_node.po index 071513e3..5a56840a 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/long_sequence_context_parallel_multi_node.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/long_sequence_context_parallel_multi_node.po @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: vllm-ascend \n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"POT-Creation-Date: 2026-04-15 09:41+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language: zh_CN\n" @@ -53,7 +53,12 @@ msgid "" "memory usage, it would introduce additional communication and small " "operator overhead. Therefore, we will not enable the DCP feature on node " "d." -msgstr "以 Deepseek-V3.1-w8a8 模型为例,使用 3 台 Atlas 800T A3 服务器部署“1P1D”架构。节点 p 跨多台机器部署,而节点 d 部署在单台机器上。假设预填充服务器的 IP 为 192.0.0.1(预填充 1)和 192.0.0.2(预填充 2),解码器服务器为 192.0.0.3(解码器 1)。每台服务器使用 8 个 NPU(16 个芯片)部署一个服务实例。在当前示例中,我们将在节点 p 上启用上下文并行特性以改善 TTFT。虽然在节点 d 上启用 DCP 特性可以减少内存使用,但会引入额外的通信和小算子开销。因此,我们不会在节点 d 上启用 DCP 特性。" +msgstr "" +"以 Deepseek-V3.1-w8a8 模型为例,使用 3 台 Atlas 800T A3 服务器部署“1P1D”架构。节点 p " +"跨多台机器部署,而节点 d 部署在单台机器上。假设预填充服务器的 IP 为 192.0.0.1(预填充 1)和 192.0.0.2(预填充 " +"2),解码器服务器为 192.0.0.3(解码器 1)。每台服务器使用 8 个 NPU(16 个芯片)部署一个服务实例。在当前示例中,我们将在节点" +" p 上启用上下文并行特性以改善 TTFT。虽然在节点 d 上启用 DCP " +"特性可以减少内存使用,但会引入额外的通信和小算子开销。因此,我们不会在节点 d 上启用 DCP 特性。" #: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:13 msgid "Environment Preparation" @@ -69,7 +74,11 @@ msgid "" "model weight](https://www.modelscope.cn/models/Eco-" "Tech/DeepSeek-V3.1-w8a8). Please modify `torch_dtype` from `float16` to " "`bfloat16` in `config.json`." 
-msgstr "`DeepSeek-V3.1_w8a8mix_mtp`(混合 MTP 量化版本):[下载模型权重](https://www.modelscope.cn/models/Eco-Tech/DeepSeek-V3.1-w8a8)。请在 `config.json` 中将 `torch_dtype` 从 `float16` 修改为 `bfloat16`。" +msgstr "" +"`DeepSeek-V3.1_w8a8mix_mtp`(混合 MTP " +"量化版本):[下载模型权重](https://www.modelscope.cn/models/Eco-" +"Tech/DeepSeek-V3.1-w8a8)。请在 `config.json` 中将 `torch_dtype` 从 `float16` " +"修改为 `bfloat16`。" #: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:19 msgid "" @@ -86,7 +95,9 @@ msgid "" "Refer to [verify multi-node communication " "environment](../../installation.md#verify-multi-node-communication) to " "verify multi-node communication." -msgstr "请参考[验证多节点通信环境](../../installation.md#verify-multi-node-communication)来验证多节点通信。" +msgstr "" +"请参考[验证多节点通信环境](../../installation.md#verify-multi-node-" +"communication)来验证多节点通信。" #: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:25 msgid "Installation" @@ -101,7 +112,9 @@ msgid "" "Select an image based on your machine type and start the Docker image on " "your node, refer to [using Docker](../../installation.md#set-up-using-" "docker)." -msgstr "根据您的机器类型选择镜像并在节点上启动 Docker 镜像,请参考[使用 Docker](../../installation.md#set-up-using-docker)。" +msgstr "" +"根据您的机器类型选择镜像并在节点上启动 Docker 镜像,请参考[使用 Docker](../../installation.md#set-" +"up-using-docker)。" #: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:64 msgid "You need to set up environment on each node." @@ -119,7 +132,10 @@ msgid "" "socket listeners. To avoid any issues, port conflicts should be " "prevented. Additionally, ensure that each node's engine_id is uniquely " "assigned to avoid conflicts." -msgstr "我们可以分别在预填充器/解码器节点上运行以下脚本来启动服务器。请注意,每个 P/D 节点将占用从 kv_port 到 kv_port + num_chips 的端口范围来初始化 socket 监听器。为避免任何问题,应防止端口冲突。此外,请确保每个节点的 engine_id 被唯一分配以避免冲突。" +msgstr "" +"我们可以分别在预填充器/解码器节点上运行以下脚本来启动服务器。请注意,每个 P/D 节点将占用从 kv_port 到 kv_port + " +"num_chips 的端口范围来初始化 socket 监听器。为避免任何问题,应防止端口冲突。此外,请确保每个节点的 engine_id " +"被唯一分配以避免冲突。" #: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:70 msgid "" @@ -154,7 +170,10 @@ msgid "" "[load\\_balance\\_proxy\\_server\\_example.py](https://github.com/vllm-" "project/vllm-" "ascend/blob/main/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py)" -msgstr "在与预填充服务实例相同的节点上运行代理服务器。您可以在仓库的示例中找到代理程序:[load_balance_proxy_server_example.py](https://github.com/vllm-project/vllm-ascend/blob/main/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py)" +msgstr "" +"在与预填充服务实例相同的节点上运行代理服务器。您可以在仓库的示例中找到代理程序:[load_balance_proxy_server_example.py](https://github.com" +"/vllm-project/vllm-" +"ascend/blob/main/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py)" #: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:301 msgid "**Notice:** The parameters are explained as follows:" @@ -193,21 +212,29 @@ msgid "" "state is also counted in metrics such as TTFT and TPOT. Therefore, when " "testing performance, it is generally recommended that `--max-num-seqs` * " "`--data-parallel-size` >= the actual total concurrency." 
-msgstr "`--max-num-seqs` 表示每个 DP 组允许处理的最大请求数。如果发送到服务的请求数量超过此限制,超出的请求将保持在等待状态,不会被调度。请注意,在等待状态所花费的时间也会计入 TTFT 和 TPOT 等指标。因此,在测试性能时,通常建议 `--max-num-seqs` * `--data-parallel-size` >= 实际总并发数。" +msgstr "" +"`--max-num-seqs` 表示每个 DP " +"组允许处理的最大请求数。如果发送到服务的请求数量超过此限制,超出的请求将保持在等待状态,不会被调度。请注意,在等待状态所花费的时间也会计入 " +"TTFT 和 TPOT 等指标。因此,在测试性能时,通常建议 `--max-num-seqs` * `--data-parallel-size` " +">= 实际总并发数。" #: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:309 msgid "" "`--max-num-batched-tokens` represents the maximum number of tokens that " "the model can process in a single step. Currently, vLLM v1 scheduling " "enables ChunkPrefill/SplitFuse by default, which means:" -msgstr "`--max-num-batched-tokens` 表示模型单步可以处理的最大 token 数。目前,vLLM v1 调度默认启用 ChunkPrefill/SplitFuse,这意味着:" +msgstr "" +"`--max-num-batched-tokens` 表示模型单步可以处理的最大 token 数。目前,vLLM v1 调度默认启用 " +"ChunkPrefill/SplitFuse,这意味着:" #: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:310 msgid "" "(1) If the input length of a request is greater than `--max-num-batched-" "tokens`, it will be divided into multiple rounds of computation according" " to `--max-num-batched-tokens`;" -msgstr "(1)如果请求的输入长度大于 `--max-num-batched-tokens`,它将根据 `--max-num-batched-tokens` 被分成多轮计算;" +msgstr "" +"(1)如果请求的输入长度大于 `--max-num-batched-tokens`,它将根据 `--max-num-batched-tokens`" +" 被分成多轮计算;" #: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:311 msgid "" @@ -236,14 +263,22 @@ msgid "" "during actual inference (e.g., due to uneven EP load), setting `--gpu-" "memory-utilization` too high may lead to OOM (Out of Memory) issues " "during actual inference. The default value is `0.9`." -msgstr "`--gpu-memory-utilization` 表示 vLLM 将用于实际推理的 HBM 比例。其核心功能是计算可用的 kv_cache 大小。在预热阶段(vLLM 中称为 profile run),vLLM 会记录输入大小为 `--max-num-batched-tokens` 的推理过程中的峰值 GPU 内存使用量。然后,可用的 kv_cache 大小计算为:`--gpu-memory-utilization` * HBM 大小 - 峰值 GPU 内存使用量。因此,`--gpu-memory-utilization` 的值越大,可用的 kv_cache 就越多。然而,由于预热阶段的 GPU 内存使用量可能与实际推理期间不同(例如,由于 EP 负载不均),将 `--gpu-memory-utilization` 设置得过高可能导致实际推理时出现 OOM(内存不足)问题。默认值为 `0.9`。" +msgstr "" +"`--gpu-memory-utilization` 表示 vLLM 将用于实际推理的 HBM 比例。其核心功能是计算可用的 kv_cache " +"大小。在预热阶段(vLLM 中称为 profile run),vLLM 会记录输入大小为 `--max-num-batched-tokens` " +"的推理过程中的峰值 GPU 内存使用量。然后,可用的 kv_cache 大小计算为:`--gpu-memory-utilization` * " +"HBM 大小 - 峰值 GPU 内存使用量。因此,`--gpu-memory-utilization` 的值越大,可用的 kv_cache " +"就越多。然而,由于预热阶段的 GPU 内存使用量可能与实际推理期间不同(例如,由于 EP 负载不均),将 `--gpu-memory-" +"utilization` 设置得过高可能导致实际推理时出现 OOM(内存不足)问题。默认值为 `0.9`。" #: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:314 msgid "" "`--enable-expert-parallel` indicates that EP is enabled. Note that vLLM " "does not support a mixed approach of ETP and EP; that is, MoE can either " "use pure EP or pure TP." -msgstr "`--enable-expert-parallel` 表示启用了 EP。请注意,vLLM 不支持 ETP 和 EP 的混合方法;也就是说,MoE 只能使用纯 EP 或纯 TP。" +msgstr "" +"`--enable-expert-parallel` 表示启用了 EP。请注意,vLLM 不支持 ETP 和 EP 的混合方法;也就是说,MoE " +"只能使用纯 EP 或纯 TP。" #: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:315 msgid "" @@ -266,7 +301,11 @@ msgid "" "\"PIECEWISE\" and \"FULL_DECODE_ONLY\" are supported. The graph mode is " "mainly used to reduce the cost of operator dispatch. Currently, " "\"FULL_DECODE_ONLY\" is recommended." 
-msgstr "`--compilation-config` 包含与 aclgraph 图模式相关的配置。最重要的配置是 \"cudagraph_mode\" 和 \"cudagraph_capture_sizes\",其含义如下:\"cudagraph_mode\":表示特定的图模式。目前支持 \"PIECEWISE\" 和 \"FULL_DECODE_ONLY\"。图模式主要用于降低算子调度的开销。目前推荐使用 \"FULL_DECODE_ONLY\"。" +msgstr "" +"`--compilation-config` 包含与 aclgraph 图模式相关的配置。最重要的配置是 \"cudagraph_mode\" 和" +" \"cudagraph_capture_sizes\",其含义如下:\"cudagraph_mode\":表示特定的图模式。目前支持 " +"\"PIECEWISE\" 和 \"FULL_DECODE_ONLY\"。图模式主要用于降低算子调度的开销。目前推荐使用 " +"\"FULL_DECODE_ONLY\"。" #: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:319 msgid "" @@ -276,14 +315,19 @@ msgid "" " inputs between levels are automatically padded to the next level. " "Currently, the default setting is recommended. Only in some scenarios is " "it necessary to set this separately to achieve optimal performance." -msgstr "\"cudagraph_capture_sizes\":表示不同级别的图模式。默认值为 [1, 2, 4, 8, 16, 24, 32, 40,..., `--max-num-seqs`]。在图模式下,不同级别图的输入是固定的,级别之间的输入会自动填充到下一级别。目前推荐使用默认设置。仅在部分场景中,需要单独设置此参数以达到最佳性能。" +msgstr "" +"\"cudagraph_capture_sizes\":表示不同级别的图模式。默认值为 [1, 2, 4, 8, 16, 24, 32, " +"40,..., `--max-num-" +"seqs`]。在图模式下,不同级别图的输入是固定的,级别之间的输入会自动填充到下一级别。目前推荐使用默认设置。仅在部分场景中,需要单独设置此参数以达到最佳性能。" #: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:320 msgid "" "`export VLLM_ASCEND_ENABLE_FLASHCOMM1=1` indicates that Flashcomm1 " "optimization is enabled. Currently, this optimization is only supported " "for MoE in scenarios where tensor-parallel-size > 1." -msgstr "`export VLLM_ASCEND_ENABLE_FLASHCOMM1=1` 表示启用了 Flashcomm1 优化。目前,此优化仅在 tensor-parallel-size > 1 的场景下对 MoE 提供支持。" +msgstr "" +"`export VLLM_ASCEND_ENABLE_FLASHCOMM1=1` 表示启用了 Flashcomm1 优化。目前,此优化仅在 " +"tensor-parallel-size > 1 的场景下对 MoE 提供支持。" #: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:321 msgid "" @@ -291,7 +335,9 @@ msgid "" "parallel is enabled. This environment variable is required in the PD " "architecture but not needed in the PD co-locate deployment scenario. It " "will be removed in the future." -msgstr "`export VLLM_ASCEND_ENABLE_CONTEXT_PARALLEL=1` 表示启用了上下文并行。此环境变量在 PD 架构中是必需的,但在 PD 共置部署场景中不需要。未来将被移除。" +msgstr "" +"`export VLLM_ASCEND_ENABLE_CONTEXT_PARALLEL=1` 表示启用了上下文并行。此环境变量在 PD " +"架构中是必需的,但在 PD 共置部署场景中不需要。未来将被移除。" #: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:323 msgid "**Notice:**" @@ -314,22 +360,18 @@ msgid "Accuracy Evaluation" msgstr "精度评估" #: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:330 -msgid "Here are two accuracy evaluation methods." -msgstr "以下是两种精度评估方法。" - -#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:332 -#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:344 +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:342 msgid "Using AISBench" msgstr "使用 AISBench" -#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:334 +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:332 msgid "" "Refer to [Using " "AISBench](../../developer_guide/evaluation/using_ais_bench.md) for " "details." msgstr "详情请参考[使用 AISBench](../../developer_guide/evaluation/using_ais_bench.md)。" -#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:336 +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:334 msgid "" "After execution, you can get the result, here is the result of " "`DeepSeek-V3.1-w8a8` for reference only." 
@@ -375,52 +417,55 @@ msgstr "生成" msgid "86.67" msgstr "86.67" -#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:342 +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:340 msgid "Performance" msgstr "性能" -#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:346 +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:344 msgid "" "Refer to [Using AISBench for performance " "evaluation](../../developer_guide/evaluation/using_ais_bench.md#execute-" "performance-evaluation) for details." -msgstr "详情请参阅[使用 AISBench 进行性能评估](../../developer_guide/evaluation/using_ais_bench.md#execute-performance-evaluation)。" +msgstr "" +"详情请参阅[使用 AISBench " +"进行性能评估](../../developer_guide/evaluation/using_ais_bench.md#execute-" +"performance-evaluation)。" -#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:348 +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:346 msgid "Using vLLM Benchmark" msgstr "使用 vLLM 基准测试" -#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:350 +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:348 msgid "Run performance evaluation of `DeepSeek-V3.1-w8a8` as an example." msgstr "以运行 `DeepSeek-V3.1-w8a8` 的性能评估为例。" -#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:352 +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:350 msgid "" "Refer to [vllm benchmark](https://docs.vllm.ai/en/latest/benchmarking/) " "for more details." msgstr "更多详情请参阅 [vllm 基准测试](https://docs.vllm.ai/en/latest/benchmarking/)。" -#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:354 +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:352 msgid "There are three `vllm bench` subcommands:" msgstr "`vllm bench` 包含三个子命令:" -#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:356 +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:354 msgid "`latency`: Benchmark the latency of a single batch of requests." msgstr "`latency`:对单批请求的延迟进行基准测试。" -#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:357 +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:355 msgid "`serve`: Benchmark the online serving throughput." msgstr "`serve`:对在线服务吞吐量进行基准测试。" -#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:358 +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:356 msgid "`throughput`: Benchmark offline inference throughput." msgstr "`throughput`:对离线推理吞吐量进行基准测试。" -#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:360 +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:358 msgid "Take the `serve` as an example. Run the code as follows." msgstr "以 `serve` 为例,按如下方式运行代码。" -#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:367 +#: ../../source/tutorials/features/long_sequence_context_parallel_multi_node.md:365 msgid "" "After about several minutes, you can get the performance evaluation " "result." 
diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/long_sequence_context_parallel_single_node.po b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/long_sequence_context_parallel_single_node.po index 159625fa..2d9d59b1 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/long_sequence_context_parallel_single_node.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/long_sequence_context_parallel_single_node.po @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: vllm-ascend \n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"POT-Creation-Date: 2026-04-15 09:41+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language: zh_CN\n" @@ -38,7 +38,9 @@ msgid "" "Using the `Qwen3-235B-A22B-w8a8` (Quantized version) model as an example," " use 1 Atlas 800 A3 (64G × 16) server to deploy the single node \"pd co-" "locate\" architecture." -msgstr "以 `Qwen3-235B-A22B-w8a8`(量化版本)模型为例,使用 1 台 Atlas 800 A3(64G × 16)服务器部署单节点 \"pd co-locate\" 架构。" +msgstr "" +"以 `Qwen3-235B-A22B-w8a8`(量化版本)模型为例,使用 1 台 Atlas 800 A3(64G × 16)服务器部署单节点 " +"\"pd co-locate\" 架构。" #: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:9 msgid "Environment Preparation" @@ -53,7 +55,10 @@ msgid "" "`Qwen3-235B-A22B-w8a8` (Quantized version): requires 1 Atlas 800 A3 (64G " "× 16) node. [Download model weight](https://modelscope.cn/models/vllm-" "ascend/Qwen3-235B-A22B-W8A8)" -msgstr "`Qwen3-235B-A22B-w8a8`(量化版本):需要 1 个 Atlas 800 A3(64G × 16)节点。[下载模型权重](https://modelscope.cn/models/vllm-ascend/Qwen3-235B-A22B-W8A8)" +msgstr "" +"`Qwen3-235B-A22B-w8a8`(量化版本):需要 1 个 Atlas 800 A3(64G × " +"16)节点。[下载模型权重](https://modelscope.cn/models/vllm-ascend/Qwen3-235B-A22B-" +"W8A8)" #: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:15 msgid "" @@ -69,6 +74,42 @@ msgstr "使用 Docker 运行" msgid "Start a Docker container on each node." msgstr "在每个节点上启动一个 Docker 容器。" +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:21 +msgid "dataset" +msgstr "数据集" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:21 +msgid "version" +msgstr "版本" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:21 +msgid "metric" +msgstr "指标" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:21 +msgid "mode" +msgstr "模式" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:21 +msgid "vllm-api-general-chat" +msgstr "vllm-api-general-chat" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:21 +msgid "aime2024" +msgstr "aime2024" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:21 +msgid "-" +msgstr "-" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:21 +msgid "accuracy" +msgstr "准确率" + +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:21 +msgid "gen" +msgstr "生成" + #: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:63 msgid "Deployment" msgstr "部署" @@ -81,7 +122,9 @@ msgstr "单节点部署" msgid "" "`Qwen3-235B-A22B-w8a8` can be deployed on 1 Atlas 800 A3(64G*16). " "Quantized version needs to start with parameter `--quantization ascend`." 
-msgstr "`Qwen3-235B-A22B-w8a8` 可以部署在 1 台 Atlas 800 A3(64G*16)上。量化版本需要使用参数 `--quantization ascend` 启动。" +msgstr "" +"`Qwen3-235B-A22B-w8a8` 可以部署在 1 台 Atlas 800 A3(64G*16)上。量化版本需要使用参数 " +"`--quantization ascend` 启动。" #: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:70 msgid "Run the following script to execute online 128k inference." @@ -98,7 +141,10 @@ msgid "" "for vllm version below `v0.12.0` use parameter: `--rope_scaling " "'{\"rope_type\":\"yarn\",\"factor\":4,\"original_max_position_embeddings\":32768}'" " \\`" -msgstr "对于 vllm 版本低于 `v0.12.0`,使用参数:`--rope_scaling '{\"rope_type\":\"yarn\",\"factor\":4,\"original_max_position_embeddings\":32768}' \\`" +msgstr "" +"对于 vllm 版本低于 `v0.12.0`,使用参数:`--rope_scaling " +"'{\"rope_type\":\"yarn\",\"factor\":4,\"original_max_position_embeddings\":32768}'" +" \\`" #: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:109 #, python-brace-format @@ -107,7 +153,10 @@ msgid "" "'{\"rope_parameters\": " "{\"rope_type\":\"yarn\",\"rope_theta\":1000000,\"factor\":4,\"original_max_position_embeddings\":32768}}'" " \\`" -msgstr "对于 vllm 版本 `v0.12.0`,使用参数:`--hf-overrides '{\"rope_parameters\": {\"rope_type\":\"yarn\",\"rope_theta\":1000000,\"factor\":4,\"original_max_position_embeddings\":32768}}' \\`" +msgstr "" +"对于 vllm 版本 `v0.12.0`,使用参数:`--hf-overrides '{\"rope_parameters\": " +"{\"rope_type\":\"yarn\",\"rope_theta\":1000000,\"factor\":4,\"original_max_position_embeddings\":32768}}'" +" \\`" #: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:111 msgid "The parameters are explained as follows:" @@ -146,21 +195,29 @@ msgid "" "state is also counted in metrics such as TTFT and TPOT. Therefore, when " "testing performance, it is generally recommended that `--max-num-seqs` * " "`--data-parallel-size` >= the actual total concurrency." -msgstr "`--max-num-seqs` 表示每个 DP 组允许处理的最大请求数。如果发送到服务的请求数量超过此限制,超出的请求将保持在等待状态,不会被调度。请注意,在等待状态所花费的时间也会计入 TTFT 和 TPOT 等指标。因此,在测试性能时,通常建议 `--max-num-seqs` * `--data-parallel-size` >= 实际总并发数。" +msgstr "" +"`--max-num-seqs` 表示每个 DP " +"组允许处理的最大请求数。如果发送到服务的请求数量超过此限制,超出的请求将保持在等待状态,不会被调度。请注意,在等待状态所花费的时间也会计入 " +"TTFT 和 TPOT 等指标。因此,在测试性能时,通常建议 `--max-num-seqs` * `--data-parallel-size` " +">= 实际总并发数。" #: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:118 msgid "" "`--max-num-batched-tokens` represents the maximum number of tokens that " "the model can process in a single step. 
Currently, vLLM v1 scheduling " "enables ChunkPrefill/SplitFuse by default, which means:" -msgstr "`--max-num-batched-tokens` 表示模型单步可以处理的最大 token 数。目前,vLLM v1 调度默认启用 ChunkPrefill/SplitFuse,这意味着:" +msgstr "" +"`--max-num-batched-tokens` 表示模型单步可以处理的最大 token 数。目前,vLLM v1 调度默认启用 " +"ChunkPrefill/SplitFuse,这意味着:" #: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:119 msgid "" "(1) If the input length of a request is greater than `--max-num-batched-" "tokens`, it will be divided into multiple rounds of computation according" " to `--max-num-batched-tokens`;" -msgstr "(1)如果请求的输入长度大于 `--max-num-batched-tokens`,它将根据 `--max-num-batched-tokens` 被分成多轮计算;" +msgstr "" +"(1)如果请求的输入长度大于 `--max-num-batched-tokens`,它将根据 `--max-num-batched-tokens`" +" 被分成多轮计算;" #: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:120 msgid "" @@ -189,14 +246,22 @@ msgid "" "during actual inference (e.g., due to uneven EP load), setting `--gpu-" "memory-utilization` too high may lead to OOM (Out of Memory) issues " "during actual inference. The default value is `0.9`." -msgstr "`--gpu-memory-utilization` 表示 vLLM 将用于实际推理的 HBM 比例。其核心功能是计算可用的 kv_cache 大小。在预热阶段(vLLM 中称为 profile run),vLLM 会记录输入大小为 `--max-num-batched-tokens` 的推理过程中的峰值 GPU 内存使用量。然后,可用的 kv_cache 大小计算为:`--gpu-memory-utilization` * HBM 大小 - 峰值 GPU 内存使用量。因此,`--gpu-memory-utilization` 的值越大,可用的 kv_cache 就越多。然而,由于预热阶段的 GPU 内存使用量可能与实际推理时不同(例如,由于 EP 负载不均),将 `--gpu-memory-utilization` 设置得过高可能导致实际推理时出现 OOM(内存不足)问题。默认值为 `0.9`。" +msgstr "" +"`--gpu-memory-utilization` 表示 vLLM 将用于实际推理的 HBM 比例。其核心功能是计算可用的 kv_cache " +"大小。在预热阶段(vLLM 中称为 profile run),vLLM 会记录输入大小为 `--max-num-batched-tokens` " +"的推理过程中的峰值 GPU 内存使用量。然后,可用的 kv_cache 大小计算为:`--gpu-memory-utilization` * " +"HBM 大小 - 峰值 GPU 内存使用量。因此,`--gpu-memory-utilization` 的值越大,可用的 kv_cache " +"就越多。然而,由于预热阶段的 GPU 内存使用量可能与实际推理时不同(例如,由于 EP 负载不均),将 `--gpu-memory-" +"utilization` 设置得过高可能导致实际推理时出现 OOM(内存不足)问题。默认值为 `0.9`。" #: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:123 msgid "" "`--enable-expert-parallel` indicates that EP is enabled. Note that vLLM " "does not support a mixed approach of ETP and EP; that is, MoE can either " "use pure EP or pure TP." -msgstr "`--enable-expert-parallel` 表示启用了 EP。请注意,vLLM 不支持 ETP 和 EP 的混合方法;也就是说,MoE 要么使用纯 EP,要么使用纯 TP。" +msgstr "" +"`--enable-expert-parallel` 表示启用了 EP。请注意,vLLM 不支持 ETP 和 EP 的混合方法;也就是说,MoE " +"要么使用纯 EP,要么使用纯 TP。" #: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:124 msgid "" @@ -219,7 +284,11 @@ msgid "" "\"PIECEWISE\" and \"FULL_DECODE_ONLY\" are supported. The graph mode is " "mainly used to reduce the cost of operator dispatch. Currently, " "\"FULL_DECODE_ONLY\" is recommended." -msgstr "`--compilation-config` 包含与 aclgraph 图模式相关的配置。最重要的配置是 \"cudagraph_mode\" 和 \"cudagraph_capture_sizes\",其含义如下:\"cudagraph_mode\":表示具体的图模式。目前支持 \"PIECEWISE\" 和 \"FULL_DECODE_ONLY\"。图模式主要用于降低算子调度的开销。目前推荐使用 \"FULL_DECODE_ONLY\"。" +msgstr "" +"`--compilation-config` 包含与 aclgraph 图模式相关的配置。最重要的配置是 \"cudagraph_mode\" 和" +" \"cudagraph_capture_sizes\",其含义如下:\"cudagraph_mode\":表示具体的图模式。目前支持 " +"\"PIECEWISE\" 和 \"FULL_DECODE_ONLY\"。图模式主要用于降低算子调度的开销。目前推荐使用 " +"\"FULL_DECODE_ONLY\"。" #: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:128 msgid "" @@ -229,14 +298,19 @@ msgid "" " inputs between levels are automatically padded to the next level. " "Currently, the default setting is recommended. 
Only in some scenarios is " "it necessary to set this separately to achieve optimal performance." -msgstr "\"cudagraph_capture_sizes\":表示不同级别的图模式。默认值为 [1, 2, 4, 8, 16, 24, 32, 40,..., `--max-num-seqs`]。在图模式下,不同级别图的输入是固定的,级别之间的输入会自动填充到下一个级别。目前推荐使用默认设置。仅在部分场景中,需要单独设置此参数以达到最佳性能。" +msgstr "" +"\"cudagraph_capture_sizes\":表示不同级别的图模式。默认值为 [1, 2, 4, 8, 16, 24, 32, " +"40,..., `--max-num-" +"seqs`]。在图模式下,不同级别图的输入是固定的,级别之间的输入会自动填充到下一个级别。目前推荐使用默认设置。仅在部分场景中,需要单独设置此参数以达到最佳性能。" #: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:129 msgid "" "`export VLLM_ASCEND_ENABLE_FLASHCOMM1=1` indicates that Flashcomm1 " "optimization is enabled. Currently, this optimization is only supported " "for MoE in scenarios where tp_size > 1." -msgstr "`export VLLM_ASCEND_ENABLE_FLASHCOMM1=1` 表示启用了 Flashcomm1 优化。目前,此优化仅在 tp_size > 1 的场景下对 MoE 支持。" +msgstr "" +"`export VLLM_ASCEND_ENABLE_FLASHCOMM1=1` 表示启用了 Flashcomm1 优化。目前,此优化仅在 " +"tp_size > 1 的场景下对 MoE 支持。" #: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:133 msgid "tp_size needs to be divisible by dcp_size" @@ -246,120 +320,85 @@ msgstr "tp_size 需要能被 dcp_size 整除" msgid "" "decode context parallel size must be less than or equal to max_dcp_size, " "where max_dcp_size = tensor_parallel_size // total_num_kv_heads." -msgstr "解码上下文并行大小必须小于或等于 max_dcp_size,其中 max_dcp_size = tensor_parallel_size // total_num_kv_heads。" +msgstr "" +"解码上下文并行大小必须小于或等于 max_dcp_size,其中 max_dcp_size = tensor_parallel_size // " +"total_num_kv_heads。" #: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:136 msgid "Accuracy Evaluation" msgstr "精度评估" #: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:138 -msgid "Here are two accuracy evaluation methods." -msgstr "以下是两种精度评估方法。" - -#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:140 -#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:152 +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:150 msgid "Using AISBench" msgstr "使用 AISBench" -#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:142 +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:140 msgid "" "Refer to [Using " "AISBench](../../developer_guide/evaluation/using_ais_bench.md) for " "details." msgstr "详情请参阅[使用 AISBench](../../developer_guide/evaluation/using_ais_bench.md)。" -#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:144 +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:142 msgid "" "After execution, you can get the result, here is the result of `Qwen3" "-235B-A22B-w8a8` for reference only." 
msgstr "执行后,您可以获得结果,以下是 `Qwen3-235B-A22B-w8a8` 的结果,仅供参考。" -#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:21 -msgid "dataset" -msgstr "数据集" - -#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:21 -msgid "version" -msgstr "版本" - -#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:21 -msgid "metric" -msgstr "指标" - -#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:21 -msgid "mode" -msgstr "模式" - -#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:21 -msgid "vllm-api-general-chat" -msgstr "vllm-api-general-chat" - -#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:21 -msgid "aime2024" -msgstr "aime2024" - -#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:21 -msgid "-" -msgstr "-" - -#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:21 -msgid "accuracy" -msgstr "准确率" - -#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:21 -msgid "gen" -msgstr "生成" - #: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:21 msgid "83.33" msgstr "83.33" -#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:150 +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:148 msgid "Performance" msgstr "性能" -#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:154 +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:152 msgid "" "Refer to [Using AISBench for performance " "evaluation](../../developer_guide/evaluation/using_ais_bench.md#execute-" "performance-evaluation) for details." -msgstr "详情请参阅[使用 AISBench 进行性能评估](../../developer_guide/evaluation/using_ais_bench.md#execute-performance-evaluation)。" +msgstr "" +"详情请参阅[使用 AISBench " +"进行性能评估](../../developer_guide/evaluation/using_ais_bench.md#execute-" +"performance-evaluation)。" -#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:156 +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:154 msgid "Using vLLM Benchmark" msgstr "使用 vLLM Benchmark" -#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:158 +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:156 msgid "Run performance evaluation of `Qwen3-235B-A22B-w8a8` as an example." msgstr "以运行 `Qwen3-235B-A22B-w8a8` 的性能评估为例。" -#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:160 +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:158 msgid "" "Refer to [vllm benchmark](https://docs.vllm.ai/en/latest/benchmarking/) " "for more details." msgstr "更多详情请参阅 [vllm benchmark](https://docs.vllm.ai/en/latest/benchmarking/)。" -#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:162 +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:160 msgid "There are three `vllm bench` subcommands:" msgstr "`vllm bench` 有三个子命令:" -#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:164 +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:162 msgid "`latency`: Benchmark the latency of a single batch of requests." 
msgstr "`latency`:对单批请求的延迟进行基准测试。" -#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:165 +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:163 msgid "`serve`: Benchmark the online serving throughput." msgstr "`serve`:对在线服务吞吐量进行基准测试。" -#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:166 +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:164 msgid "`throughput`: Benchmark offline inference throughput." msgstr "`throughput`:对离线推理吞吐量进行基准测试。" -#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:168 +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:166 msgid "Take the `serve` as an example. Run the code as follows." msgstr "以 `serve` 为例。运行代码如下。" -#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:175 +#: ../../source/tutorials/features/long_sequence_context_parallel_single_node.md:173 msgid "" "After about several minutes, you can get the performance evaluation " "result." diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/pd_disaggregation_mooncake_multi_node.po b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/pd_disaggregation_mooncake_multi_node.po index bf6e1c0c..50294364 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/pd_disaggregation_mooncake_multi_node.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/pd_disaggregation_mooncake_multi_node.po @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: vllm-ascend \n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"POT-Creation-Date: 2026-04-15 09:41+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language: zh_CN\n" @@ -41,7 +41,10 @@ msgid "" "prefiller server is 192.0.0.1 (prefill 1) and 192.0.0.2 (prefill 2), and " "the decoder servers are 192.0.0.3 (decoder 1) and 192.0.0.4 (decoder 2). " "On each server, use 8 NPUs 16 chips to deploy one service instance." -msgstr "以 Deepseek-r1-w8a8 模型为例,使用 4 台 Atlas 800T A3 服务器部署 \"2P1D\" 架构。假设预填充服务器 IP 为 192.0.0.1(预填充节点 1)和 192.0.0.2(预填充节点 2),解码服务器 IP 为 192.0.0.3(解码节点 1)和 192.0.0.4(解码节点 2)。每台服务器使用 8 个 NPU(16 个芯片)部署一个服务实例。" +msgstr "" +"以 Deepseek-r1-w8a8 模型为例,使用 4 台 Atlas 800T A3 服务器部署 \"2P1D\" 架构。假设预填充服务器 " +"IP 为 192.0.0.1(预填充节点 1)和 192.0.0.2(预填充节点 2),解码服务器 IP 为 192.0.0.3(解码节点 1)和" +" 192.0.0.4(解码节点 2)。每台服务器使用 8 个 NPU(16 个芯片)部署一个服务实例。" #: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:9 msgid "Verify Multi-Node Communication Environment" @@ -137,7 +140,10 @@ msgid "" " by Moonshot AI.Installation and Compilation Guide: First, we" " need to obtain the Mooncake project. Refer to the following command:" -msgstr "Mooncake 是月之暗面(Moonshot AI)提供的领先 LLM 服务 Kimi 的推理平台。安装与编译指南: 首先,我们需要获取 Mooncake 项目。参考以下命令:" +msgstr "" +"Mooncake 是月之暗面(Moonshot AI)提供的领先 LLM 服务 Kimi " +"的推理平台。安装与编译指南: 首先,我们需要获取 Mooncake 项目。参考以下命令:" #: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:183 msgid "(Optional) Replace go install url if the network is poor" @@ -185,7 +191,10 @@ msgid "" "socket listeners. To avoid any issues, port conflicts should be " "prevented. Additionally, ensure that each node's engine_id is uniquely " "assigned to avoid conflicts." 
-msgstr "我们可以分别运行以下脚本来在预填充器/解码器节点上启动服务器。请注意,每个 P/D 节点将占用从 kv_port 到 kv_port + num_chips 的端口范围来初始化 socket 监听器。为避免问题,应防止端口冲突。此外,请确保每个节点的 engine_id 被唯一分配,以避免冲突。" +msgstr "" +"我们可以分别运行以下脚本来在预填充器/解码器节点上启动服务器。请注意,每个 P/D 节点将占用从 kv_port 到 kv_port + " +"num_chips 的端口范围来初始化 socket 监听器。为避免问题,应防止端口冲突。此外,请确保每个节点的 engine_id " +"被唯一分配,以避免冲突。" #: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:227 msgid "kv_port Configuration Guide" @@ -198,7 +207,10 @@ msgid "" "npu_per_node × 1000)`. If `kv_port` overlaps with this range, " "intermittent port conflicts may occur. To avoid this, configure `kv_port`" " according to the table below:" -msgstr "在 Ascend NPU 上,Mooncake 使用 AscendDirectTransport 进行 RDMA 数据传输,它会在 `[20000, 20000 + npu_per_node × 1000)` 范围内随机分配端口。如果 `kv_port` 与此范围重叠,可能会发生间歇性端口冲突。为避免此问题,请根据下表配置 `kv_port`:" +msgstr "" +"在 Ascend NPU 上,Mooncake 使用 AscendDirectTransport 进行 RDMA 数据传输,它会在 " +"`[20000, 20000 + npu_per_node × 1000)` 范围内随机分配端口。如果 `kv_port` " +"与此范围重叠,可能会发生间歇性端口冲突。为避免此问题,请根据下表配置 `kv_port`:" #: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:132 msgid "NPUs per Node" @@ -242,7 +254,9 @@ msgid "" "during startup, it may be caused by kv_port conflicting with randomly " "allocated AscendDirectTransport ports. Increase your kv_port value to " "avoid the reserved range." -msgstr "如果在启动时偶尔看到 `zmq.error.ZMQError: Address already in use`,可能是由于 kv_port 与随机分配的 AscendDirectTransport 端口冲突所致。请增加您的 kv_port 值以避开保留范围。" +msgstr "" +"如果在启动时偶尔看到 `zmq.error.ZMQError: Address already in use`,可能是由于 kv_port " +"与随机分配的 AscendDirectTransport 端口冲突所致。请增加您的 kv_port 值以避开保留范围。" #: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:240 msgid "launch_online_dp.py" @@ -251,9 +265,12 @@ msgstr "launch_online_dp.py" #: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:242 msgid "" "Use `launch_online_dp.py` to launch external dp vllm servers. " -"[launch\\_online\\_dp.py](https://github.com/vllm-project/vllm-" +"[launch_online_dp.py](https://github.com/vllm-project/vllm-" +"ascend/blob/main/examples/external_online_dp/launch_online_dp.py)" +msgstr "" +"使用 `launch_online_dp.py` 启动外部解耦 vllm " +"服务器。[launch_online_dp.py](https://github.com/vllm-project/vllm-" "ascend/blob/main/examples/external_online_dp/launch_online_dp.py)" -msgstr "使用 `launch_online_dp.py` 启动外部解耦 vllm 服务器。[launch\\_online\\_dp.py](https://github.com/vllm-project/vllm-ascend/blob/main/examples/external_online_dp/launch_online_dp.py)" #: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:245 msgid "run_dp_template.sh" @@ -262,9 +279,12 @@ msgstr "run_dp_template.sh" #: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:247 msgid "" "Modify `run_dp_template.sh` on each node. 
" -"[run\\_dp\\_template.sh](https://github.com/vllm-project/vllm-" +"[run_dp_template.sh](https://github.com/vllm-project/vllm-" +"ascend/blob/main/examples/external_online_dp/run_dp_template.sh)" +msgstr "" +"在每个节点上修改 `run_dp_template.sh`。[run_dp_template.sh](https://github.com" +"/vllm-project/vllm-" "ascend/blob/main/examples/external_online_dp/run_dp_template.sh)" -msgstr "在每个节点上修改 `run_dp_template.sh`。[run\\_dp\\_template.sh](https://github.com/vllm-project/vllm-ascend/blob/main/examples/external_online_dp/run_dp_template.sh)" #: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md #: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:250 @@ -321,7 +341,12 @@ msgid "" "MooncakeLayerwiseConnector.[load\\_balance\\_proxy\\_layerwise\\_server\\_example.py](https://github.com" "/vllm-project/vllm-" "ascend/blob/main/examples/disaggregated_prefill_v1/load_balance_proxy_layerwise_server_example.py)" -msgstr "**`load_balance_proxy_layerwise_server_example.py`**:请求首先被路由到 D 节点,然后根据需要转发到 P 节点。此代理设计用于与 MooncakeLayerwiseConnector 配合使用。[load\\_balance\\_proxy\\_layerwise\\_server\\_example.py](https://github.com/vllm-project/vllm-ascend/blob/main/examples/disaggregated_prefill_v1/load_balance_proxy_layerwise_server_example.py)" +msgstr "" +"**`load_balance_proxy_layerwise_server_example.py`**:请求首先被路由到 D " +"节点,然后根据需要转发到 P 节点。此代理设计用于与 MooncakeLayerwiseConnector " +"配合使用。[load_balance_proxy_layerwise_server_example.py](https://github.com" +"/vllm-project/vllm-" +"ascend/blob/main/examples/disaggregated_prefill_v1/load_balance_proxy_layerwise_server_example.py)" #: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:756 msgid "" @@ -331,7 +356,12 @@ msgid "" "MooncakeConnector.[load\\_balance\\_proxy\\_server\\_example.py](https://github.com" "/vllm-project/vllm-" "ascend/blob/main/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py)" -msgstr "**`load_balance_proxy_server_example.py`**:请求首先被路由到 P 节点,然后转发到 D 节点进行后续处理。此代理设计用于与 MooncakeConnector 配合使用。[load\\_balance\\_proxy\\_server\\_example.py](https://github.com/vllm-project/vllm-ascend/blob/main/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py)" +msgstr "" +"**`load_balance_proxy_server_example.py`**:请求首先被路由到 P 节点,然后转发到 D " +"节点进行后续处理。此代理设计用于与 MooncakeConnector " +"配合使用。[load\\_balance\\_proxy\\_server\\_example.py](https://github.com" +"/vllm-project/vllm-" +"ascend/blob/main/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py)" #: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:814 msgid "Parameter" @@ -371,7 +401,7 @@ msgstr "--prefiller-ports" #: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:814 msgid "Ports of prefiller nodes" -msgstr "预填充节点的端口" +msgstr "预填充节点端口" #: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:814 msgid "--decoder-hosts" @@ -379,7 +409,7 @@ msgstr "--decoder-hosts" #: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:814 msgid "Hosts of decoder nodes" -msgstr "解码器节点的主机地址" +msgstr "解码器节点主机地址" #: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:814 msgid "--decoder-ports" @@ -387,7 +417,7 @@ msgstr "--decoder-ports" #: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:814 msgid "Ports of decoder nodes" -msgstr "解码器节点的端口" +msgstr "解码器节点端口" #: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:877 msgid "" @@ -396,9 +426,8 @@ msgid "" 
"project/vllm-" "ascend/blob/main/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py)" msgstr "" -"您可以在代码仓库的示例中找到代理程序," -"[load\\_balance\\_proxy\\_server\\_example.py](https://github.com/vllm-" -"project/vllm-" +"您可以在代码仓库的示例中找到代理程序,[load\\_balance\\_proxy\\_server\\_example.py](https://github.com" +"/vllm-project/vllm-" "ascend/blob/main/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py)" #: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:879 @@ -411,8 +440,8 @@ msgid "" "[aisbench](https://gitee.com/aisbench/benchmark) Execute the following " "commands to install aisbench" msgstr "" -"我们推荐使用 aisbench 工具进行性能评估。" -"[aisbench](https://gitee.com/aisbench/benchmark) 执行以下命令安装 aisbench" +"我们推荐使用 aisbench 工具进行性能评估。[aisbench](https://gitee.com/aisbench/benchmark)" +" 执行以下命令安装 aisbench" #: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:889 msgid "" @@ -443,7 +472,9 @@ msgstr "以 gsm8k 数据集为例,执行以下命令来评估性能。" msgid "" "For more details for commands and parameters for aisbench, refer to " "[aisbench](https://gitee.com/aisbench/benchmark)" -msgstr "有关 aisbench 命令和参数的更多详细信息,请参考 [aisbench](https://gitee.com/aisbench/benchmark)" +msgstr "" +"有关 aisbench 命令和参数的更多详细信息,请参考 " +"[aisbench](https://gitee.com/aisbench/benchmark)" #: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:932 msgid "FAQ" @@ -459,8 +490,7 @@ msgid "" "warm-up to achieve best performance, we recommend preheating the service " "with some requests before conducting performance tests to achieve the " "best end-to-end throughput." -msgstr "" -"由于部分 NPU 算子的计算需要经过多轮预热才能达到最佳性能,我们建议在进行性能测试前,先用一些请求预热服务,以获得最佳的端到端吞吐量。" +msgstr "由于部分 NPU 算子的计算需要经过多轮预热才能达到最佳性能,我们建议在进行性能测试前,先用一些请求预热服务,以获得最佳的端到端吞吐量。" #: ../../source/tutorials/features/pd_disaggregation_mooncake_multi_node.md:938 msgid "Verification" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/pd_disaggregation_mooncake_single_node.po b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/pd_disaggregation_mooncake_single_node.po index 0de9ebcc..4df15770 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/pd_disaggregation_mooncake_single_node.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/pd_disaggregation_mooncake_single_node.po @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: vllm-ascend \n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"POT-Creation-Date: 2026-04-15 09:41+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language: zh_CN\n" @@ -24,7 +24,7 @@ msgid "Prefill-Decode Disaggregation (Qwen2.5-VL)" msgstr "预填充-解码解耦架构 (Qwen2.5-VL)" #: ../../source/tutorials/features/pd_disaggregation_mooncake_single_node.md:3 -msgid "Getting Start" +msgid "Getting Started" msgstr "开始使用" #: ../../source/tutorials/features/pd_disaggregation_mooncake_single_node.md:5 @@ -36,10 +36,10 @@ msgstr "vLLM-Ascend 现已支持预填充-解码 (PD) 解耦架构。本指南 #: ../../source/tutorials/features/pd_disaggregation_mooncake_single_node.md:7 msgid "" -"Using the Qwen2.5-VL-7B-Instruct model as an example, use vllm-ascend " +"Using the Qwen2.5-VL-7B-Instruct model as an example, use vLLM-Ascend " "v0.11.0rc1 (with vLLM v0.11.0) on 1 Atlas 800T A2 server to deploy the " "\"1P1D\" architecture. Assume the IP address is 192.0.0.1." 
-msgstr "以 Qwen2.5-VL-7B-Instruct 模型为例,在 1 台 Atlas 800T A2 服务器上使用 vllm-ascend v0.11.0rc1 (包含 vLLM v0.11.0) 部署 \"1P1D\" 架构。假设 IP 地址为 192.0.0.1。" +msgstr "以 Qwen2.5-VL-7B-Instruct 模型为例,在 1 台 Atlas 800T A2 服务器上使用 vLLM-Ascend v0.11.0rc1 (包含 vLLM v0.11.0) 部署 \"1P1D\" 架构。假设 IP 地址为 192.0.0.1。" #: ../../source/tutorials/features/pd_disaggregation_mooncake_single_node.md:9 msgid "Verify Communication Environment" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Kimi-K2.5.po b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Kimi-K2.5.po index d6a064d6..58537405 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Kimi-K2.5.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Kimi-K2.5.po @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: vllm-ascend \n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"POT-Creation-Date: 2026-04-15 09:41+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language: zh_CN\n" @@ -35,7 +35,8 @@ msgid "" "language understanding with advanced agentic capabilities, instant and " "thinking modes, as well as conversational and agentic paradigms." msgstr "" -"Kimi K2.5 是一个开源的、原生的多模态智能体模型,通过在 Kimi-K2-Base 基础上持续预训练约 15 万亿视觉和文本混合令牌构建而成。它无缝集成了视觉与语言理解能力、先进的智能体能力、即时与思考模式,以及对话式和智能体范式。" +"Kimi K2.5 是一个开源的、原生的多模态智能体模型,通过在 Kimi-K2-Base 基础上持续预训练约 15 " +"万亿视觉和文本混合令牌构建而成。它无缝集成了视觉与语言理解能力、先进的智能体能力、即时与思考模式,以及对话式和智能体范式。" #: ../../source/tutorials/models/Kimi-K2.5.md:7 msgid "The `Kimi-K2.5` model is first supported in `vllm-ascend:v0.17.0rc1`." @@ -58,7 +59,9 @@ msgid "" "Refer to [supported " "features](../../user_guide/support_matrix/supported_models.md) to get the" " model's supported feature matrix." -msgstr "请参考 [支持的特性](../../user_guide/support_matrix/supported_models.md) 获取模型支持的特性矩阵。" +msgstr "" +"请参考 [支持的特性](../../user_guide/support_matrix/supported_models.md) " +"获取模型支持的特性矩阵。" #: ../../source/tutorials/models/Kimi-K2.5.md:15 msgid "" @@ -78,14 +81,18 @@ msgstr "模型权重" msgid "" "`Kimi-K2.5-w4a8`(Quantized version for w4a8): [Download model " "weight](https://modelscope.cn/models/Eco-Tech/Kimi-K2.5-W4A8)." -msgstr "`Kimi-K2.5-w4a8`(w4a8量化版本):[下载模型权重](https://modelscope.cn/models/Eco-Tech/Kimi-K2.5-W4A8)。" +msgstr "" +"`Kimi-K2.5-w4a8`(w4a8量化版本):[下载模型权重](https://modelscope.cn/models/Eco-" +"Tech/Kimi-K2.5-W4A8)。" #: ../../source/tutorials/models/Kimi-K2.5.md:22 msgid "" "`kimi-k2.5-eagle3`(Eagle3 MTP draft model for accelerating inference of " "Kimi-K2.5): [Download model " "weight](https://huggingface.co/lightseekorg/kimi-k2.5-eagle3)" -msgstr "`kimi-k2.5-eagle3`(用于加速 Kimi-K2.5 推理的 Eagle3 MTP 草稿模型):[下载模型权重](https://huggingface.co/lightseekorg/kimi-k2.5-eagle3)" +msgstr "" +"`kimi-k2.5-eagle3`(用于加速 Kimi-K2.5 推理的 Eagle3 MTP " +"草稿模型):[下载模型权重](https://huggingface.co/lightseekorg/kimi-k2.5-eagle3)" #: ../../source/tutorials/models/Kimi-K2.5.md:24 msgid "" @@ -102,7 +109,9 @@ msgid "" "If you want to deploy multi-node environment, you need to verify multi-" "node communication according to [verify multi-node communication " "environment](../../installation.md#verify-multi-node-communication)." 
-msgstr "如果您想部署多节点环境,需要根据 [验证多节点通信环境](../../installation.md#verify-multi-node-communication) 验证多节点通信。" +msgstr "" +"如果您想部署多节点环境,需要根据 [验证多节点通信环境](../../installation.md#verify-multi-node-" +"communication) 验证多节点通信。" #: ../../source/tutorials/models/Kimi-K2.5.md:30 msgid "Installation" @@ -117,21 +126,26 @@ msgid "" "Select an image based on your machine type and start the docker image on " "your node, refer to [using docker](../../installation.md#set-up-using-" "docker)." -msgstr "根据您的机器类型选择镜像,并在节点上启动 docker 镜像,请参考 [使用 docker](../../installation.md#set-up-using-docker)。" +msgstr "" +"根据您的机器类型选择镜像,并在节点上启动 docker 镜像,请参考 [使用 docker](../../installation.md#set-" +"up-using-docker)。" -#: ../../source/tutorials/models/Kimi-K2.5.md +#: ../../source/tutorials/models/Kimi-K2.5.md:36 msgid "A3 series" msgstr "A3 系列" #: ../../source/tutorials/models/Kimi-K2.5.md:43 -#: ../../source/tutorials/models/Kimi-K2.5.md:86 msgid "Start the docker image on your each node." msgstr "在您的每个节点上启动 docker 镜像。" -#: ../../source/tutorials/models/Kimi-K2.5.md +#: ../../source/tutorials/models/Kimi-K2.5.md:45 msgid "A2 series" msgstr "A2 系列" +#: ../../source/tutorials/models/Kimi-K2.5.md:86 +msgid "Start the docker image on your each node." +msgstr "在您的每个节点上启动 docker 镜像。" + #: ../../source/tutorials/models/Kimi-K2.5.md:119 msgid "" "In addition, if you don't want to use the docker image as above, you can " @@ -169,7 +183,6 @@ msgid "Run the following script to execute online inference." msgstr "运行以下脚本执行在线推理。" #: ../../source/tutorials/models/Kimi-K2.5.md:176 -#: ../../source/tutorials/models/Kimi-K2.5.md:645 msgid "**Notice:** The parameters are explained as follows:" msgstr "**注意:** 参数解释如下:" @@ -180,7 +193,9 @@ msgid "" "reduce TPOT in v1 scheduler. However, TTFT may degrade in some scenarios." " Furthermore, enabling this feature is not recommended in scenarios where" " PD is separated." -msgstr "设置环境变量 `VLLM_ASCEND_BALANCE_SCHEDULING=1` 启用均衡调度。这可能有助于提高 v1 调度器中的输出吞吐量并降低 TPOT。然而,在某些场景下 TTFT 可能会下降。此外,在 PD 分离的场景中不建议启用此功能。" +msgstr "" +"设置环境变量 `VLLM_ASCEND_BALANCE_SCHEDULING=1` 启用均衡调度。这可能有助于提高 v1 " +"调度器中的输出吞吐量并降低 TPOT。然而,在某些场景下 TTFT 可能会下降。此外,在 PD 分离的场景中不建议启用此功能。" #: ../../source/tutorials/models/Kimi-K2.5.md:180 msgid "" @@ -195,7 +210,9 @@ msgid "" " with an input length of 3.5K and output length of 1.5K, a value of " "`16384` is sufficient, however, for precision testing, please set it at " "least `35000`." -msgstr "`--max-model-len` 指定最大上下文长度——即单个请求的输入和输出令牌总数。对于输入长度 3.5K 和输出长度 1.5K 的性能测试,`16384` 的值就足够了,但对于精度测试,请至少将其设置为 `35000`。" +msgstr "" +"`--max-model-len` 指定最大上下文长度——即单个请求的输入和输出令牌总数。对于输入长度 3.5K 和输出长度 1.5K " +"的性能测试,`16384` 的值就足够了,但对于精度测试,请至少将其设置为 `35000`。" #: ../../source/tutorials/models/Kimi-K2.5.md:182 msgid "" @@ -244,14 +261,18 @@ msgstr "Prefill-Decode 分离" msgid "" "We recommend using Mooncake for deployment: " "[Mooncake](../features/pd_disaggregation_mooncake_multi_node.md)." -msgstr "我们建议使用 Mooncake 进行部署:[Mooncake](../features/pd_disaggregation_mooncake_multi_node.md)。" +msgstr "" +"我们建议使用 Mooncake " +"进行部署:[Mooncake](../features/pd_disaggregation_mooncake_multi_node.md)。" #: ../../source/tutorials/models/Kimi-K2.5.md:326 msgid "" "Take Atlas 800 A3 (64G × 16) for example, we recommend to deploy 2P1D (4 " "nodes) rather than 1P1D (2 nodes), because there is no enough NPU memory " "to serve high concurrency in 1P1D case." 
-msgstr "以 Atlas 800 A3(64G × 16)为例,我们建议部署 2P1D(4 个节点)而不是 1P1D(2 个节点),因为在 1P1D 情况下没有足够的 NPU 内存来服务高并发。" +msgstr "" +"以 Atlas 800 A3(64G × 16)为例,我们建议部署 2P1D(4 个节点)而不是 1P1D(2 个节点),因为在 1P1D " +"情况下没有足够的 NPU 内存来服务高并发。" #: ../../source/tutorials/models/Kimi-K2.5.md:328 msgid "`Kimi-K2.5-w4a8 2P1D` require 4 Atlas 800 A3 (64G × 16)." @@ -263,14 +284,20 @@ msgid "" "to deploy a `launch_dp_program.py` script and a `run_dp_template.sh` " "script on each node and deploy a `proxy.sh` script on prefill master node" " to forward requests." -msgstr "要运行 vllm-ascend `Prefill-Decode Disaggregation` 服务,您需要在每个节点上部署一个 `launch_dp_program.py` 脚本和一个 `run_dp_template.sh` 脚本,并在 prefill 主节点上部署一个 `proxy.sh` 脚本来转发请求。" +msgstr "" +"要运行 vllm-ascend `Prefill-Decode Disaggregation` 服务,您需要在每个节点上部署一个 " +"`launch_dp_program.py` 脚本和一个 `run_dp_template.sh` 脚本,并在 prefill 主节点上部署一个 " +"`proxy.sh` 脚本来转发请求。" #: ../../source/tutorials/models/Kimi-K2.5.md:332 msgid "" "`launch_online_dp.py` to launch external dp vllm servers. " "[launch\\_online\\_dp.py](https://github.com/vllm-project/vllm-" "ascend/blob/main/examples/external_online_dp/launch_online_dp.py)" -msgstr "`launch_online_dp.py` 用于启动外部 dp vllm 服务器。[launch\\_online\\_dp.py](https://github.com/vllm-project/vllm-ascend/blob/main/examples/external_online_dp/launch_online_dp.py)" +msgstr "" +"`launch_online_dp.py` 用于启动外部 dp vllm " +"服务器。[launch\\_online\\_dp.py](https://github.com/vllm-project/vllm-" +"ascend/blob/main/examples/external_online_dp/launch_online_dp.py)" #: ../../source/tutorials/models/Kimi-K2.5.md:335 msgid "Prefill Node 0 `run_dp_template.sh` script" @@ -288,6 +315,10 @@ msgstr "Decode 节点 0 `run_dp_template.sh` 脚本" msgid "Decode Node 1 `run_dp_template.sh` script" msgstr "Decode 节点 1 `run_dp_template.sh` 脚本" +#: ../../source/tutorials/models/Kimi-K2.5.md:645 +msgid "**Notice:** The parameters are explained as follows:" +msgstr "**注意:** 参数解释如下:" + #: ../../source/tutorials/models/Kimi-K2.5.md:648 msgid "" "`VLLM_ASCEND_ENABLE_FLASHCOMM1=1`: enables the communication optimization" @@ -300,7 +331,9 @@ msgid "" "significantly improve performance but consumes more NPU memory. In the " "Prefill-Decode (PD) separation scenario, enable MLAPO only on decode " "nodes." -msgstr "`VLLM_ASCEND_ENABLE_MLAPO=1`:启用融合算子,这可以显著提高性能但会消耗更多 NPU 内存。在 Prefill-Decode(PD)分离场景中,仅在 decode 节点上启用 MLAPO。" +msgstr "" +"`VLLM_ASCEND_ENABLE_MLAPO=1`:启用融合算子,这可以显著提高性能但会消耗更多 NPU 内存。在 Prefill-" +"Decode(PD)分离场景中,仅在 decode 节点上启用 MLAPO。" #: ../../source/tutorials/models/Kimi-K2.5.md:650 msgid "" @@ -316,7 +349,9 @@ msgid "" "the min is `n = 1` and the max is `n = max-num-seqs`. For other values, " "it is recommended to set them to the number of frequently occurring " "requests on the Decode (D) node." -msgstr "`cudagraph_capture_sizes`:推荐值为 `n x (mtp + 1)`。最小值为 `n = 1`,最大值为 `n = max-num-seqs`。对于其他值,建议将其设置为 Decode(D)节点上频繁出现的请求数量。" +msgstr "" +"`cudagraph_capture_sizes`:推荐值为 `n x (mtp + 1)`。最小值为 `n = 1`,最大值为 `n = " +"max-num-seqs`。对于其他值,建议将其设置为 Decode(D)节点上频繁出现的请求数量。" #: ../../source/tutorials/models/Kimi-K2.5.md:652 msgid "" @@ -325,7 +360,8 @@ msgid "" "requests will be sent to the prefill node to recompute the KV Cache. In " "the PD separation scenario, it is recommended to enable this " "configuration on both prefill and decode nodes simultaneously." 
-msgstr "`recompute_scheduler_enable: true`:启用重计算调度器。当 decode 节点的键值缓存(KV Cache)不足时,请求将被发送到 prefill 节点以重新计算 KV Cache。在 PD 分离场景中,建议同时在 prefill 和 decode 节点上启用此配置。" +msgstr "" +"`recompute_scheduler_enable: true`:启用重计算调度器。当解码节点的键值缓存(KV Cache)不足时,请求将被发送到预填充节点以重新计算 KV Cache。在 PD 分离场景中,建议同时在预填充和解码节点上启用此配置。" #: ../../source/tutorials/models/Kimi-K2.5.md:653 msgid "" @@ -333,7 +369,8 @@ msgid "" "(TP) size is 1 or `enable_shared_expert_dp: true`, an additional stream " "is enabled to overlap the computation process of shared experts for " "improved efficiency." -msgstr "`multistream_overlap_shared_expert: true`:当张量并行(TP)大小为 1 或 `enable_shared_expert_dp: true` 时,启用额外的流来重叠共享专家的计算过程以提高效率。" +msgstr "" +"`multistream_overlap_shared_expert: true`:当张量并行(TP)大小为 1 或 `enable_shared_expert_dp: true` 时,启用额外的流来重叠共享专家的计算过程以提高效率。" #: ../../source/tutorials/models/Kimi-K2.5.md:655 msgid "run server for each node:" @@ -341,7 +378,7 @@ msgstr "为每个节点运行服务器:" #: ../../source/tutorials/models/Kimi-K2.5.md:668 msgid "Run the `proxy.sh` script on the prefill master node" -msgstr "在 prefill 主节点上运行 `proxy.sh` 脚本" +msgstr "在预填充主节点上运行 `proxy.sh` 脚本" #: ../../source/tutorials/models/Kimi-K2.5.md:670 msgid "" @@ -350,7 +387,8 @@ msgid "" "[load\\_balance\\_proxy\\_server\\_example.py](https://github.com/vllm-" "project/vllm-" "ascend/blob/main/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py)" -msgstr "在与 prefiller 服务实例相同的节点上运行一个代理服务器。您可以在仓库的示例中找到代理程序:[load\\_balance\\_proxy\\_server\\_example.py](https://github.com/vllm-project/vllm-ascend/blob/main/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py)" +msgstr "" +"在与预填充服务实例相同的节点上运行一个代理服务器。您可以在仓库的示例中找到代理程序:[load\\_balance\\_proxy\\_server\\_example.py](https://github.com/vllm-project/vllm-ascend/blob/main/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py)" #: ../../source/tutorials/models/Kimi-K2.5.md:726 msgid "Functional Verification" @@ -567,8 +605,8 @@ msgid "" msgstr "**问:启动失败,提示 HCCL 端口冲突(地址已被占用)。我该怎么办?**" #: ../../source/tutorials/models/Kimi-K2.5.md:812 -msgid "A: Clean up old processes and restart: `pkill -f VLLM*`." -msgstr "答:清理旧进程并重启:`pkill -f VLLM*`。" +msgid "A: Clean up old processes and restart: `pkill -f vLLM*`." +msgstr "答:清理旧进程并重启:`pkill -f vLLM*`。" #: ../../source/tutorials/models/Kimi-K2.5.md:814 msgid "**Q: How to handle OOM or unstable startup?**" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen2.5-Omni.po b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen2.5-Omni.po index 1e9fe06b..cc394166 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen2.5-Omni.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen2.5-Omni.po @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: vllm-ascend \n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"POT-Creation-Date: 2026-04-15 09:41+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language: zh_CN\n" @@ -42,7 +42,9 @@ msgid "" "including supported features, feature configuration, environment " "preparation, single-NPU and multi-NPU deployment, accuracy and " "performance evaluation." 
-msgstr "`Qwen2.5-Omni` 模型自 `vllm-ascend:v0.11.0rc0` 版本起获得支持。本文档将展示该模型的主要验证步骤,包括支持的特性、特性配置、环境准备、单NPU和多NPU部署、精度和性能评估。" +msgstr "" +"`Qwen2.5-Omni` 模型自 `vllm-ascend:v0.11.0rc0` " +"版本起获得支持。本文档将展示该模型的主要验证步骤,包括支持的特性、特性配置、环境准备、单NPU和多NPU部署、精度和性能评估。" #: ../../source/tutorials/models/Qwen2.5-Omni.md:9 msgid "Supported Features" @@ -73,13 +75,17 @@ msgstr "模型权重" msgid "" "`Qwen2.5-Omni-3B`(BF16): [Download model " "weight](https://modelscope.cn/models/Qwen/Qwen2.5-Omni-3B)" -msgstr "`Qwen2.5-Omni-3B`(BF16): [下载模型权重](https://modelscope.cn/models/Qwen/Qwen2.5-Omni-3B)" +msgstr "" +"`Qwen2.5-Omni-3B`(BF16): " +"[下载模型权重](https://modelscope.cn/models/Qwen/Qwen2.5-Omni-3B)" #: ../../source/tutorials/models/Qwen2.5-Omni.md:20 msgid "" "`Qwen2.5-Omni-7B`(BF16): [Download model " "weight](https://modelscope.cn/models/Qwen/Qwen2.5-Omni-7B)" -msgstr "`Qwen2.5-Omni-7B`(BF16): [下载模型权重](https://modelscope.cn/models/Qwen/Qwen2.5-Omni-7B)" +msgstr "" +"`Qwen2.5-Omni-7B`(BF16): " +"[下载模型权重](https://modelscope.cn/models/Qwen/Qwen2.5-Omni-7B)" #: ../../source/tutorials/models/Qwen2.5-Omni.md:22 msgid "Following examples use the 7B version by default." @@ -98,7 +104,9 @@ msgid "" "Select an image based on your machine type and start the docker image on " "your node, refer to [using docker](../../installation.md#set-up-using-" "docker)." -msgstr "根据您的机器类型选择镜像并在节点上启动 docker 镜像,请参考[使用 docker](../../installation.md#set-up-using-docker)。" +msgstr "" +"根据您的机器类型选择镜像并在节点上启动 docker 镜像,请参考[使用 docker](../../installation.md#set-" +"up-using-docker)。" #: ../../source/tutorials/models/Qwen2.5-Omni.md:65 msgid "Deployment" @@ -114,18 +122,22 @@ msgstr "单 NPU (Qwen2.5-Omni-7B)" #: ../../source/tutorials/models/Qwen2.5-Omni.md:72 msgid "" -"The **environment variable** `LOCAL_MEDIA_PATH` which **allows** API " -"requests to read local images or videos from directories specified by the" -" server file system. Please note this is a security risk. Should only be " -"enabled in trusted environments." -msgstr "**环境变量** `LOCAL_MEDIA_PATH` **允许** API 请求从服务器文件系统指定的目录读取本地图像或视频。请注意,这存在安全风险。应仅在受信任的环境中启用。" +"The environment variable `LOCAL_MEDIA_PATH` which allows API requests to " +"read local images or videos from directories specified by the server file" +" system. Please note this is a security risk. Should only be enabled in " +"trusted environments." +msgstr "" +"环境变量 `LOCAL_MEDIA_PATH` 允许 API " +"请求从服务器文件系统指定的目录读取本地图像或视频。请注意,这存在安全风险。应仅在受信任的环境中启用。" #: ../../source/tutorials/models/Qwen2.5-Omni.md:92 msgid "" "Now vllm-ascend docker image should contain vllm[audio] build part, if " "you encounter *audio not supported issue* by any chance, please re-build " "vllm with [audio] flag." 
-msgstr "当前 vllm-ascend docker 镜像应包含 vllm[audio] 构建部分,如果您遇到*音频不支持的问题*,请使用 [audio] 标志重新构建 vllm。" +msgstr "" +"当前 vllm-ascend docker 镜像应包含 vllm[audio] 构建部分,如果您遇到*音频不支持的问题*,请使用 [audio] " +"标志重新构建 vllm。" #: ../../source/tutorials/models/Qwen2.5-Omni.md:100 msgid "" @@ -162,8 +174,8 @@ msgid "Functional Verification" msgstr "功能验证" #: ../../source/tutorials/models/Qwen2.5-Omni.md:131 -msgid "If your service **starts** successfully, you can see the info shown below:" -msgstr "如果您的服务**启动**成功,您可以看到如下所示的信息:" +msgid "If your service starts successfully, you can see the info shown below:" +msgstr "如果您的服务启动成功,您可以看到如下所示的信息:" #: ../../source/tutorials/models/Qwen2.5-Omni.md:139 msgid "Once your server is started, you can query the model with input prompts:" @@ -258,7 +270,10 @@ msgid "" "Refer to [Using AISBench for performance " "evaluation](../../developer_guide/evaluation/using_ais_bench.md#execute-" "performance-evaluation) for details." -msgstr "详情请参考[使用 AISBench 进行性能评估](../../developer_guide/evaluation/using_ais_bench.md#execute-performance-evaluation)。" +msgstr "" +"详情请参考[使用 AISBench " +"进行性能评估](../../developer_guide/evaluation/using_ais_bench.md#execute-" +"performance-evaluation)。" #: ../../source/tutorials/models/Qwen2.5-Omni.md:194 msgid "Using vLLM Benchmark" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-Dense.po b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-Dense.po index 1d2f2c88..d01b5189 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-Dense.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-Dense.po @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: vllm-ascend \n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"POT-Creation-Date: 2026-04-15 09:41+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language: zh_CN\n" @@ -35,9 +35,8 @@ msgid "" "advancements in reasoning, instruction-following, agent capabilities, and" " multilingual support." msgstr "" -"Qwen3 是 Qwen 系列最新一代的大语言模型,提供了一套完整的稠密模型和专家混合" -"(MoE) 模型。基于广泛的训练,Qwen3 在推理、指令遵循、智能体能力和多语言支持方" -"面实现了突破性进展。" +"Qwen3 是 Qwen 系列最新一代的大语言模型,提供了一套完整的稠密模型和专家混合(MoE) 模型。基于广泛的训练,Qwen3 " +"在推理、指令遵循、智能体能力和多语言支持方面实现了突破性进展。" #: ../../source/tutorials/models/Qwen3-Dense.md:7 msgid "" @@ -47,18 +46,15 @@ msgid "" "optimization points. We will also explore how adjusting service " "parameters can maximize throughput performance across various scenarios." msgstr "" -"欢迎阅读在 vLLM-Ascend 环境中优化 Qwen 稠密模型的教程。本指南将帮助您为您的用" -"例配置最有效的设置,并通过实际示例突出关键优化点。我们还将探讨如何调整服务参" -"数以在各种场景下最大化吞吐性能。" +"欢迎阅读在 vLLM-Ascend 环境中优化 Qwen " +"稠密模型的教程。本指南将帮助您为您的用例配置最有效的设置,并通过实际示例突出关键优化点。我们还将探讨如何调整服务参数以在各种场景下最大化吞吐性能。" #: ../../source/tutorials/models/Qwen3-Dense.md:9 msgid "" "This document will show the main verification steps of the model, " "including supported features, feature configuration, environment " "preparation, accuracy and performance evaluation." -msgstr "" -"本文档将展示模型的主要验证步骤,包括支持的特性、特性配置、环境准备、精度和性" -"能评估。" +msgstr "本文档将展示模型的主要验证步骤,包括支持的特性、特性配置、环境准备、精度和性能评估。" #: ../../source/tutorials/models/Qwen3-Dense.md:11 msgid "" @@ -68,11 +64,9 @@ msgid "" "20250429). This example requires version **v0.11.0rc2**. Earlier versions" " may lack certain features." 
msgstr "" -"Qwen3 稠密模型首次在 " -"[v0.8.4rc2](https://github.com/vllm-project/vllm-" +"Qwen3 稠密模型首次在 [v0.8.4rc2](https://github.com/vllm-project/vllm-" "ascend/blob/main/docs/source/user_guide/release_notes.md#v084rc2---" -"20250429) 中得到支持。本示例需要版本 **v0.11.0rc2**。更早的版本可能缺少某些特" -"性。" +"20250429) 中得到支持。本示例需要版本 **v0.11.0rc2**。更早的版本可能缺少某些特性。" #: ../../source/tutorials/models/Qwen3-Dense.md:13 msgid "Supported Features" @@ -84,16 +78,14 @@ msgid "" "features](../../user_guide/support_matrix/supported_models.md) to get the" " model's supported feature matrix." msgstr "" -"请参考 [支持的特性](../../user_guide/support_matrix/supported_models." -"md) 以获取模型支持的特性矩阵。" +"请参考 [支持的特性](../../user_guide/support_matrix/supported_models.md) " +"以获取模型支持的特性矩阵。" #: ../../source/tutorials/models/Qwen3-Dense.md:17 msgid "" "Refer to [feature guide](../../user_guide/feature_guide/index.md) to get " "the feature's configuration." -msgstr "" -"请参考 [特性指南](../../user_guide/feature_guide/index.md) 以获取特性的配置信" -"息。" +msgstr "请参考 [特性指南](../../user_guide/feature_guide/index.md) 以获取特性的配置信息。" #: ../../source/tutorials/models/Qwen3-Dense.md:19 msgid "Environment Preparation" @@ -109,9 +101,9 @@ msgid "" "Atlas 800I A2 (64G × 1) card. [Download model " "weight](https://modelers.cn/models/Modelers_Park/Qwen3-0.6B)" msgstr "" -"`Qwen3-0.6B`(BF16 版本): 需要 1 张 Atlas 800 A3 (64G × 2) 卡或 1 张 Atlas " -"800I A2 (64G × 1) 卡。[下载模型权重](https://modelers.cn/models/" -"Modelers_Park/Qwen3-0.6B)" +"`Qwen3-0.6B`(BF16 版本): 需要 1 张 Atlas 800 A3 (64G × 2) 卡或 1 张 Atlas 800I A2" +" (64G × 1) " +"卡。[下载模型权重](https://modelers.cn/models/Modelers_Park/Qwen3-0.6B)" #: ../../source/tutorials/models/Qwen3-Dense.md:24 msgid "" @@ -119,9 +111,9 @@ msgid "" "Atlas 800I A2 (64G × 1) card. [Download model " "weight](https://modelers.cn/models/Modelers_Park/Qwen3-1.7B)" msgstr "" -"`Qwen3-1.7B`(BF16 版本): 需要 1 张 Atlas 800 A3 (64G × 2) 卡或 1 张 Atlas " -"800I A2 (64G × 1) 卡。[下载模型权重](https://modelers.cn/models/" -"Modelers_Park/Qwen3-1.7B)" +"`Qwen3-1.7B`(BF16 版本): 需要 1 张 Atlas 800 A3 (64G × 2) 卡或 1 张 Atlas 800I A2" +" (64G × 1) " +"卡。[下载模型权重](https://modelers.cn/models/Modelers_Park/Qwen3-1.7B)" #: ../../source/tutorials/models/Qwen3-Dense.md:25 msgid "" @@ -129,9 +121,8 @@ msgid "" "Atlas 800I A2 (64G × 1) card. [Download model " "weight](https://modelers.cn/models/Modelers_Park/Qwen3-4B)" msgstr "" -"`Qwen3-4B`(BF16 版本): 需要 1 张 Atlas 800 A3 (64G × 2) 卡或 1 张 Atlas " -"800I A2 (64G × 1) 卡。[下载模型权重](https://modelers.cn/models/" -"Modelers_Park/Qwen3-4B)" +"`Qwen3-4B`(BF16 版本): 需要 1 张 Atlas 800 A3 (64G × 2) 卡或 1 张 Atlas 800I A2 " +"(64G × 1) 卡。[下载模型权重](https://modelers.cn/models/Modelers_Park/Qwen3-4B)" #: ../../source/tutorials/models/Qwen3-Dense.md:26 msgid "" @@ -139,9 +130,8 @@ msgid "" "Atlas 800I A2 (64G × 1) card. [Download model " "weight](https://modelers.cn/models/Modelers_Park/Qwen3-8B)" msgstr "" -"`Qwen3-8B`(BF16 版本): 需要 1 张 Atlas 800 A3 (64G × 2) 卡或 1 张 Atlas " -"800I A2 (64G × 1) 卡。[下载模型权重](https://modelers.cn/models/" -"Modelers_Park/Qwen3-8B)" +"`Qwen3-8B`(BF16 版本): 需要 1 张 Atlas 800 A3 (64G × 2) 卡或 1 张 Atlas 800I A2 " +"(64G × 1) 卡。[下载模型权重](https://modelers.cn/models/Modelers_Park/Qwen3-8B)" #: ../../source/tutorials/models/Qwen3-Dense.md:27 msgid "" @@ -149,9 +139,8 @@ msgid "" "Atlas 800I A2 (64G × 1) cards. 
[Download model " "weight](https://modelers.cn/models/Modelers_Park/Qwen3-14B)" msgstr "" -"`Qwen3-14B`(BF16 版本): 需要 1 张 Atlas 800 A3 (64G × 2) 卡或 2 张 Atlas " -"800I A2 (64G × 1) 卡。[下载模型权重](https://modelers.cn/models/" -"Modelers_Park/Qwen3-14B)" +"`Qwen3-14B`(BF16 版本): 需要 1 张 Atlas 800 A3 (64G × 2) 卡或 2 张 Atlas 800I A2 " +"(64G × 1) 卡。[下载模型权重](https://modelers.cn/models/Modelers_Park/Qwen3-14B)" #: ../../source/tutorials/models/Qwen3-Dense.md:28 msgid "" @@ -159,9 +148,8 @@ msgid "" "Atlas 800I A2 (64G × 4) cards. [Download model " "weight](https://modelers.cn/models/Modelers_Park/Qwen3-32B)" msgstr "" -"`Qwen3-32B`(BF16 版本): 需要 2 张 Atlas 800 A3 (64G × 4) 卡或 4 张 Atlas " -"800I A2 (64G × 4) 卡。[下载模型权重](https://modelers.cn/models/" -"Modelers_Park/Qwen3-32B)" +"`Qwen3-32B`(BF16 版本): 需要 2 张 Atlas 800 A3 (64G × 4) 卡或 4 张 Atlas 800I A2 " +"(64G × 4) 卡。[下载模型权重](https://modelers.cn/models/Modelers_Park/Qwen3-32B)" #: ../../source/tutorials/models/Qwen3-Dense.md:29 msgid "" @@ -169,9 +157,9 @@ msgid "" "cards or 4 Atlas 800I A2 (64G × 4) cards. [Download model " "weight](https://www.modelscope.cn/models/vllm-ascend/Qwen3-32B-W8A8)" msgstr "" -"`Qwen3-32B-W8A8`(量化版本): 需要 2 张 Atlas 800 A3 (64G × 4) 卡或 4 张 " -"Atlas 800I A2 (64G × 4) 卡。[下载模型权重](https://www.modelscope.cn/" -"models/vllm-ascend/Qwen3-32B-W8A8)" +"`Qwen3-32B-W8A8`(量化版本): 需要 2 张 Atlas 800 A3 (64G × 4) 卡或 4 张 Atlas 800I " +"A2 (64G × 4) 卡。[下载模型权重](https://www.modelscope.cn/models/vllm-" +"ascend/Qwen3-32B-W8A8)" #: ../../source/tutorials/models/Qwen3-Dense.md:31 msgid "" @@ -195,8 +183,8 @@ msgid "" "node communication according to [verify multi-node communication " "environment](../../installation.md#verify-multi-node-communication)." msgstr "" -"如果您想部署多节点环境,需要根据 [验证多节点通信环境](../../installation." -"md#verify-multi-node-communication) 来验证多节点通信。" +"如果您想部署多节点环境,需要根据 [验证多节点通信环境](../../installation.md#verify-multi-node-" +"communication) 来验证多节点通信。" #: ../../source/tutorials/models/Qwen3-Dense.md:39 msgid "Installation" @@ -208,8 +196,9 @@ msgid "" "Currently, we provide the all-in-one images.[Download " "images](https://quay.io/repository/ascend/vllm-ascend?tab=tags)" msgstr "" -"您可以使用我们的官方 docker 镜像来支持 Qwen3 稠密模型。目前,我们提供一体化镜" -"像。[下载镜像](https://quay.io/repository/ascend/vllm-ascend?tab=tags)" +"您可以使用我们的官方 docker 镜像来支持 Qwen3 " +"稠密模型。目前,我们提供一体化镜像。[下载镜像](https://quay.io/repository/ascend/vllm-" +"ascend?tab=tags)" #: ../../source/tutorials/models/Qwen3-Dense.md:44 msgid "Docker Pull (by tag)" @@ -227,18 +216,15 @@ msgid "" " (`pip install -e`) to help developer immediately take place changes " "without requiring a new installation." msgstr "" -"默认工作目录是 `/workspace`,vLLM 和 vLLM Ascend 代码放置在 `/vllm-" -"workspace` 中,并以 [开发模式](https://setuptools.pypa.io/en/latest/" -"userguide/development_mode.html) (`pip install -e`) 安装,以帮助开发者立即应用" -"更改而无需重新安装。" +"默认工作目录是 `/workspace`,vLLM 和 vLLM Ascend 代码放置在 `/vllm-workspace` 中,并以 " +"[开发模式](https://setuptools.pypa.io/en/latest/userguide/development_mode.html)" +" (`pip install -e`) 安装,以帮助开发者立即应用更改而无需重新安装。" #: ../../source/tutorials/models/Qwen3-Dense.md:92 msgid "" "In the [Run docker container](./Qwen3-Dense.md#run-docker-container), " "detailed explanations are provided through specific examples." 
-msgstr "" -"在 [运行 docker 容器](./Qwen3-Dense.md#run-docker-container) 中,通过具体示例" -"提供了详细说明。" +msgstr "在 [运行 docker 容器](./Qwen3-Dense.md#run-docker-container) 中,通过具体示例提供了详细说明。" #: ../../source/tutorials/models/Qwen3-Dense.md:94 msgid "" @@ -273,11 +259,10 @@ msgid "" "max_num_batched_tokens, and cudagraph_capture_sizes, to achieve the best " "performance." msgstr "" -"在本节中,我们将演示在 vLLM-Ascend 中调整超参数以实现最大推理吞吐性能的最佳实" -"践。通过定制服务级配置以适应不同的用例,您可以确保您的系统在各种场景下都能达" -"到最佳性能。我们将指导您如何根据观察到的现象(例如 max_model_len、" -"max_num_batched_tokens 和 cudagraph_capture_sizes)来微调超参数,以获得最佳性" -"能。" +"在本节中,我们将演示在 vLLM-Ascend " +"中调整超参数以实现最大推理吞吐性能的最佳实践。通过定制服务级配置以适应不同的用例,您可以确保您的系统在各种场景下都能达到最佳性能。我们将指导您如何根据观察到的现象(例如" +" max_model_len、max_num_batched_tokens 和 " +"cudagraph_capture_sizes)来微调超参数,以获得最佳性能。" #: ../../source/tutorials/models/Qwen3-Dense.md:104 msgid "The specific example scenario is as follows:" @@ -364,11 +349,9 @@ msgid "" " these scenarios and this parameter will be removed." msgstr "" "**[可选]** `--additional-config '{\"pa_shape_list\":[48,64,72,80]}'`: " -"`pa_shape_list` 指定了您希望切换到 PA 算子的批次大小。这是一个临时的调优旋" -"钮。目前,注意力算子调度默认使用 FIA 算子。在某些批次大小(并发)设置下,FIA " -"可能性能不佳。通过设置 `pa_shape_list`,当运行时批次大小与列出的值之一匹配时," -"vLLM-Ascend 将用 PA 算子替换 FIA 算子以防止性能下降。未来,FIA 将针对这些场景" -"进行优化,此参数将被移除。" +"`pa_shape_list` 指定了您希望切换到 PA 算子的批次大小。这是一个临时的调优旋钮。目前,注意力算子调度默认使用 FIA " +"算子。在某些批次大小(并发)设置下,FIA 可能性能不佳。通过设置 `pa_shape_list`,当运行时批次大小与列出的值之一匹配时" +",vLLM-Ascend 将用 PA 算子替换 FIA 算子以防止性能下降。未来,FIA 将针对这些场景进行优化,此参数将被移除。" #: ../../source/tutorials/models/Qwen3-Dense.md:198 #, python-brace-format @@ -381,10 +364,10 @@ msgid "" "\"FULL_DECODE_ONLY\", " "\"cudagraph_capture_sizes\":[1,8,24,48,60,64,72,76]}'`." msgstr "" -"如果需要极致性能,可以启用 cudagraph_capture_sizes 参数,参考:[关键优化" -"点](./Qwen3-Dense.md#key-optimization-points)、[优化亮点](./Qwen3-" -"Dense.md#optimization-highlights)。以下是批次大小为 72 的示例:`--compilation-" -"config '{\"cudagraph_mode\": \"FULL_DECODE_ONLY\", " +"如果需要极致性能,可以启用 cudagraph_capture_sizes 参数,参考:[关键优化点](./Qwen3-Dense.md#key-" +"optimization-points)、[优化亮点](./Qwen3-Dense.md#optimization-" +"highlights)。以下是批次大小为 72 的示例:`--compilation-config '{\"cudagraph_mode\": " +"\"FULL_DECODE_ONLY\", " "\"cudagraph_capture_sizes\":[1,8,24,48,60,64,72,76]}'`。" #: ../../source/tutorials/models/Qwen3-Dense.md:201 @@ -423,7 +406,7 @@ msgid "" "Refer to [Using " "AISBench](../../developer_guide/evaluation/using_ais_bench.md) for " "details." -msgstr "详情请参阅[使用AISBench](../../developer_guide/evaluation/using_ais_bench.md)。" +msgstr "详情请参阅[使用 AISBench](../../developer_guide/evaluation/using_ais_bench.md)。" #: ../../source/tutorials/models/Qwen3-Dense.md:273 msgid "" @@ -512,11 +495,13 @@ msgid "" "Refer to [Using AISBench for performance " "evaluation](../../developer_guide/evaluation/using_ais_bench.md#execute-" "performance-evaluation) for details." -msgstr "详情请参阅[使用AISBench进行性能评估](../../developer_guide/evaluation/using_ais_bench.md#execute-performance-evaluation)。" +msgstr "" +"详情请参阅[使用 AISBench 进行性能评估](../../developer_guide/evaluation/using_ais_bench.md" +"#execute-performance-evaluation)。" #: ../../source/tutorials/models/Qwen3-Dense.md:287 msgid "Using vLLM Benchmark" -msgstr "使用vLLM基准测试" +msgstr "使用 vLLM 基准测试" #: ../../source/tutorials/models/Qwen3-Dense.md:289 msgid "Run performance evaluation of `Qwen3-32B-W8A8` as an example." @@ -526,7 +511,7 @@ msgstr "以运行 `Qwen3-32B-W8A8` 的性能评估为例。" msgid "" "Refer to [vllm benchmark](https://docs.vllm.ai/en/latest/benchmarking/) " "for more details." 
-msgstr "更多详情请参阅[vllm基准测试](https://docs.vllm.ai/en/latest/benchmarking/)。" +msgstr "更多详情请参阅 [vLLM 基准测试](https://docs.vllm.ai/en/latest/benchmarking/)。" #: ../../source/tutorials/models/Qwen3-Dense.md:293 msgid "There are three `vllm bench` subcommands:" @@ -564,11 +549,11 @@ msgid "" "significantly improve the performance of Qwen Dense models. These " "techniques are designed to enhance throughput and efficiency across " "various scenarios." -msgstr "本节将介绍能显著提升Qwen Dense模型性能的关键优化点。这些技术旨在提升各种场景下的吞吐量和效率。" +msgstr "本节将介绍能显著提升 Qwen Dense 模型性能的关键优化点。这些技术旨在提升各种场景下的吞吐量和效率。" #: ../../source/tutorials/models/Qwen3-Dense.md:316 msgid "1. Rope Optimization" -msgstr "1. Rope优化" +msgstr "1. Rope 优化" #: ../../source/tutorials/models/Qwen3-Dense.md:318 msgid "" @@ -578,7 +563,9 @@ msgid "" "performed during the first layer of the forward pass. For subsequent " "layers, the position encoding is directly reused, eliminating redundant " "calculations and significantly speeding up inference in decode phase." -msgstr "Rope优化通过修改位置编码过程来提升模型效率。具体来说,它确保 `cos_sin_cache` 及相关索引选择操作仅在正向传播的第一层执行。对于后续层,位置编码被直接复用,消除了冗余计算,并显著加快了解码阶段的推理速度。" +msgstr "" +"Rope 优化通过修改位置编码过程来提升模型效率。具体来说,它确保 `cos_sin_cache` " +"及相关索引选择操作仅在正向传播的第一层执行。对于后续层,位置编码被直接复用,消除了冗余计算,并显著加快了解码阶段的推理速度。" #: ../../source/tutorials/models/Qwen3-Dense.md:320 #: ../../source/tutorials/models/Qwen3-Dense.md:326 @@ -590,14 +577,14 @@ msgstr "此优化默认启用,无需设置任何额外的环境变量。" #: ../../source/tutorials/models/Qwen3-Dense.md:322 msgid "2. AddRMSNormQuant Fusion" -msgstr "2. AddRMSNormQuant融合" +msgstr "2. AddRMSNormQuant 融合" #: ../../source/tutorials/models/Qwen3-Dense.md:324 msgid "" "AddRMSNormQuant fusion merges the Address-wise Multi-Scale Normalization " "and Quantization operations, allowing for more efficient memory access " "and computation, thereby enhancing throughput." -msgstr "AddRMSNormQuant融合将地址感知多尺度归一化与量化操作合并,实现了更高效的内存访问和计算,从而提升了吞吐量。" +msgstr "AddRMSNormQuant 融合将地址感知多尺度归一化与量化操作合并,实现了更高效的内存访问和计算,从而提升了吞吐量。" #: ../../source/tutorials/models/Qwen3-Dense.md:328 msgid "3. FlashComm_v1" @@ -612,7 +599,9 @@ msgid "" "processing. In quantization scenarios, FlashComm_v1 also reduces the " "communication overhead by decreasing the bit-level data transfer, which " "further minimizes the end-to-end latency during the prefill phase." -msgstr "FlashComm_v1通过将传统的allreduce集合通信分解为reduce-scatter和all-gather,显著提升了大批量场景下的性能。这种分解有助于减少RMSNorm令牌维度的计算,从而实现更高效的处理。在量化场景中,FlashComm_v1还通过减少比特级数据传输来降低通信开销,从而进一步最小化预填充阶段的端到端延迟。" +msgstr "" +"FlashComm_v1 通过将传统的 allreduce 集合通信分解为 reduce-scatter 和 all-" +"gather,显著提升了大批量场景下的性能。这种分解有助于减少 RMSNorm 令牌维度的计算,从而实现更高效的处理。在量化场景中,FlashComm_v1 还通过减少比特级数据传输来降低通信开销,从而进一步最小化预填充阶段的端到端延迟。" #: ../../source/tutorials/models/Qwen3-Dense.md:332 msgid "" @@ -626,7 +615,9 @@ msgid "" "exceeds the threshold. This ensures that the feature is only activated in" " scenarios where it improves performance, avoiding potential degradation " "in lower-concurrency situations." -msgstr "需要注意的是,将allreduce通信分解为reduce-scatter和all-gather操作仅在无显著通信降级的高并发场景下有益。在其他情况下,这种分解可能导致明显的性能下降。为缓解此问题,当前实现采用基于阈值的方法,仅当每个推理调度的实际令牌数超过阈值时才启用FlashComm_v1。这确保了该功能仅在能提升性能的场景下激活,避免了在低并发情况下可能出现的性能下降。" +msgstr "" +"需要注意的是,将 allreduce 通信分解为 reduce-scatter 和 all-" +"gather 操作仅在无显著通信降级的高并发场景下有益。在其他情况下,这种分解可能导致明显的性能下降。为缓解此问题,当前实现采用基于阈值的方法,仅当每个推理调度的实际令牌数超过阈值时才启用 FlashComm_v1。这确保了该功能仅在能提升性能的场景下激活,避免了在低并发情况下可能出现的性能下降。" #: ../../source/tutorials/models/Qwen3-Dense.md:334 msgid "" @@ -636,7 +627,7 @@ msgstr "此优化需要设置环境变量 `VLLM_ASCEND_ENABLE_FLASHCOMM1 = 1` #: ../../source/tutorials/models/Qwen3-Dense.md:336 msgid "4. 
Matmul and ReduceScatter Fusion" -msgstr "4. 矩阵乘法和ReduceScatter融合" +msgstr "4. 矩阵乘法和 ReduceScatter 融合" #: ../../source/tutorials/models/Qwen3-Dense.md:338 msgid "" @@ -648,7 +639,7 @@ msgid "" "communication steps, improves computational efficiency, and allows for " "better resource utilization, resulting in enhanced throughput, especially" " in large-scale distributed environments." -msgstr "一旦启用FlashComm_v1,可以应用额外的优化。此优化融合了矩阵乘法和ReduceScatter操作,并包含分片优化。矩阵乘法计算被视为一个流水线,而ReduceScatter和反量化操作则在另一个独立的流水线中处理。这种方法显著减少了通信步骤,提高了计算效率,并实现了更好的资源利用,从而提升了吞吐量,尤其在大规模分布式环境中效果显著。" +msgstr "一旦启用 FlashComm_v1,可以应用额外的优化。此优化融合了矩阵乘法和 ReduceScatter 操作,并包含分片优化。矩阵乘法计算被视为一个流水线,而 ReduceScatter 和反量化操作则在另一个独立的流水线中处理。这种方法显著减少了通信步骤,提高了计算效率,并实现了更好的资源利用,从而提升了吞吐量,尤其在大规模分布式环境中效果显著。" #: ../../source/tutorials/models/Qwen3-Dense.md:340 msgid "" @@ -658,7 +649,7 @@ msgid "" " is currently used to mitigate this problem. The optimization is only " "applied when the token count exceeds the threshold, ensuring that it is " "not enabled in cases where it could negatively impact performance." -msgstr "此优化在FlashComm_v1激活后会自动启用。然而,由于融合后在小并发场景下存在性能下降的问题,目前采用基于阈值的方法来缓解此问题。该优化仅在令牌数超过阈值时应用,确保在可能对性能产生负面影响的情况下不被启用。" +msgstr "此优化在 FlashComm_v1 激活后会自动启用。然而,由于融合后在小并发场景下存在性能下降的问题,目前采用基于阈值的方法来缓解此问题。该优化仅在令牌数超过阈值时应用,确保在可能对性能产生负面影响的情况下不被启用。" #: ../../source/tutorials/models/Qwen3-Dense.md:342 msgid "5. Weight Prefetching" @@ -681,7 +672,7 @@ msgid "" "preloaded to L2 cache ahead of time, reducing MTE utilization during the " "MLP computations and indirectly improving Cube computation efficiency by " "minimizing resource contention and optimizing data flow." -msgstr "在稠密模型场景中,MLP的gate_up_proj和down_proj线性层通常表现出相对较高的MTE利用率。为解决此问题,我们创建了一个专门用于权重预取的独立流水线,该流水线与MLP之前的原始向量计算流水线(如RMSNorm和SiLU)并行运行。这种方法允许权重提前预加载到L2缓存中,从而降低MLP计算期间的MTE利用率,并通过最小化资源争用和优化数据流,间接提升Cube计算效率。" +msgstr "在稠密模型场景中,MLP 的 gate_up_proj 和 down_proj 线性层通常表现出相对较高的 MTE 利用率。为解决此问题,我们创建了一个专门用于权重预取的独立流水线,该流水线与 MLP 之前的原始向量计算流水线(如 RMSNorm 和 SiLU)并行运行。这种方法允许权重提前预加载到 L2 缓存中,从而降低 MLP 计算期间的 MTE 利用率,并通过最小化资源争用和优化数据流,间接提升 Cube 计算效率。" #: ../../source/tutorials/models/Qwen3-Dense.md:348 #, python-brace-format @@ -695,11 +686,17 @@ msgid "" "\"enabled\": true, \"prefetch_ratio\": { \"mlp\": { \"gate_up\": 1.0, " "\"down\": 1.0}}}. See User Guide->Feature Guide->Weight Prefetch Guide " "for details." -msgstr "之前用于启用MLP权重预取的环境变量 `VLLM_ASCEND_ENABLE_PREFETCH_MLP`,以及用于设置MLP gate_up_proj和down_proj权重预取大小的 `VLLM_ASCEND_MLP_GATE_UP_PREFETCH_SIZE` 和 `VLLM_ASCEND_MLP_DOWN_PREFETCH_SIZE` 已被弃用。请改用以下配置:`\"weight_prefetch_config\": { \"enabled\": true, \"prefetch_ratio\": { \"mlp\": { \"gate_up\": 1.0, \"down\": 1.0}}}`。详情请参阅用户指南->功能指南->权重预取指南。" +msgstr "" +"此前用于启用MLP权重预取的环境变量 `VLLM_ASCEND_ENABLE_PREFETCH_MLP`,以及用于设置MLP " +"gate_up_proj和down_proj权重预取大小的 `VLLM_ASCEND_MLP_GATE_UP_PREFETCH_SIZE` 和 " +"`VLLM_ASCEND_MLP_DOWN_PREFETCH_SIZE` " +"已被弃用。请改用以下配置:`\"weight_prefetch_config\": { \"enabled\": true, " +"\"prefetch_ratio\": { \"mlp\": { \"gate_up\": 1.0, \"down\": " +"1.0}}}`。详情请参阅用户指南->功能指南->权重预取指南。" #: ../../source/tutorials/models/Qwen3-Dense.md:350 msgid "6. Zerolike Elimination" -msgstr "6. Zerolike消除" +msgstr "6. 类零消除" #: ../../source/tutorials/models/Qwen3-Dense.md:352 msgid "" @@ -731,7 +728,9 @@ msgid "" "The configuration compilation_config = { \"cudagraph_mode\": " "\"FULL_DECODE_ONLY\"} is used when starting the service. This setup is " "necessary to enable the aclgraph's full decode-only mode." 
-msgstr "启动服务时使用配置 `compilation_config = { \"cudagraph_mode\": \"FULL_DECODE_ONLY\"}`。此设置对于启用aclgraph的完全仅解码模式是必需的。" +msgstr "" +"启动服务时使用配置 `compilation_config = { \"cudagraph_mode\": " +"\"FULL_DECODE_ONLY\"}`。此设置对于启用aclgraph的完全仅解码模式是必需的。" #: ../../source/tutorials/models/Qwen3-Dense.md:362 msgid "8. Asynchronous Scheduling" @@ -785,13 +784,11 @@ msgid "" "18MB. The reason for this is that, at this value, the vector computations" " of RMSNorm and SiLU can effectively hide the prefetch stream, thereby " "accelerating the Matmul computations of the two linear layers." -msgstr "" -"例如,在上述实际场景中,我将MLP中gate_up_proj和down_proj的预取缓冲区大小设置为18MB。" -"这样做的原因是,在此数值下,RMSNorm和SiLU的向量计算能够有效隐藏预取流,从而加速两个线性层的Matmul计算。" +msgstr "例如,在上述实际场景中,我将MLP中gate_up_proj和down_proj的预取缓冲区大小设置为18MB。这样做的原因是,在此数值下,RMSNorm和SiLU的向量计算能够有效隐藏预取流,从而加速两个线性层的Matmul计算。" #: ../../source/tutorials/models/Qwen3-Dense.md:378 msgid "2.Max-num-batched-tokens" -msgstr "2.最大批处理令牌数" +msgstr "2. 最大批处理令牌数" #: ../../source/tutorials/models/Qwen3-Dense.md:380 msgid "" @@ -802,24 +799,22 @@ msgid "" "processed per batch, potentially leading to inefficiencies. Conversely, " "setting it too large increases the risk of Out of Memory (OOM) errors due" " to excessive memory consumption." -msgstr "" -"最大批处理令牌数参数决定了单批次可处理的令牌数量上限。调整此值有助于平衡吞吐量与内存使用。" -"若设置过小,每批次处理的令牌数较少,可能降低效率,从而对端到端性能产生负面影响。" -"反之,若设置过大,则会因内存消耗过高而增加内存溢出(OOM)错误的风险。" +msgstr "最大批处理令牌数参数决定了单批次可处理的令牌数量上限。调整此值有助于平衡吞吐量与内存使用。若设置过小,每批次处理的令牌数较少,可能降低效率,从而对端到端性能产生负面影响。反之,若设置过大,则会因内存消耗过高而增加内存溢出(OOM)错误的风险。" #: ../../source/tutorials/models/Qwen3-Dense.md:382 msgid "" "In the above real-world scenario, we not only conducted extensive testing" " to determine the most cost-effective value, but also took into account " "the accumulation of decode tokens when enabling chunked prefill. If the " -"value is set too small, a single request may被分块多次,并且在推理的早期阶段,一个批次可能只包含少量解码令牌。这可能导致端到端吞吐量达不到预期。" -msgstr "" -"在上述实际场景中,我们不仅通过大量测试确定了最具性价比的数值,还考虑了启用分块预填充时解码令牌的累积问题。" -"若该值设置过小,单个请求可能被多次分块处理,且在推理早期阶段,单个批次可能仅包含少量解码令牌,从而导致端到端吞吐量无法达到预期。" +"value is set too small, a single request may be chunked multiple times, " +"and during the early stages of inference, a batch may contain only a " +"small number of decode tokens. This can result in the end-to-end " +"throughput falling short of expectations." +msgstr "在上述实际场景中,我们不仅通过大量测试确定了最具性价比的数值,还考虑了启用分块预填充时解码令牌的累积问题。若该值设置过小,单个请求可能被多次分块处理,且在推理早期阶段,单个批次可能仅包含少量解码令牌,从而导致端到端吞吐量无法达到预期。" #: ../../source/tutorials/models/Qwen3-Dense.md:384 msgid "3.Cudagraph_capture_sizes" -msgstr "3.CUDA图捕获尺寸" +msgstr "3. CUDA图捕获尺寸" #: ../../source/tutorials/models/Qwen3-Dense.md:386 msgid "" @@ -827,8 +822,7 @@ msgid "" "captures during the inference process. Adjusting this value determines " "how much of the computation graph is captured at once, which can " "significantly impact both performance and memory usage." -msgstr "" -"CUDA图捕获尺寸参数控制推理过程中图捕获的粒度。调整此值决定了单次捕获的计算图范围,这对性能和内存使用均有显著影响。" +msgstr "CUDA图捕获尺寸参数控制推理过程中图捕获的粒度。调整此值决定了单次捕获的计算图范围,这对性能和内存使用均有显著影响。" #: ../../source/tutorials/models/Qwen3-Dense.md:388 msgid "" @@ -839,9 +833,7 @@ msgid "" " between two sizes, the framework will automatically pad the token count " "to the larger size. This often leads to actual performance deviating from" " the expected or even degrading." 
-msgstr "" -"若未手动指定此列表,系统将自动填充一系列均匀分布的值,这通常能保证良好性能。" -"但若需进一步微调,手动指定数值将获得更佳效果。这是因为当批次大小介于两个尺寸之间时,框架会自动将令牌数填充至较大尺寸,这常导致实际性能偏离预期甚至下降。" +msgstr "若未手动指定此列表,系统将自动填充一系列均匀分布的值,这通常能保证良好性能。但若需进一步微调,手动指定数值将获得更佳效果。这是因为当批次大小介于两个尺寸之间时,框架会自动将令牌数填充至较大尺寸,这常导致实际性能偏离预期甚至下降。" #: ../../source/tutorials/models/Qwen3-Dense.md:390 msgid "" @@ -850,9 +842,7 @@ msgid "" "actually included in the cudagraph_capture_sizes list. This way, during " "the decode phase, padding operations are essentially avoided, ensuring " "the reliability of the experimental data." -msgstr "" -"因此,如上述实际场景所示,在调整基准测试请求并发度时,我们始终确保并发度实际包含在CUDA图捕获尺寸列表中。" -"这样在解码阶段基本避免了填充操作,从而保证了实验数据的可靠性。" +msgstr "因此,如上述实际场景所示,在调整基准测试请求并发度时,我们始终确保并发度实际包含在CUDA图捕获尺寸列表中。这样在解码阶段基本避免了填充操作,从而保证了实验数据的可靠性。" #: ../../source/tutorials/models/Qwen3-Dense.md:392 msgid "" @@ -861,6 +851,4 @@ msgid "" "not meet this condition will be automatically filtered out. Therefore, I " "recommend incrementally adding concurrency based on the TP size after " "enabling FlashComm_v1." -msgstr "" -"需特别注意,若启用FlashComm_v1,此列表中的值必须是TP尺寸的整数倍。不满足此条件的任何值都将被自动过滤。" -"因此,建议在启用FlashComm_v1后,基于TP尺寸逐步增加并发度。" +msgstr "需特别注意,若启用FlashComm_v1,此列表中的值必须是TP尺寸的整数倍。不满足此条件的任何值都将被自动过滤。因此,建议在启用FlashComm_v1后,基于TP尺寸逐步增加并发度。" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.po b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.po index 7d0240d5..8748398b 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.po @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: vllm-ascend \n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"POT-Creation-Date: 2026-04-15 09:41+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language: zh_CN\n" @@ -37,7 +37,9 @@ msgid "" "equipped with chain-of-thought reasoning, supporting audio, video, and " "text input, with text output." msgstr "" -"Qwen3-Omni 是原生端到端多语言全模态基础模型。它能处理文本、图像、音频和视频,并以文本和自然语音形式提供实时流式响应。我们引入了多项架构升级以提升性能和效率。Qwen3-Omni-30B-A3B 的 Thinking 模型包含思考器组件,具备思维链推理能力,支持音频、视频和文本输入,输出为文本。" +"Qwen3-Omni " +"是原生端到端多语言全模态基础模型。它能处理文本、图像、音频和视频,并以文本和自然语音形式提供实时流式响应。我们引入了多项架构升级以提升性能和效率。Qwen3" +"-Omni-30B-A3B 的 Thinking 模型包含思考器组件,具备思维链推理能力,支持音频、视频和文本输入,输出为文本。" #: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:7 msgid "" @@ -55,14 +57,18 @@ msgid "" "Refer to [supported features](https://docs.vllm.ai/projects/ascend/zh-" "cn/latest/user_guide/support_matrix/supported_models.html) to get the " "model's supported feature matrix." -msgstr "请参考 [支持的功能](https://docs.vllm.ai/projects/ascend/zh-cn/latest/user_guide/support_matrix/supported_models.html) 以获取模型支持的功能矩阵。" +msgstr "" +"请参考 [支持的功能](https://docs.vllm.ai/projects/ascend/zh-" +"cn/latest/user_guide/support_matrix/supported_models.html) 以获取模型支持的功能矩阵。" #: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:13 msgid "" "Refer to [feature guide](https://docs.vllm.ai/projects/ascend/zh-" "cn/latest/user_guide/feature_guide/index.html) to get the feature's " "configuration." 
-msgstr "请参考 [功能指南](https://docs.vllm.ai/projects/ascend/zh-cn/latest/user_guide/feature_guide/index.html) 以获取功能的配置信息。" +msgstr "" +"请参考 [功能指南](https://docs.vllm.ai/projects/ascend/zh-" +"cn/latest/user_guide/feature_guide/index.html) 以获取功能的配置信息。" #: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:15 msgid "Environment Preparation" @@ -74,18 +80,20 @@ msgstr "模型权重" #: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:19 msgid "" -"`Qwen3-Omni-30B-A3B-Thinking` requires 2 NPU Cards(64G × 2).[Download " +"`Qwen3-Omni-30B-A3B-Thinking` requires 2 NPU Cards (64G × 2).[Download " "model weight](https://modelscope.cn/models/Qwen/Qwen3-Omni-30B-A3B-" "Thinking) It is recommended to download the model weight to the shared " "directory of multiple nodes, such as `/root/.cache/`" msgstr "" -"`Qwen3-Omni-30B-A3B-Thinking` 需要 2 张 NPU 卡 (64G × 2)。[下载模型权重](https://modelscope.cn/models/Qwen/Qwen3-Omni-30B-A3B-Thinking)。建议将模型权重下载到多节点的共享目录,例如 `/root/.cache/`。" +"`Qwen3-Omni-30B-A3B-Thinking` 需要 2 张 NPU 卡 (64G × " +"2)。[下载模型权重](https://modelscope.cn/models/Qwen/Qwen3-Omni-30B-A3B-" +"Thinking)。建议将模型权重下载到多节点的共享目录,例如 `/root/.cache/`。" #: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:22 msgid "Installation" msgstr "安装" -#: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md +#: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:24 msgid "Use docker image" msgstr "使用 Docker 镜像" @@ -100,9 +108,11 @@ msgid "" "Select an image based on your machine type and start the docker image on " "your node, refer to [using docker](../../installation.md#set-up-using-" "docker)." -msgstr "根据您的机器类型选择镜像并在节点上启动 Docker 镜像,请参考 [使用 Docker](../../installation.md#set-up-using-docker)。" +msgstr "" +"根据您的机器类型选择镜像并在节点上启动 Docker 镜像,请参考 [使用 Docker](../../installation.md#set-" +"up-using-docker)。" -#: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md +#: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:32 msgid "Build from source" msgstr "从源码构建" @@ -114,7 +124,9 @@ msgstr "您可以从源码构建所有组件。" msgid "" "Install `vllm-ascend`, refer to [set up using " "python](../../installation.md#set-up-using-python)." -msgstr "安装 `vllm-ascend`,请参考 [使用 Python 设置](../../installation.md#set-up-using-python)。" +msgstr "" +"安装 `vllm-ascend`,请参考 [使用 Python 设置](../../installation.md#set-up-using-" +"python)。" #: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:71 msgid "Please install system dependencies" @@ -146,7 +158,9 @@ msgid "" "Atlas A2 with 64 GB of NPU card memory, tensor-parallel-size should be at" " least 1, and for 32 GB of memory, tensor-parallel-size should be at " "least 2." -msgstr "运行以下脚本在多 NPU 上启动 vLLM 服务器:对于具有 64 GB NPU 卡内存的 Atlas A2,tensor-parallel-size 应至少为 1;对于 32 GB 内存,tensor-parallel-size 应至少为 2。" +msgstr "" +"运行以下脚本在多 NPU 上启动 vLLM 服务器:对于具有 64 GB NPU 卡内存的 Atlas A2,tensor-parallel-" +"size 应至少为 1;对于 32 GB 内存,tensor-parallel-size 应至少为 2。" #: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:188 msgid "Functional Verification" @@ -173,25 +187,31 @@ msgid "" "As an example, take the `gsm8k` `omnibench` `bbh` dataset as a test " "dataset, and run accuracy evaluation of `Qwen3-Omni-30B-A3B-Thinking` in " "online mode." 
-msgstr "以 `gsm8k`、`omnibench`、`bbh` 数据集作为测试数据集为例,在在线模式下运行 `Qwen3-Omni-30B-A3B-Thinking` 的精度评估。" +msgstr "" +"以 `gsm8k`、`omnibench`、`bbh` 数据集作为测试数据集为例,在在线模式下运行 `Qwen3-Omni-30B-A3B-" +"Thinking` 的精度评估。" #: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:239 msgid "" "Refer to Using " "evalscope() for `evalscope`installation." -msgstr "关于 `evalscope` 的安装,请参考使用 evalscope ()。" +msgstr "" +"关于 `evalscope` 的安装,请参考使用 evalscope " +"()。" #: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:240 msgid "Run `evalscope` to execute the accuracy evaluation." msgstr "运行 `evalscope` 以执行精度评估。" #: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:255 -#: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:296 msgid "" "After execution, you can get the result, here is the result of `Qwen3" "-Omni-30B-A3B-Thinking` in vllm-ascend:0.13.0rc1 for reference only." -msgstr "执行后,您可以获得结果。以下是 `Qwen3-Omni-30B-A3B-Thinking` 在 vllm-ascend:0.13.0rc1 中的结果,仅供参考。" +msgstr "" +"执行后,您可以获得结果。以下是 `Qwen3-Omni-30B-A3B-Thinking` 在 vllm-ascend:0.13.0rc1 " +"中的结果,仅供参考。" #: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:269 msgid "Performance" @@ -207,7 +227,9 @@ msgid "" "example. Refer to vllm benchmark for more details. Refer to [vllm " "benchmark](https://docs.vllm.ai/en/latest/benchmarking/) for more " "details." -msgstr "以运行 `Qwen3-Omni-30B-A3B-Thinking` 的性能评估为例。更多详情请参考 vllm 基准测试。更多详情请参考 [vllm 基准测试](https://docs.vllm.ai/en/latest/benchmarking/)。" +msgstr "" +"以运行 `Qwen3-Omni-30B-A3B-Thinking` 的性能评估为例。更多详情请参考 vllm 基准测试。更多详情请参考 [vllm" +" 基准测试](https://docs.vllm.ai/en/latest/benchmarking/)。" #: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:277 msgid "There are three `vllm bench` subcommands:" @@ -227,4 +249,12 @@ msgstr "`throughput`:对离线推理吞吐量进行基准测试。" #: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:283 msgid "Take the `serve` as an example. Run the code as follows." -msgstr "以 `serve` 为例。按如下方式运行代码。" \ No newline at end of file +msgstr "以 `serve` 为例。按如下方式运行代码。" + +#: ../../source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md:296 +msgid "" +"After execution, you can get the result, here is the result of `Qwen3" +"-Omni-30B-A3B-Thinking` in vllm-ascend:0.13.0rc1 for reference only." +msgstr "" +"执行后,您可以获得结果。以下是 `Qwen3-Omni-30B-A3B-Thinking` 在 vllm-ascend:0.13.0rc1 " +"中的结果,仅供参考。" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3.5-397B-A17B.po b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3.5-397B-A17B.po index ba36bd91..ea3e3ab1 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3.5-397B-A17B.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3.5-397B-A17B.po @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: vllm-ascend \n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"POT-Creation-Date: 2026-04-15 09:41+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language: zh_CN\n" @@ -79,7 +79,10 @@ msgid "" "`Qwen3.5-397B-A17B`(BF16 version): require 2 Atlas 800 A3 (64G × 16) " "nodes or 4 Atlas 800 A2 (64G × 8) nodes. 
[Download model " "weight](https://www.modelscope.cn/models/Qwen/Qwen3.5-397B-A17B)" -msgstr "`Qwen3.5-397B-A17B` (BF16 版本):需要 2 个 Atlas 800 A3 (64G × 16) 节点或 4 个 Atlas 800 A2 (64G × 8) 节点。[下载模型权重](https://www.modelscope.cn/models/Qwen/Qwen3.5-397B-A17B)" +msgstr "" +"`Qwen3.5-397B-A17B` (BF16 版本):需要 2 个 Atlas 800 A3 (64G × 16) 节点或 4 个 " +"Atlas 800 A2 (64G × 8) " +"节点。[下载模型权重](https://www.modelscope.cn/models/Qwen/Qwen3.5-397B-A17B)" #: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:22 msgid "" @@ -87,7 +90,10 @@ msgid "" "× 16) node or 2 Atlas 800 A2 (64G × 8) nodes. [Download model " "weight](https://www.modelscope.cn/models/Eco-Tech/Qwen3.5-397B-A17B-" "w8a8-mtp)" -msgstr "`Qwen3.5-397B-A17B-w8a8` (量化版本):需要 1 个 Atlas 800 A3 (64G × 16) 节点或 2 个 Atlas 800 A2 (64G × 8) 节点。[下载模型权重](https://www.modelscope.cn/models/Eco-Tech/Qwen3.5-397B-A17B-w8a8-mtp)" +msgstr "" +"`Qwen3.5-397B-A17B-w8a8` (量化版本):需要 1 个 Atlas 800 A3 (64G × 16) 节点或 2 个 " +"Atlas 800 A2 (64G × 8) 节点。[下载模型权重](https://www.modelscope.cn/models/Eco-" +"Tech/Qwen3.5-397B-A17B-w8a8-mtp)" #: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:24 msgid "" @@ -104,13 +110,15 @@ msgid "" "If you want to deploy multi-node environment, you need to verify multi-" "node communication according to [verify multi-node communication " "environment](../../installation.md#verify-multi-node-communication)." -msgstr "如果您想部署多节点环境,需要根据[验证多节点通信环境](../../installation.md#verify-multi-node-communication)来验证多节点通信。" +msgstr "" +"如果您想部署多节点环境,需要根据[验证多节点通信环境](../../installation.md#verify-multi-node-" +"communication)来验证多节点通信。" #: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:30 msgid "Installation" msgstr "安装" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:34 msgid "Use docker image" msgstr "使用 Docker 镜像" @@ -119,16 +127,20 @@ msgid "" "For example, using images `quay.io/ascend/vllm-ascend:v0.17.0rc1`(for " "Atlas 800 A2) and `quay.io/ascend/vllm-ascend:v0.17.0rc1-a3`(for Atlas " "800 A3)." -msgstr "例如,使用镜像 `quay.io/ascend/vllm-ascend:v0.17.0rc1`(适用于 Atlas 800 A2)和 `quay.io/ascend/vllm-ascend:v0.17.0rc1-a3`(适用于 Atlas 800 A3)。" +msgstr "" +"例如,使用镜像 `quay.io/ascend/vllm-ascend:v0.17.0rc1`(适用于 Atlas 800 A2)和 " +"`quay.io/ascend/vllm-ascend:v0.17.0rc1-a3`(适用于 Atlas 800 A3)。" #: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:38 msgid "" "Select an image based on your machine type and start the docker image on " "your node, refer to [using docker](../../installation.md#set-up-using-" "docker)." -msgstr "根据您的机器类型选择镜像并在节点上启动 Docker 镜像,请参考[使用 Docker](../../installation.md#set-up-using-docker)。" +msgstr "" +"根据您的机器类型选择镜像并在节点上启动 Docker 镜像,请参考[使用 Docker](../../installation.md#set-" +"up-using-docker)。" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:76 msgid "Build from source" msgstr "从源码构建" @@ -140,7 +152,9 @@ msgstr "您可以从源码构建所有组件。" msgid "" "Install `vllm-ascend`, refer to [set up using " "python](../../installation.md#set-up-using-python)." -msgstr "安装 `vllm-ascend`,请参考[使用 Python 设置](../../installation.md#set-up-using-python)。" +msgstr "" +"安装 `vllm-ascend`,请参考[使用 Python 设置](../../installation.md#set-up-using-" +"python)。" #: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:84 msgid "" @@ -158,39 +172,42 @@ msgstr "单节点部署" #: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:90 msgid "" -"`Qwen3.5-397B-A17B` can be deployed on 2 Atlas 800 A3(64G*16) or 4 Atlas " -"800 A2(64G*8). 
`Qwen3.5-397B-A17B-w8a8` can be deployed on 1 Atlas 800 " -"A3(64G*16) or 2 Atlas 800 A2(64G*8), need to start with parameter " -"`--quantization ascend`." -msgstr "`Qwen3.5-397B-A17B` 可以部署在 2 个 Atlas 800 A3(64G*16) 或 4 个 Atlas 800 A2(64G*8) 上。`Qwen3.5-397B-A17B-w8a8` 可以部署在 1 个 Atlas 800 A3(64G*16) 或 2 个 Atlas 800 A2(64G*8) 上,需要使用参数 `--quantization ascend` 启动。" +"`Qwen3.5-397B-A17B-w8a8` can be deployed on 1 Atlas 800 A3(64G*16) or 2 " +"Atlas 800 A2(64G*8), need to start with parameter `--quantization " +"ascend`." +msgstr "" +"`Qwen3.5-397B-A17B-w8a8` 可以部署在 1 个 Atlas 800 A3(64G*16) 或 2 个 Atlas 800 " +"A2(64G*8) 上,需要使用参数 `--quantization ascend` 启动。" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:93 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:92 msgid "" "Run the following script to execute online 128k inference On 1 Atlas 800 " "A3(64G*16)." msgstr "在 1 个 Atlas 800 A3(64G*16) 上运行以下脚本以执行在线 128k 推理。" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:134 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:133 msgid "**Notice:**" msgstr "**注意:**" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:136 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:135 msgid "The parameters are explained as follows:" msgstr "参数解释如下:" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:138 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:137 msgid "" "`--data-parallel-size` 1 and `--tensor-parallel-size` 16 are common " "settings for data parallelism (DP) and tensor parallelism (TP) sizes." -msgstr "`--data-parallel-size` 1 和 `--tensor-parallel-size` 16 是数据并行 (DP) 和张量并行 (TP) 大小的常见设置。" +msgstr "" +"`--data-parallel-size` 1 和 `--tensor-parallel-size` 16 是数据并行 (DP) 和张量并行 " +"(TP) 大小的常见设置。" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:139 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:138 msgid "" "`--max-model-len` represents the context length, which is the maximum " "value of the input plus output for a single request." msgstr "`--max-model-len` 表示上下文长度,即单个请求的输入加输出的最大值。" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:140 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:139 msgid "" "`--max-num-seqs` indicates the maximum number of requests that each DP " "group is allowed to process. If the number of requests sent to the " @@ -199,36 +216,44 @@ msgid "" "state is also counted in metrics such as TTFT and TPOT. Therefore, when " "testing performance, it is generally recommended that `--max-num-seqs` * " "`--data-parallel-size` >= the actual total concurrency." -msgstr "`--max-num-seqs` 表示每个 DP 组允许处理的最大请求数。如果发送到服务的请求数超过此限制,多余的请求将保持在等待状态,不会被调度。请注意,在等待状态所花费的时间也会计入 TTFT 和 TPOT 等指标。因此,在测试性能时,通常建议 `--max-num-seqs` * `--data-parallel-size` >= 实际总并发数。" +msgstr "" +"`--max-num-seqs` 表示每个 DP " +"组允许处理的最大请求数。如果发送到服务的请求数超过此限制,多余的请求将保持在等待状态,不会被调度。请注意,在等待状态所花费的时间也会计入 TTFT" +" 和 TPOT 等指标。因此,在测试性能时,通常建议 `--max-num-seqs` * `--data-parallel-size` >= " +"实际总并发数。" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:141 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:140 msgid "" "`--max-num-batched-tokens` represents the maximum number of tokens that " "the model can process in a single step. 
Currently, vLLM v1 scheduling " "enables ChunkPrefill/SplitFuse by default, which means:" -msgstr "`--max-num-batched-tokens` 表示模型单步可以处理的最大 token 数。目前,vLLM v1 调度默认启用 ChunkPrefill/SplitFuse,这意味着:" +msgstr "" +"`--max-num-batched-tokens` 表示模型单步可以处理的最大 token 数。目前,vLLM v1 调度默认启用 " +"ChunkPrefill/SplitFuse,这意味着:" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:142 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:141 msgid "" "(1) If the input length of a request is greater than `--max-num-batched-" "tokens`, it will be divided into multiple rounds of computation according" " to `--max-num-batched-tokens`;" -msgstr "(1) 如果请求的输入长度大于 `--max-num-batched-tokens`,它将根据 `--max-num-batched-tokens` 被分成多轮计算;" +msgstr "" +"(1) 如果请求的输入长度大于 `--max-num-batched-tokens`,它将根据 `--max-num-batched-" +"tokens` 被分成多轮计算;" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:143 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:142 msgid "" "(2) Decode requests are prioritized for scheduling, and prefill requests " "are scheduled only if there is available capacity." msgstr "(2) 解码请求优先调度,只有在有可用容量时才调度预填充请求。" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:144 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:143 msgid "" "Generally, if `--max-num-batched-tokens` is set to a larger value, the " "overall latency will be lower, but the pressure on GPU memory (activation" " value usage) will be greater." msgstr "通常,如果 `--max-num-batched-tokens` 设置得较大,整体延迟会更低,但 GPU 内存(激活值使用)的压力会更大。" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:145 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:144 msgid "" "`--gpu-memory-utilization` represents the proportion of HBM that vLLM " "will use for actual inference. Its essential function is to calculate the" @@ -242,16 +267,24 @@ msgid "" "during actual inference (e.g., due to uneven EP load), setting `--gpu-" "memory-utilization` too high may lead to OOM (Out of Memory) issues " "during actual inference. The default value is `0.9`." -msgstr "`--gpu-memory-utilization` 表示 vLLM 将用于实际推理的 HBM 比例。其核心功能是计算可用的 kv_cache 大小。在预热阶段(vLLM 中称为 profile run),vLLM 会记录输入大小为 `--max-num-batched-tokens` 的推理过程中的峰值 GPU 内存使用量。然后,可用的 kv_cache 大小计算为:`--gpu-memory-utilization` * HBM 大小 - 峰值 GPU 内存使用量。因此,`--gpu-memory-utilization` 的值越大,可用的 kv_cache 就越多。然而,由于预热阶段的 GPU 内存使用量可能与实际推理时不同(例如,由于 EP 负载不均),将 `--gpu-memory-utilization` 设置得过高可能导致实际推理时出现 OOM(内存不足)问题。默认值为 `0.9`。" +msgstr "" +"`--gpu-memory-utilization` 表示 vLLM 将用于实际推理的 HBM 比例。其核心功能是计算可用的 kv_cache " +"大小。在预热阶段(vLLM 中称为 profile run),vLLM 会记录输入大小为 `--max-num-batched-tokens` " +"的推理过程中的峰值 GPU 内存使用量。然后,可用的 kv_cache 大小计算为:`--gpu-memory-utilization` * " +"HBM 大小 - 峰值 GPU 内存使用量。因此,`--gpu-memory-utilization` 的值越大,可用的 kv_cache " +"就越多。然而,由于预热阶段的 GPU 内存使用量可能与实际推理时不同(例如,由于 EP 负载不均),将 `--gpu-memory-" +"utilization` 设置得过高可能导致实际推理时出现 OOM(内存不足)问题。默认值为 `0.9`。" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:146 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:145 msgid "" "`--enable-expert-parallel` indicates that EP is enabled. Note that vLLM " "does not support a mixed approach of ETP and EP; that is, MoE can either " "use pure EP or pure TP." 
-msgstr "`--enable-expert-parallel` 表示启用了 EP。请注意,vLLM 不支持 ETP 和 EP 的混合方法;也就是说,MoE 要么使用纯 EP,要么使用纯 TP。" +msgstr "" +"`--enable-expert-parallel` 表示启用了 EP。请注意,vLLM 不支持 ETP 和 EP 的混合方法;也就是说,MoE " +"要么使用纯 EP,要么使用纯 TP。" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:147 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:146 msgid "" "`--no-enable-prefix-caching` indicates that prefix caching is disabled. " "To enable it, for mamba-like models Qwen3.5, set `--enable-prefix-" @@ -259,15 +292,19 @@ msgid "" "implementation of hybrid kv cache might result in a very large block_size" " when scheduling. For example, the block_size may be adjusted to 2048, " "which means that any prefix shorter than 2048 will never be cached." -msgstr "`--no-enable-prefix-caching` 表示前缀缓存被禁用。要启用它,对于类似 Mamba 的模型 Qwen3.5,请设置 `--enable-prefix-caching` 和 `--mamba-cache-mode align`。请注意,当前混合 kv cache 的实现可能在调度时导致非常大的 block_size。例如,block_size 可能被调整为 2048,这意味着任何短于 2048 的前缀将永远不会被缓存。" +msgstr "" +"`--no-enable-prefix-caching` 表示前缀缓存被禁用。要启用它,对于类似 Mamba 的模型 Qwen3.5,请设置 " +"`--enable-prefix-caching` 和 `--mamba-cache-mode align`。请注意,当前混合 kv cache " +"的实现可能在调度时导致非常大的 block_size。例如,block_size 可能被调整为 2048,这意味着任何短于 2048 " +"的前缀将永远不会被缓存。" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:148 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:147 msgid "" "`--quantization` \"ascend\" indicates that quantization is used. To " "disable quantization, remove this option." msgstr "`--quantization` \"ascend\" 表示使用了量化。要禁用量化,请移除此选项。" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:149 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:148 msgid "" "`--compilation-config` contains configurations related to the aclgraph " "graph mode. The most significant configurations are \"cudagraph_mode\" " @@ -276,9 +313,13 @@ msgid "" "\"PIECEWISE\" and \"FULL_DECODE_ONLY\" are supported. The graph mode is " "mainly used to reduce the cost of operator dispatch. Currently, " "\"FULL_DECODE_ONLY\" is recommended." -msgstr "`--compilation-config` 包含与 aclgraph 图模式相关的配置。最重要的配置是 \"cudagraph_mode\" 和 \"cudagraph_capture_sizes\",其含义如下:\"cudagraph_mode\":表示特定的图模式。目前支持 \"PIECEWISE\" 和 \"FULL_DECODE_ONLY\"。图模式主要用于降低算子调度的开销。目前推荐使用 \"FULL_DECODE_ONLY\"。" +msgstr "" +"`--compilation-config` 包含与 aclgraph 图模式相关的配置。最重要的配置是 \"cudagraph_mode\" 和" +" \"cudagraph_capture_sizes\",其含义如下:\"cudagraph_mode\":表示特定的图模式。目前支持 " +"\"PIECEWISE\" 和 \"FULL_DECODE_ONLY\"。图模式主要用于降低算子调度的开销。目前推荐使用 " +"\"FULL_DECODE_ONLY\"。" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:151 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:150 msgid "" "\"cudagraph_capture_sizes\": represents different levels of graph modes. " "The default value is [1, 2, 4, 8, 16, 24, 32, 40,..., `--max-num-seqs`]. " @@ -286,164 +327,124 @@ msgid "" " inputs between levels are automatically padded to the next level. " "Currently, the default setting is recommended. Only in some scenarios is " "it necessary to set this separately to achieve optimal performance." 
-msgstr "\"cudagraph_capture_sizes\":表示不同级别的图模式。默认值为 [1, 2, 4, 8, 16, 24, 32, 40,..., `--max-num-seqs`]。在图模式下,不同级别图的输入是固定的,级别之间的输入会自动填充到下一个级别。目前推荐使用默认设置。只有在某些场景下,才需要单独设置此参数以达到最佳性能。" +msgstr "" +"\"cudagraph_capture_sizes\":表示不同级别的图模式。默认值为 [1, 2, 4, 8, 16, 24, 32, " +"40,..., `--max-num-" +"seqs`]。在图模式下,不同级别图的输入是固定的,级别之间的输入会自动填充到下一个级别。目前推荐使用默认设置。只有在某些场景下,才需要单独设置此参数以达到最佳性能。" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:153 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:152 msgid "Multi-node Deployment with MP (Recommended)" msgstr "使用 MP 的多节点部署(推荐)" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:155 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:154 msgid "" "Assume you have 2 Atlas 800 A2 nodes, and want to deploy the `Qwen3.5" -"-397B-A17B` model across multiple nodes." -msgstr "假设您有 2 个 Atlas 800 A2 节点,并希望跨多个节点部署 `Qwen3.5-397B-A17B` 模型。" +"-397B-A17B-w8a8-mtp` model across multiple nodes." +msgstr "假设您有 2 个 Atlas 800 A2 节点,并希望跨多个节点部署 `Qwen3.5-397B-A17B-w8a8-mtp` 模型。" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:157 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:156 msgid "Node 0" msgstr "节点 0" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:203 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:202 msgid "Node1" msgstr "节点 1" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:253 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:252 msgid "" "If the service starts successfully, the following information will be " "displayed on node 0:" msgstr "如果服务启动成功,节点 0 上将显示以下信息:" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:264 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:263 msgid "Multi-node Deployment with Ray" msgstr "使用 Ray 的多节点部署" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:266 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:265 msgid "refer to [Ray Distributed (Qwen/Qwen3-235B-A22B)](../features/ray.md)." msgstr "请参考 [Ray 分布式 (Qwen/Qwen3-235B-A22B)](../features/ray.md)。" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:268 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:267 msgid "Prefill-Decode Disaggregation" msgstr "预填充-解码解耦" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:270 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:269 msgid "" "We recommend using Mooncake for deployment: " "[Mooncake](../features/pd_disaggregation_mooncake_multi_node.md)." msgstr "我们推荐使用 Mooncake 进行部署:[Mooncake](../features/pd_disaggregation_mooncake_multi_node.md)。" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:272 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:271 msgid "" "Take Atlas 800 A3 (64G × 16) for example, we recommend to deploy 1P1D (3 " "nodes) to run Qwen3.5-397B-A17B." msgstr "以 Atlas 800 A3 (64G × 16) 为例,我们建议部署 1P1D(3 个节点)来运行 Qwen3.5-397B-A17B。" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:274 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:273 msgid "`Qwen3.5-397B-A17B-w8a8-mtp 1P1D` require 3 Atlas 800 A3 (64G × 16)." msgstr "`Qwen3.5-397B-A17B-w8a8-mtp 1P1D` 需要 3 个 Atlas 800 A3 (64G × 16)。" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:276 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:275 msgid "" "To run the vllm-ascend `Prefill-Decode Disaggregation` service, you need " "to deploy `run_p.sh` 、`run_d0.sh` and `run_d1.sh` script on each node and" " deploy a `proxy.sh` script on prefill master node to forward requests." 
msgstr "要运行 vllm-ascend `Prefill-Decode Disaggregation` 服务,您需要在每个节点上部署 `run_p.sh`、`run_d0.sh` 和 `run_d1.sh` 脚本,并在预填充主节点上部署一个 `proxy.sh` 脚本来转发请求。" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:278 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:277 msgid "Prefill Node 0 `run_p.sh` script" msgstr "预填充节点 0 `run_p.sh` 脚本" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:353 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:352 msgid "Decode Node 0 `run_d0.sh` script" msgstr "解码节点 0 `run_d0.sh` 脚本" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:433 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:432 msgid "Decode Node 1 `run_d1.sh` script" msgstr "解码节点 1 `run_d1.sh` 脚本" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:512 -msgid "**Notice:** The parameters are explained as follows:" -msgstr "**注意:** 参数说明如下:" - -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:515 -msgid "" -"`--async-scheduling`: enables the asynchronous scheduling function. When " -"Multi-Token Prediction (MTP) is enabled, asynchronous scheduling of " -"operator delivery can be implemented to overlap the operator delivery " -"latency." -msgstr "" -"`--async-scheduling`:启用异步调度功能。当启用多令牌预测(MTP)时,可以实现算子交付的异步调度,以重叠算子交付延迟。" - -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:516 -msgid "" -"`cudagraph_capture_sizes`: The recommended value is `n x (mtp + 1)`. And " -"the min is `n = 1` and the max is `n = max-num-seqs`. For other values, " -"it is recommended to set them to the number of frequently occurring " -"requests on the Decode (D) node." -msgstr "" -"`cudagraph_capture_sizes`:推荐值为 `n x (mtp + 1)`。最小值为 `n = 1`,最大值为 `n = max-num-seqs`。对于其他值,建议设置为解码(D)节点上频繁出现的请求数量。" - -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:517 -msgid "" -"`recompute_scheduler_enable: true`: enables the recomputation scheduler. " -"When the Key-Value Cache (KV Cache) of the decode node is insufficient, " -"requests will be sent to the prefill node to recompute the KV Cache. In " -"the PD separation scenario, it is recommended to enable this " -"configuration on both prefill and decode nodes simultaneously." -msgstr "" -"`recompute_scheduler_enable: true`:启用重计算调度器。当解码节点的键值缓存(KV Cache)不足时,请求将被发送到预填充节点以重新计算 KV Cache。在 PD 分离场景下,建议同时在预填充节点和解码节点上启用此配置。" - -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:518 -msgid "" -"`no-enable-prefix-caching`: The prefix-cache feature is enabled by " -"default. You can use the `--no-enable-prefix-caching` parameter to " -"disable this feature. Notice: for Prefill-Decode disaggregation feature, " -"known issue on D node: [#7944](https://github.com/vllm-project/vllm-" -"ascend/issues/7944)" -msgstr "" -"`no-enable-prefix-caching`:前缀缓存功能默认启用。您可以使用 `--no-enable-prefix-caching` 参数禁用此功能。注意:对于预填充-解码分离功能,D 节点上的已知问题:[#7944](https://github.com/vllm-project/vllm-ascend/issues/7944)" - -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:520 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:519 msgid "Run the `proxy.sh` script on the prefill master node" msgstr "在预填充主节点上运行 `proxy.sh` 脚本" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:522 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:521 msgid "" "Run a proxy server on the same node with the prefiller service instance. 
" "You can get the proxy program in the repository's examples: " "[load\\_balance\\_proxy\\_server\\_example.py](https://github.com/vllm-" "project/vllm-" "ascend/blob/main/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py)" -msgstr "" -"在与预填充服务实例相同的节点上运行一个代理服务器。您可以在仓库的示例中找到代理程序:[load\\_balance\\_proxy\\_server\\_example.py](https://github.com/vllm-project/vllm-ascend/blob/main/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py)" +msgstr "在与预填充服务实例相同的节点上运行一个代理服务器。您可以在仓库的示例中找到代理程序:[load\\_balance\\_proxy\\_server\\_example.py](https://github.com/vllm-project/vllm-ascend/blob/main/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py)" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:548 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:547 msgid "Functional Verification" msgstr "功能验证" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:550 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:549 msgid "Once your server is started, you can query the model with input prompts:" msgstr "服务器启动后,您可以使用输入提示词查询模型:" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:563 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:562 msgid "Accuracy Evaluation" msgstr "精度评估" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:565 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:564 msgid "Here are two accuracy evaluation methods." msgstr "以下是两种精度评估方法。" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:567 -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:579 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:566 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:578 msgid "Using AISBench" msgstr "使用 AISBench" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:569 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:568 msgid "" "Refer to [Using " "AISBench](../../developer_guide/evaluation/using_ais_bench.md) for " "details." msgstr "详情请参阅[使用 AISBench](../../developer_guide/evaluation/using_ais_bench.md)。" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:571 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:570 msgid "" "After execution, you can get the result, here is the result of `Qwen3.5" "-397B-A17B-w8a8` in `vllm-ascend:v0.17.0rc1` for reference only." @@ -489,53 +490,53 @@ msgstr "生成" msgid "96.74" msgstr "96.74" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:577 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:576 msgid "Performance" msgstr "性能" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:581 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:580 msgid "" "Refer to [Using AISBench for performance " "evaluation](../../developer_guide/evaluation/using_ais_bench.md#execute-" "performance-evaluation) for details." msgstr "详情请参阅[使用 AISBench 进行性能评估](../../developer_guide/evaluation/using_ais_bench.md#execute-performance-evaluation)。" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:583 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:582 msgid "Using vLLM Benchmark" msgstr "使用 vLLM Benchmark" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:585 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:584 msgid "Run performance evaluation of `Qwen3.5-397B-A17B-w8a8` as an example." 
msgstr "以运行 `Qwen3.5-397B-A17B-w8a8` 的性能评估为例。" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:587 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:586 msgid "" "Refer to [vllm " "benchmark](https://docs.vllm.ai/en/latest/contributing/benchmarks.html) " "for more details." msgstr "更多详情请参阅 [vllm benchmark](https://docs.vllm.ai/en/latest/contributing/benchmarks.html)。" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:589 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:588 msgid "There are three `vllm bench` subcommands:" msgstr "`vllm bench` 有三个子命令:" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:591 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:590 msgid "`latency`: Benchmark the latency of a single batch of requests." msgstr "`latency`:对单批请求的延迟进行基准测试。" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:592 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:591 msgid "`serve`: Benchmark the online serving throughput." msgstr "`serve`:对在线服务吞吐量进行基准测试。" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:593 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:592 msgid "`throughput`: Benchmark offline inference throughput." msgstr "`throughput`:对离线推理吞吐量进行基准测试。" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:595 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:594 msgid "Take the `serve` as an example. Run the code as follows." msgstr "以 `serve` 为例。运行代码如下。" -#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:602 +#: ../../source/tutorials/models/Qwen3.5-397B-A17B.md:601 msgid "" "After about several minutes, you can get the performance evaluation " "result." diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/Fine_grained_TP.po b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/Fine_grained_TP.po index 1119975f..1fa64af4 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/Fine_grained_TP.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/Fine_grained_TP.po @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: vllm-ascend \n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"POT-Creation-Date: 2026-04-15 09:41+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language: zh_CN\n" @@ -20,8 +20,8 @@ msgstr "" "Generated-By: Babel 2.18.0\n" #: ../../source/user_guide/feature_guide/Fine_grained_TP.md:1 -msgid "Fine-Grained Tensor Parallelism (Finegrained TP)" -msgstr "细粒度张量并行 (Finegrained TP)" +msgid "Fine-Grained Tensor Parallelism (Fine-grained TP)" +msgstr "细粒度张量并行 (Fine-grained TP)" #: ../../source/user_guide/feature_guide/Fine_grained_TP.md:3 msgid "Overview" @@ -37,7 +37,10 @@ msgid "" "model head (lm_head), attention output projection (o_proj), and MLP " "blocks—via the `finegrained_tp_config` parameter." msgstr "" -"细粒度张量并行 (Fine-grained TP) 扩展了标准张量并行,允许为**不同的模型组件设置独立的张量并行规模**。与对所有层应用单一的全局 `tensor_parallel_size` 不同,细粒度 TP 允许用户通过 `finegrained_tp_config` 参数为关键模块(如嵌入层、语言模型头部 (lm_head)、注意力输出投影层 (o_proj) 和 MLP 块)配置独立的 TP 规模。" +"细粒度张量并行 (Fine-grained TP) " +"扩展了标准张量并行,允许为**不同的模型组件设置独立的张量并行规模**。与对所有层应用单一的全局 `tensor_parallel_size` " +"不同,细粒度 TP 允许用户通过 `finegrained_tp_config` 参数为关键模块(如嵌入层、语言模型头部 " +"(lm_head)、注意力输出投影层 (o_proj) 和 MLP 块)配置独立的 TP 规模。" #: ../../source/user_guide/feature_guide/Fine_grained_TP.md:7 msgid "" @@ -47,10 +50,11 @@ msgid "" "compatible with standard dense transformer architectures and integrates " "seamlessly into vLLM’s serving pipeline." 
msgstr "" -"此功能支持在单个模型内使用异构并行策略,从而能更精细地控制跨设备的权重分布、内存布局和通信模式。该特性与标准的密集 Transformer 架构兼容,并能无缝集成到 vLLM 的服务流水线中。" +"此功能支持在单个模型内使用异构并行策略,从而能更精细地控制跨设备的权重分布、内存布局和通信模式。该特性与标准的密集 Transformer " +"架构兼容,并能无缝集成到 vLLM 的服务流水线中。" #: ../../source/user_guide/feature_guide/Fine_grained_TP.md:11 -msgid "Benefits of Finegrained TP" +msgid "Benefits of Fine-grained TP" msgstr "细粒度 TP 的优势" #: ../../source/user_guide/feature_guide/Fine_grained_TP.md:13 @@ -62,11 +66,12 @@ msgstr "细粒度张量并行通过有针对性的权重分片带来两个主要 #: ../../source/user_guide/feature_guide/Fine_grained_TP.md:15 msgid "" "**Reduced Per-Device Memory Footprint**: Fine-grained TP shards large " -"weight matrices(e.g., LM Head, o_proj)across devices, lowering peak " +"weight matrices (e.g., LM Head, o_proj) across devices, lowering peak " "memory usage and enabling larger batches or deployment on memory-limited " "hardware—without quantization." msgstr "" -"**降低单设备内存占用**: 细粒度 TP 将大型权重矩阵(例如 LM Head、o_proj)分片到多个设备上,降低了峰值内存使用量,从而支持更大的批次或在内存受限的硬件上进行部署——无需量化。" +"**降低单设备内存占用**: 细粒度 TP 将大型权重矩阵(例如 LM " +"Head、o_proj)分片到多个设备上,降低了峰值内存使用量,从而支持更大的批次或在内存受限的硬件上进行部署——无需量化。" #: ../../source/user_guide/feature_guide/Fine_grained_TP.md:18 msgid "" @@ -76,7 +81,9 @@ msgid "" "efficiency—especially for latency-sensitive layers like LM Head and " "o_proj." msgstr "" -"**加速 GEMM 中的内存访问**: 在解码密集型工作负载中,GEMM 性能通常受内存带宽限制。权重分片减少了每个设备需要获取的权重数据量,从而降低了 DRAM 流量并提高了带宽效率——对于 LM Head 和 o_proj 等延迟敏感层尤其如此。" +"**加速 GEMM 中的内存访问**: 在解码密集型工作负载中,GEMM " +"性能通常受内存带宽限制。权重分片减少了每个设备需要获取的权重数据量,从而降低了 DRAM 流量并提高了带宽效率——对于 LM Head 和 " +"o_proj 等延迟敏感层尤其如此。" #: ../../source/user_guide/feature_guide/Fine_grained_TP.md:21 msgid "" @@ -99,7 +106,9 @@ msgid "" "Fine-grained TP is **model-agnostic** and supports all standard dense " "transformer architectures, including Llama, Qwen, DeepSeek (base/dense " "variants), and others." -msgstr "细粒度 TP 是**模型无关的**,支持所有标准的密集 Transformer 架构,包括 Llama、Qwen、DeepSeek(基础/密集变体)等。" +msgstr "" +"细粒度 TP 是**模型无关的**,支持所有标准的密集 Transformer 架构,包括 " +"Llama、Qwen、DeepSeek(基础/密集变体)等。" #: ../../source/user_guide/feature_guide/Fine_grained_TP.md:31 msgid "Component & Execution Mode Support" @@ -161,7 +170,9 @@ msgstr "⚠️ 注意:" msgid "" "`o_proj` TP is only supported in Graph mode during Decode, because " "dummy_run in eager mode will not trigger o_proj." -msgstr "`o_proj` TP 仅在 Decode 阶段的 Graph 模式下受支持,因为 eager 模式下的 dummy_run 不会触发 o_proj。" +msgstr "" +"`o_proj` TP 仅在 Decode 阶段的 Graph 模式下受支持,因为 eager 模式下的 dummy_run 不会触发 " +"o_proj。" #: ../../source/user_guide/feature_guide/Fine_grained_TP.md:43 msgid "" @@ -194,7 +205,7 @@ msgid "" msgstr "⚠️ 违反这些约束将导致运行时错误或未定义行为。" #: ../../source/user_guide/feature_guide/Fine_grained_TP.md:56 -msgid "How to Use Finegrained TP" +msgid "How to Use Fine-grained TP" msgstr "如何使用细粒度 TP" #: ../../source/user_guide/feature_guide/Fine_grained_TP.md:58 @@ -222,7 +233,9 @@ msgid "" "decode instances in an environment of 32 cards Ascend 910B*64G (A2), with" " parallel configuration as DP32+EP32, and fine-grained TP size of 8; the " "performance data is as follows." -msgstr "为评估细粒度 TP 在大规模服务场景中的有效性,我们使用模型 **DeepSeek-R1-W8A8**,在 32 卡 Ascend 910B*64G (A2) 环境中部署 PD 分离的解码实例,并行配置为 DP32+EP32,细粒度 TP 规模为 8;性能数据如下。" +msgstr "" +"为评估细粒度 TP 在大规模服务场景中的有效性,我们使用模型 **DeepSeek-R1-W8A8**,在 32 卡 Ascend " +"910B*64G (A2) 环境中部署 PD 分离的解码实例,并行配置为 DP32+EP32,细粒度 TP 规模为 8;性能数据如下。" #: ../../source/user_guide/feature_guide/Fine_grained_TP.md msgid "Module" @@ -304,4 +317,6 @@ msgid "" "PD separation, where models are typically deployed in all-DP mode. 
In " "this setup, sharding weight-heavy layers reduces redundant storage and " "memory pressure." -msgstr "细粒度 TP 在 PD 分离的**解码实例**中**最有效**,因为模型通常以全 DP 模式部署。在此设置中,对权重密集的层进行分片可以减少冗余存储和内存压力。" \ No newline at end of file +msgstr "" +"细粒度 TP 在 PD 分离的**解码实例**中**最有效**,因为模型通常以全 DP " +"模式部署。在此设置中,对权重密集的层进行分片可以减少冗余存储和内存压力。" \ No newline at end of file diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/epd_disaggregation.po b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/epd_disaggregation.po index ff96621a..7a729fd6 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/epd_disaggregation.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/epd_disaggregation.po @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: vllm-ascend \n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"POT-Creation-Date: 2026-04-15 09:41+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language: zh_CN\n" @@ -34,7 +34,8 @@ msgid "" "Deploying these two stages in independent vLLM instances brings three " "practical benefits:" msgstr "" -"**解耦编码器** 将多模态大语言模型的视觉编码器阶段运行在与预填充/解码器阶段分离的进程中。将这两个阶段部署在独立的 vLLM 实例中,带来三个实际好处:" +"**解耦编码器** 将多模态大语言模型的视觉编码器阶段运行在与预填充/解码器阶段分离的进程中。将这两个阶段部署在独立的 vLLM " +"实例中,带来三个实际好处:" #: ../../source/user_guide/feature_guide/epd_disaggregation.md:7 msgid "**Independent, fine-grained scaling**" @@ -89,8 +90,8 @@ msgid "" "Design doc: " msgstr "" -"设计文档:" +"设计文档:" #: ../../source/user_guide/feature_guide/epd_disaggregation.md:27 msgid "Usage" @@ -107,16 +108,16 @@ msgid "" "1 Encoder instance + 1 PD instance: " "`examples/online_serving/disaggregated_encoder/disagg_1e1pd/`" msgstr "" -"1 个编码器实例 + 1 个 PD 实例:" -"`examples/online_serving/disaggregated_encoder/disagg_1e1pd/`" +"1 个编码器实例 + 1 个 PD " +"实例:`examples/online_serving/disaggregated_encoder/disagg_1e1pd/`" #: ../../source/user_guide/feature_guide/epd_disaggregation.md:35 msgid "" "1 Encoder instance + 1 Prefill instance + 1 Decode instance: " "`examples/online_serving/disaggregated_encoder/disagg_1e1p1d/`" msgstr "" -"1 个编码器实例 + 1 个预填充实例 + 1 个解码实例:" -"`examples/online_serving/disaggregated_encoder/disagg_1e1p1d/`" +"1 个编码器实例 + 1 个预填充实例 + 1 " +"个解码实例:`examples/online_serving/disaggregated_encoder/disagg_1e1p1d/`" #: ../../source/user_guide/feature_guide/epd_disaggregation.md:40 msgid "Development" @@ -154,7 +155,8 @@ msgid "" "instance to the PD instance. All related code is under " "`vllm/distributed/ec_transfer`." msgstr "" -"一个连接器将编码器缓存 (EC) 嵌入向量从编码器实例传输到 PD 实例。所有相关代码位于 `vllm/distributed/ec_transfer` 目录下。" +"一个连接器将编码器缓存 (EC) 嵌入向量从编码器实例传输到 PD 实例。所有相关代码位于 " +"`vllm/distributed/ec_transfer` 目录下。" #: ../../source/user_guide/feature_guide/epd_disaggregation.md:53 msgid "Key abstractions" @@ -175,7 +177,7 @@ msgid "*Worker role* – loads the embeddings into memory." msgstr "*工作进程角色* – 将嵌入向量加载到内存中。" #: ../../source/user_guide/feature_guide/epd_disaggregation.md:59 -msgid "**EPD Load Balance Proxy** -" +msgid "**EPD Load Balancing Proxy** -" msgstr "**EPD 负载均衡代理** -" #: ../../source/user_guide/feature_guide/epd_disaggregation.md:60 @@ -200,12 +202,14 @@ msgid "" " to facilitate the kv transfer between P and D. 
For step-by-step " "deployment and configuration of Mooncake, refer to the following guide:" " " -"[https://docs.vllm.ai/projects/ascend/en/latest/tutorials/pd_disaggregation_mooncake_multi_node.html](https://docs.vllm.ai/projects/ascend/en/latest/tutorials/features/pd_disaggregation_mooncake_multi_node.html)" +"[https://docs.vllm.ai/projects/ascend/en/latest/tutorials/features/pd_disaggregation_mooncake_multi_node.html](https://docs.vllm.ai/projects/ascend/en/latest/tutorials/features/pd_disaggregation_mooncake_multi_node.html)" msgstr "" -"我们使用来自 `vllm_ascend/distributed/kv_transfer/kv_p2p/mooncake_layerwise_connector.py` 的 **MooncakeLayerwiseConnector** 创建示例设置,并参考 " -"`examples/disaggregated_prefill_v1/load_balance_proxy_layerwise_server_example.py` 来促进 P 和 D 之间的 KV 传输。关于 Mooncake 的逐步部署和配置,请参考以下指南:" -" " -"[https://docs.vllm.ai/projects/ascend/en/latest/tutorials/pd_disaggregation_mooncake_multi_node.html](https://docs.vllm.ai/projects/ascend/en/latest/tutorials/features/pd_disaggregation_mooncake_multi_node.html)" +"我们使用来自 " +"`vllm_ascend/distributed/kv_transfer/kv_p2p/mooncake_layerwise_connector.py`" +" 的 **MooncakeLayerwiseConnector** 创建示例设置,并参考 " +"`examples/disaggregated_prefill_v1/load_balance_proxy_layerwise_server_example.py`" +" 来促进 P 和 D 之间的 KV 传输。关于 Mooncake 的逐步部署和配置,请参考以下指南: " +"[https://docs.vllm.ai/projects/ascend/en/latest/tutorials/features/pd_disaggregation_mooncake_multi_node.html](https://docs.vllm.ai/projects/ascend/en/latest/tutorials/features/pd_disaggregation_mooncake_multi_node.html)" #: ../../source/user_guide/feature_guide/epd_disaggregation.md:66 msgid "" @@ -218,7 +222,10 @@ msgid "" "`docs/source/developer_guide/Design_Documents/disaggregated_prefill.md` " "shows the brief idea about the disaggregated prefill." msgstr "" -"对于 PD 解耦部分,当使用 MooncakeLayerwiseConnector 时:请求首先进入解码器实例,解码器通过元服务器反向触发一个远程预填充任务。然后预填充节点执行推理,并将 KV 缓存逐层推送到解码器,实现计算与传输的重叠。一旦传输完成,解码器无缝地继续后续的令牌生成。`docs/source/developer_guide/Design_Documents/disaggregated_prefill.md` 展示了关于解耦预填充的简要思路。" +"对于 PD 解耦部分,当使用 MooncakeLayerwiseConnector " +"时:请求首先进入解码器实例,解码器通过元服务器反向触发一个远程预填充任务。然后预填充节点执行推理,并将 KV " +"缓存逐层推送到解码器,实现计算与传输的重叠。一旦传输完成,解码器无缝地继续后续的令牌生成。`docs/source/developer_guide/Design_Documents/disaggregated_prefill.md`" +" 展示了关于解耦预填充的简要思路。" #: ../../source/user_guide/feature_guide/epd_disaggregation.md:69 msgid "Limitations" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/external_dp.po b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/external_dp.po index 77370fa7..81d1a184 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/external_dp.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/external_dp.po @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: vllm-ascend \n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"POT-Creation-Date: 2026-04-15 09:41+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language: zh_CN\n" @@ -35,10 +35,12 @@ msgid "" "vLLM deployment, with its own endpoint, and have an external router " "balance HTTP requests between them, making use of appropriate real-time " "telemetry from each server for routing decisions." 
-msgstr "在这种情况下,将每个数据并行等级视为一个独立的 vLLM 部署(拥有自己的端点),并使用一个外部路由器在它们之间平衡 HTTP 请求,同时利用来自每个服务器的适当实时遥测数据来做出路由决策,会更加方便。" +msgstr "" +"在这种情况下,将每个数据并行等级视为一个独立的 vLLM 部署(拥有自己的端点),并使用一个外部路由器在它们之间平衡 HTTP " +"请求,同时利用来自每个服务器的适当实时遥测数据来做出路由决策,会更加方便。" #: ../../source/user_guide/feature_guide/external_dp.md:7 -msgid "Getting Start" +msgid "Getting Started" msgstr "开始使用" #: ../../source/user_guide/feature_guide/external_dp.md:9 @@ -47,7 +49,9 @@ msgid "" "DP](https://docs.vllm.ai/en/latest/serving/data_parallel_deployment/?h=external" "#external-load-balancing) is already natively supported by vLLM. In vllm-" "ascend we provide two enhanced functionalities:" -msgstr "[外部数据并行](https://docs.vllm.ai/en/latest/serving/data_parallel_deployment/?h=external#external-load-balancing) 功能已由 vLLM 原生支持。在 vllm-ascend 中,我们提供了两项增强功能:" +msgstr "" +"[外部数据并行](https://docs.vllm.ai/en/latest/serving/data_parallel_deployment/?h=external" +"#external-load-balancing) 功能已由 vLLM 原生支持。在 vllm-ascend 中,我们提供了两项增强功能:" #: ../../source/user_guide/feature_guide/external_dp.md:11 msgid "" @@ -85,7 +89,9 @@ msgid "" "parallel. These can be mock servers or actual vLLM servers. Note that " "this proxy also works with only one vLLM server running, but will fall " "back to direct request forwarding which is meaningless." -msgstr "首先,您需要至少运行两个处于数据并行模式的 vLLM 服务器。这些可以是模拟服务器或实际的 vLLM 服务器。请注意,此代理在仅运行一个 vLLM 服务器时也能工作,但会退化为直接请求转发,这没有意义。" +msgstr "" +"首先,您需要至少运行两个处于数据并行模式的 vLLM 服务器。这些可以是模拟服务器或实际的 vLLM 服务器。请注意,此代理在仅运行一个 vLLM" +" 服务器时也能工作,但会退化为直接请求转发,这没有意义。" #: ../../source/user_guide/feature_guide/external_dp.md:29 msgid "" @@ -93,7 +99,9 @@ msgid "" "launch script in `examples/external_online_dp`. For scenarios of large DP" " size across multiple nodes, we recommend using our launch script for " "convenience." -msgstr "您可以手动逐个启动外部 vLLM 数据并行服务器,也可以使用 `examples/external_online_dp` 中的启动脚本。对于跨多个节点的大规模数据并行场景,我们建议使用我们的启动脚本以方便操作。" +msgstr "" +"您可以手动逐个启动外部 vLLM 数据并行服务器,也可以使用 `examples/external_online_dp` " +"中的启动脚本。对于跨多个节点的大规模数据并行场景,我们建议使用我们的启动脚本以方便操作。" #: ../../source/user_guide/feature_guide/external_dp.md:31 msgid "Manually Launch" @@ -112,7 +120,12 @@ msgid "" " instances in one command on each node. It will internally call " "`examples/external_online_dp/run_dp_template.sh` for each DP rank with " "proper DP-related parameters." -msgstr "首先,您需要根据您的 vLLM 配置修改 `examples/external_online_dp/run_dp_template.sh`。然后,您可以使用 `examples/external_online_dp/launch_online_dp.py` 在每个节点上通过一条命令启动多个 vLLM 实例。它将在内部为每个数据并行等级调用 `examples/external_online_dp/run_dp_template.sh`,并传入适当的数据并行相关参数。" +msgstr "" +"首先,您需要根据您的 vLLM 配置修改 " +"`examples/external_online_dp/run_dp_template.sh`。然后,您可以使用 " +"`examples/external_online_dp/launch_online_dp.py` 在每个节点上通过一条命令启动多个 vLLM " +"实例。它将在内部为每个数据并行等级调用 " +"`examples/external_online_dp/run_dp_template.sh`,并传入适当的数据并行相关参数。" #: ../../source/user_guide/feature_guide/external_dp.md:43 msgid "An example of running external DP in one single node:" @@ -131,7 +144,9 @@ msgid "" "After all vLLM DP instances are launched, you can now launch the load-" "balance proxy server, which serves as an entrypoint for coming requests " "and load-balances them between vLLM DP instances." 
-msgstr "所有 vLLM 数据并行实例启动后,您现在可以启动负载均衡代理服务器。该服务器作为传入请求的入口点,并在各个 vLLM 数据并行实例之间进行负载均衡。" +msgstr "" +"所有 vLLM 数据并行实例启动后,您现在可以启动负载均衡代理服务器。该服务器作为传入请求的入口点,并在各个 vLLM " +"数据并行实例之间进行负载均衡。" #: ../../source/user_guide/feature_guide/external_dp.md:70 msgid "The proxy server has the following features:" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/large_scale_ep.po b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/large_scale_ep.po index e1bb9c05..99ab6f33 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/large_scale_ep.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/large_scale_ep.po @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: vllm-ascend \n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"POT-Creation-Date: 2026-04-15 09:41+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language: zh_CN\n" @@ -24,7 +24,7 @@ msgid "Distributed DP Server With Large-Scale Expert Parallelism" msgstr "分布式数据并行服务器与大规模专家并行" #: ../../source/user_guide/feature_guide/large_scale_ep.md:3 -msgid "Getting Start" +msgid "Getting Started" msgstr "快速开始" #: ../../source/user_guide/feature_guide/large_scale_ep.md:5 @@ -42,7 +42,11 @@ msgid "" "independently, while the decoder nodes use the 192.0.0.5 node as the " "master node." msgstr "" -"vLLM-Ascend 现已支持在大规模**专家并行(EP)**场景下的预填充-解码(PD)解耦。为获得更好的性能,vLLM-Ascend 中应用了分布式数据并行服务器。在 PD 分离场景下,可以根据 PD 节点的不同特性实施不同的优化策略,从而实现更灵活的模型部署。以 DeepSeek 模型为例,使用 8 台 Atlas 800T A3 服务器部署模型。假设服务器 IP 从 192.0.0.1 开始到 192.0.0.8 结束。使用前 4 台服务器作为预填充节点,后 4 台服务器作为解码节点。并且预填充节点独立部署为主节点,而解码节点使用 192.0.0.5 节点作为主节点。" +"vLLM-Ascend 现已支持在大规模**专家并行(EP)**场景下的预填充-解码(PD)解耦。为获得更好的性能,vLLM-Ascend " +"中应用了分布式数据并行服务器。在 PD 分离场景下,可以根据 PD 节点的不同特性实施不同的优化策略,从而实现更灵活的模型部署。以 " +"DeepSeek 模型为例,使用 8 台 Atlas 800T A3 服务器部署模型。假设服务器 IP 从 192.0.0.1 开始到 " +"192.0.0.8 结束。使用前 4 台服务器作为预填充节点,后 4 台服务器作为解码节点。并且预填充节点独立部署为主节点,而解码节点使用 " +"192.0.0.5 节点作为主节点。" #: ../../source/user_guide/feature_guide/large_scale_ep.md:8 msgid "Verify Multi-Node Communication Environment" @@ -65,7 +69,8 @@ msgid "" "the Atlas A3 generation, both intra-node and inter-node connectivity are " "via HCCS." msgstr "" -"所有 NPU 必须互连。对于 Atlas A2 代,节点内连接通过 HCCS,节点间连接通过 RDMA。对于 Atlas A3 代,节点内和节点间连接均通过 HCCS。" +"所有 NPU 必须互连。对于 Atlas A2 代,节点内连接通过 HCCS,节点间连接通过 RDMA。对于 Atlas A3 " +"代,节点内和节点间连接均通过 HCCS。" #: ../../source/user_guide/feature_guide/large_scale_ep.md:15 msgid "Verification Process" @@ -145,7 +150,9 @@ msgid "" "master node independently, while the decoder nodes use the 192.0.0.5 node" " as the master node. 
This leads to differences in 'dp_size_local' and " "'dp_rank_start'" -msgstr "请注意,预填充节点和解码节点可能具有不同的配置。在此示例中,每个预填充节点独立部署为主节点,而解码节点使用 192.0.0.5 节点作为主节点。这导致了 'dp_size_local' 和 'dp_rank_start' 的差异。" +msgstr "" +"请注意,预填充节点和解码节点可能具有不同的配置。在此示例中,每个预填充节点独立部署为主节点,而解码节点使用 192.0.0.5 " +"节点作为主节点。这导致了 'dp_size_local' 和 'dp_rank_start' 的差异。" #: ../../source/user_guide/feature_guide/large_scale_ep.md:319 msgid "Example proxy for Distributed DP Server" @@ -251,7 +258,10 @@ msgid "" "[load\\_balance\\_proxy\\_server\\_example.py](https://github.com/vllm-" "project/vllm-" "ascend/blob/v0.9.1-dev/examples/disaggregate_prefill_v1/load_balance_proxy_server_example.py)" -msgstr "您可以在仓库的示例中找到代理程序,[load_balance_proxy_server_example.py](https://github.com/vllm-project/vllm-ascend/blob/v0.9.1-dev/examples/disaggregate_prefill_v1/load_balance_proxy_server_example.py)" +msgstr "" +"您可以在仓库的示例中找到代理程序,[load_balance_proxy_server_example.py](https://github.com" +"/vllm-project/vllm-" +"ascend/blob/v0.9.1-dev/examples/disaggregate_prefill_v1/load_balance_proxy_server_example.py)" #: ../../source/user_guide/feature_guide/large_scale_ep.md:366 msgid "Benchmark" @@ -262,7 +272,9 @@ msgid "" "We recommend using aisbench tool to assess performance. " "[aisbench](https://gitee.com/aisbench/benchmark). Execute the following " "commands to install aisbench" -msgstr "我们推荐使用 aisbench 工具评估性能。[aisbench](https://gitee.com/aisbench/benchmark)。执行以下命令安装 aisbench" +msgstr "" +"我们推荐使用 aisbench " +"工具评估性能。[aisbench](https://gitee.com/aisbench/benchmark)。执行以下命令安装 aisbench" #: ../../source/user_guide/feature_guide/large_scale_ep.md:376 msgid "" @@ -281,7 +293,9 @@ msgid "" "You can change the configuration in the directory " ":`benchmark/ais_bench/benchmark/configs/models/vllm_api` Take " "`vllm_api_stream_chat.py` as an example:" -msgstr "您可以在目录:`benchmark/ais_bench/benchmark/configs/models/vllm_api` 中更改配置。以 `vllm_api_stream_chat.py` 为例:" +msgstr "" +"您可以在目录:`benchmark/ais_bench/benchmark/configs/models/vllm_api` 中更改配置。以 " +"`vllm_api_stream_chat.py` 为例:" #: ../../source/user_guide/feature_guide/large_scale_ep.md:411 msgid "" @@ -293,7 +307,9 @@ msgstr "以 gsm8k 数据集为例,执行以下命令评估性能。" msgid "" "For more details on commands and parameters for aisbench, refer to " "[aisbench](https://gitee.com/aisbench/benchmark)" -msgstr "有关 aisbench 命令和参数的更多详细信息,请参考 [aisbench](https://gitee.com/aisbench/benchmark)" +msgstr "" +"有关 aisbench 命令和参数的更多详细信息,请参考 " +"[aisbench](https://gitee.com/aisbench/benchmark)" #: ../../source/user_guide/feature_guide/large_scale_ep.md:419 msgid "Prefill & Decode Configuration Details" @@ -368,7 +384,9 @@ msgid "" "is 7K. In this scenario, we give a recommended configuration for " "distributed DP server with high EP. Here we use 4 nodes for prefill and 4" " nodes for decode." 
-msgstr "例如,如果平均输入长度为 3.5k,输出长度为 1.1k,上下文长度为 16k,输入数据集的最大长度为 7K。在此场景下,我们为具有高 EP 的分布式数据并行服务器提供了一个推荐配置。这里我们使用 4 个节点进行预填充,4 个节点进行解码。" +msgstr "" +"例如,如果平均输入长度为 3.5k,输出长度为 1.1k,上下文长度为 16k,输入数据集的最大长度为 7K。在此场景下,我们为具有高 EP " +"的分布式数据并行服务器提供了一个推荐配置。这里我们使用 4 个节点进行预填充,4 个节点进行解码。" #: ../../source/user_guide/feature_guide/large_scale_ep.md:282 msgid "node" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/release_notes.po b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/release_notes.po index 50585e3b..731dca33 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/release_notes.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/release_notes.po @@ -7,7 +7,7 @@ msgid "" msgstr "" "Project-Id-Version: PROJECT VERSION\n" "Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" -"POT-Creation-Date: 2026-04-14 09:08+0000\n" +"POT-Creation-Date: 2026-04-15 09:41+0000\n" "PO-Revision-Date: 2025-07-18 10:11+0800\n" "Last-Translator: \n" "Language: zh\n" @@ -32,7 +32,8 @@ msgid "" "follow the [official doc](https://docs.vllm.ai/projects/ascend/en/latest)" " to get started." msgstr "" -"这是 vLLM Ascend v0.17.0 的第一个候选发布版本。请按照[官方文档](https://docs.vllm.ai/projects/ascend/en/latest)开始使用。" +"这是 vLLM Ascend v0.17.0 " +"的第一个候选发布版本。请按照[官方文档](https://docs.vllm.ai/projects/ascend/en/latest)开始使用。" #: ../../source/user_guide/release_notes.md:7 #: ../../source/user_guide/release_notes.md:61 @@ -71,14 +72,16 @@ msgid "" "Ascend950 chip is now supported. [#7151](https://github.com/vllm-project" "/vllm-ascend/pull/7151)" msgstr "" -"现已支持 Ascend950 芯片。 [#7151](https://github.com/vllm-project/vllm-ascend/pull/7151)" +"现已支持 Ascend950 芯片。 [#7151](https://github.com/vllm-project/vllm-" +"ascend/pull/7151)" #: ../../source/user_guide/release_notes.md:10 msgid "" "ACLGraph (graph mode) is now supported for Model Runner V2. " "[#7110](https://github.com/vllm-project/vllm-ascend/pull/7110)" msgstr "" -"Model Runner V2 现已支持 ACLGraph(图模式)。 [#7110](https://github.com/vllm-project/vllm-ascend/pull/7110)" +"Model Runner V2 现已支持 ACLGraph(图模式)。 [#7110](https://github.com/vllm-" +"project/vllm-ascend/pull/7110)" #: ../../source/user_guide/release_notes.md:11 msgid "" @@ -86,7 +89,8 @@ msgid "" " draft inference schemes simultaneously. [#6766](https://github.com/vllm-" "project/vllm-ascend/pull/6766)" msgstr "" -"现已支持统一的并行化推测解码,可同时启用并行草稿推理方案。 [#6766](https://github.com/vllm-project/vllm-ascend/pull/6766)" +"现已支持统一的并行化推测解码,可同时启用并行草稿推理方案。 [#6766](https://github.com/vllm-project" +"/vllm-ascend/pull/6766)" #: ../../source/user_guide/release_notes.md:13 #: ../../source/user_guide/release_notes.md:66 @@ -105,7 +109,9 @@ msgid "" "not required now. [#7111](https://github.com/vllm-project/vllm-" "ascend/pull/7111)" msgstr "" -"支持从模型文件自动检测量化格式,同时也支持远程模型 ID(例如 `org/model-name`)。现在不再需要 `--quantization ascend` 参数。 [#7111](https://github.com/vllm-project/vllm-ascend/pull/7111)" +"支持从模型文件自动检测量化格式,同时也支持远程模型 ID(例如 `org/model-name`)。现在不再需要 `--quantization " +"ascend` 参数。 [#7111](https://github.com/vllm-project/vllm-" +"ascend/pull/7111)" #: ../../source/user_guide/release_notes.md:16 msgid "Qwen3.5 is supported from this version on." @@ -117,35 +123,40 @@ msgid "" "stage load balancing for better expert parallelism efficiency. 
" "[#6477](https://github.com/vllm-project/vllm-ascend/pull/6477)" msgstr "" -"用于 EPLB 的 FlashLB 算法:支持每步热度收集和多阶段负载均衡,以提高专家并行效率。 [#6477](https://github.com/vllm-project/vllm-ascend/pull/6477)" +"用于 EPLB 的 FlashLB 算法:支持每步热度收集和多阶段负载均衡,以提高专家并行效率。 " +"[#6477](https://github.com/vllm-project/vllm-ascend/pull/6477)" #: ../../source/user_guide/release_notes.md:18 msgid "" "LoRA with tensor parallel and `--fully-sharded-loras` is now fixed and " "working. [#6650](https://github.com/vllm-project/vllm-ascend/pull/6650)" msgstr "" -"已修复并支持了结合张量并行和 `--fully-sharded-loras` 的 LoRA。 [#6650](https://github.com/vllm-project/vllm-ascend/pull/6650)" +"已修复并支持了结合张量并行和 `--fully-sharded-loras` 的 LoRA。 [#6650](https://github.com" +"/vllm-project/vllm-ascend/pull/6650)" #: ../../source/user_guide/release_notes.md:19 msgid "" "LMCacheAscendConnector is added as a new KV cache pooling solution for " "Ascend. [#6882](https://github.com/vllm-project/vllm-ascend/pull/6882)" msgstr "" -"新增 LMCacheAscendConnector 作为 Ascend 的新 KV 缓存池化解决方案。 [#6882](https://github.com/vllm-project/vllm-ascend/pull/6882)" +"新增 LMCacheAscendConnector 作为 Ascend 的新 KV 缓存池化解决方案。 " +"[#6882](https://github.com/vllm-project/vllm-ascend/pull/6882)" #: ../../source/user_guide/release_notes.md:20 msgid "" "W8A8C8 quantization is now supported for DeepSeek-V3.2 in PD-mix " "scenario. [#7029](https://github.com/vllm-project/vllm-ascend/pull/7029)" msgstr "" -"现已在 PD-mix 场景下支持 DeepSeek-V3.2 的 W8A8C8 量化。 [#7029](https://github.com/vllm-project/vllm-ascend/pull/7029)" +"现已在 PD-mix 场景下支持 DeepSeek-V3.2 的 W8A8C8 量化。 [#7029](https://github.com" +"/vllm-project/vllm-ascend/pull/7029)" #: ../../source/user_guide/release_notes.md:21 msgid "" "[Experimental] Minimax-m2.5 model is now supported on Ascend NPU. " "[#7105](https://github.com/vllm-project/vllm-ascend/pull/7105)" msgstr "" -"[实验性] 现已在 Ascend NPU 上支持 Minimax-m2.5 模型。 [#7105](https://github.com/vllm-project/vllm-ascend/pull/7105)" +"[实验性] 现已在 Ascend NPU 上支持 Minimax-m2.5 模型。 [#7105](https://github.com" +"/vllm-project/vllm-ascend/pull/7105)" #: ../../source/user_guide/release_notes.md:22 msgid "" @@ -153,14 +164,16 @@ msgid "" " manager with multiple KV cache groups. [#7022](https://github.com/vllm-" "project/vllm-ascend/pull/7022)" msgstr "" -"[实验性] Mooncake Layerwise Connector 现在支持具有多个 KV 缓存组的混合注意力管理器。 [#7022](https://github.com/vllm-project/vllm-ascend/pull/7022)" +"[实验性] Mooncake Layerwise Connector 现在支持具有多个 KV 缓存组的混合注意力管理器。 " +"[#7022](https://github.com/vllm-project/vllm-ascend/pull/7022)" #: ../../source/user_guide/release_notes.md:23 msgid "" "[Experimental] Prefix cache is now supported in hybrid model. " "[#7103](https://github.com/vllm-project/vllm-ascend/pull/7103)" msgstr "" -"[实验性] 混合模型现已支持前缀缓存。 [#7103](https://github.com/vllm-project/vllm-ascend/pull/7103)" +"[实验性] 混合模型现已支持前缀缓存。 [#7103](https://github.com/vllm-project/vllm-" +"ascend/pull/7103)" #: ../../source/user_guide/release_notes.md:25 #: ../../source/user_guide/release_notes.md:83 @@ -178,14 +191,16 @@ msgid "" " PP deployments. [#7136](https://github.com/vllm-project/vllm-" "ascend/pull/7136)" msgstr "" -"流水线并行现在支持异步调度,提高了 PP 部署的吞吐量。 [#7136](https://github.com/vllm-project/vllm-ascend/pull/7136)" +"流水线并行现在支持异步调度,提高了 PP 部署的吞吐量。 [#7136](https://github.com/vllm-project" +"/vllm-ascend/pull/7136)" #: ../../source/user_guide/release_notes.md:28 msgid "" "Improved TTFT when using Mooncake connector by reducing log overhead. 
" "[#6125](https://github.com/vllm-project/vllm-ascend/pull/6125)" msgstr "" -"通过减少日志开销,改善了使用 Mooncake connector 时的首词元延迟。 [#6125](https://github.com/vllm-project/vllm-ascend/pull/6125)" +"通过减少日志开销,改善了使用 Mooncake connector 时的首词元延迟。 [#6125](https://github.com" +"/vllm-project/vllm-ascend/pull/6125)" #: ../../source/user_guide/release_notes.md:29 msgid "" @@ -193,7 +208,8 @@ msgid "" "block_size). [#7146](https://github.com/vllm-project/vllm-" "ascend/pull/7146)" msgstr "" -"针对短序列(token 长度 < block_size)优化了 KV 池查找。 [#7146](https://github.com/vllm-project/vllm-ascend/pull/7146)" +"针对短序列(token 长度 < block_size)优化了 KV 池查找。 [#7146](https://github.com/vllm-" +"project/vllm-ascend/pull/7146)" #: ../../source/user_guide/release_notes.md:30 msgid "" @@ -201,7 +217,8 @@ msgid "" "improvement. [#7013](https://github.com/vllm-project/vllm-" "ascend/pull/7013)" msgstr "" -"修复了 Model Runner V2 中的惩罚操作,实现了约 10% 的性能提升。 [#7013](https://github.com/vllm-project/vllm-ascend/pull/7013)" +"修复了 Model Runner V2 中的惩罚操作,实现了约 10% 的性能提升。 [#7013](https://github.com" +"/vllm-project/vllm-ascend/pull/7013)" #: ../../source/user_guide/release_notes.md:32 #: ../../source/user_guide/release_notes.md:101 @@ -215,21 +232,24 @@ msgid "" "Added EPD (Encode-Prefill-Decode) documentation and load-balance proxy " "example. [#6221](https://github.com/vllm-project/vllm-ascend/pull/6221)" msgstr "" -"新增了 EPD(编码-预填充-解码)文档和负载均衡代理示例。 [#6221](https://github.com/vllm-project/vllm-ascend/pull/6221)" +"新增了 EPD(编码-预填充-解码)文档和负载均衡代理示例。 [#6221](https://github.com/vllm-project" +"/vllm-ascend/pull/6221)" #: ../../source/user_guide/release_notes.md:35 msgid "" "Added Ascend PyTorch Profiler usage guide. [#7117](https://github.com" "/vllm-project/vllm-ascend/pull/7117)" msgstr "" -"新增了 Ascend PyTorch Profiler 使用指南。 [#7117](https://github.com/vllm-project/vllm-ascend/pull/7117)" +"新增了 Ascend PyTorch Profiler 使用指南。 [#7117](https://github.com/vllm-project" +"/vllm-ascend/pull/7117)" #: ../../source/user_guide/release_notes.md:36 msgid "" "Fixed DSV3.1 PD configuration documentation. [#7187](https://github.com" "/vllm-project/vllm-ascend/pull/7187)" msgstr "" -"修复了 DSV3.1 PD 配置文档。 [#7187](https://github.com/vllm-project/vllm-ascend/pull/7187)" +"修复了 DSV3.1 PD 配置文档。 [#7187](https://github.com/vllm-project/vllm-" +"ascend/pull/7187)" #: ../../source/user_guide/release_notes.md:38 #: ../../source/user_guide/release_notes.md:109 @@ -259,35 +279,41 @@ msgid "" "[#7158](https://github.com/vllm-project/vllm-ascend/pull/7158) " "[#7148](https://github.com/vllm-project/vllm-ascend/pull/7148)" msgstr "" -"修复了推测解码在全图模式下草稿器崩溃的问题。 [#7158](https://github.com/vllm-project/vllm-ascend/pull/7158) [#7148](https://github.com/vllm-project/vllm-ascend/pull/7148)" +"修复了推测解码在全图模式下草稿器崩溃的问题。 [#7158](https://github.com/vllm-project/vllm-" +"ascend/pull/7158) [#7148](https://github.com/vllm-project/vllm-" +"ascend/pull/7148)" #: ../../source/user_guide/release_notes.md:41 msgid "" "Fix GLM5-W8A8 precision issues caused by rotary quant MTP weights. " "[#7139](https://github.com/vllm-project/vllm-ascend/pull/7139)" msgstr "" -"修复了由旋转量化 MTP 权重引起的 GLM5-W8A8 精度问题。 [#7139](https://github.com/vllm-project/vllm-ascend/pull/7139)" +"修复了由旋转量化 MTP 权重引起的 GLM5-W8A8 精度问题。 [#7139](https://github.com/vllm-" +"project/vllm-ascend/pull/7139)" #: ../../source/user_guide/release_notes.md:42 msgid "" "Fix ngram graph replay accuracy error on 310P. 
[#7134](https://github.com" "/vllm-project/vllm-ascend/pull/7134)" msgstr "" -"修复了 310P 上 ngram 图重放的精度错误。 [#7134](https://github.com/vllm-project/vllm-ascend/pull/7134)" +"修复了 310P 上 ngram 图重放的精度错误。 [#7134](https://github.com/vllm-project/vllm-" +"ascend/pull/7134)" #: ../../source/user_guide/release_notes.md:43 msgid "" "Fix FIA pad logic in graph mode after upstream vLLM change. " "[#7144](https://github.com/vllm-project/vllm-ascend/pull/7144)" msgstr "" -"在上游 vLLM 变更后,修复了图模式下的 FIA 填充逻辑。 [#7144](https://github.com/vllm-project/vllm-ascend/pull/7144)" +"在上游 vLLM 变更后,修复了图模式下的 FIA 填充逻辑。 [#7144](https://github.com/vllm-project" +"/vllm-ascend/pull/7144)" #: ../../source/user_guide/release_notes.md:44 msgid "" "Fix a precision issue caused by wrong KV cache reshape on Qwen3.5. " "[#7209](https://github.com/vllm-project/vllm-ascend/pull/7209)" msgstr "" -"修复了 Qwen3.5 上因 KV 缓存重塑错误导致的精度问题。 [#7209](https://github.com/vllm-project/vllm-ascend/pull/7209)" +"修复了 Qwen3.5 上因 KV 缓存重塑错误导致的精度问题。 [#7209](https://github.com/vllm-project" +"/vllm-ascend/pull/7209)" #: ../../source/user_guide/release_notes.md:45 msgid "" @@ -312,8 +338,8 @@ msgid "" "project/vllm-ascend/pull/7109)" msgstr "" "通过将 torch_npu.npu_recurrent_gated_delta_rule 替换为 " -"fused_recurrent_gated_delta_rule 来修复 Qwen3.5 模型。[#7109](https://github.com/vllm-" -"project/vllm-ascend/pull/7109)" +"fused_recurrent_gated_delta_rule 来修复 Qwen3.5 " +"模型。[#7109](https://github.com/vllm-project/vllm-ascend/pull/7109)" #: ../../source/user_guide/release_notes.md:48 msgid "" @@ -336,14 +362,15 @@ msgid "" "project/vllm#30566](https://github.com/vllm-project/vllm/pull/30566), " "will not be included in v0.17.0." msgstr "" -"GLM5 需要 transformers==5.2.0,此问题将通过 [vllm-project/vllm#30566](https://github.com/vllm-project/vllm/pull/30566) 解决,不会包含在 v0.17.0 版本中。" +"GLM5 需要 transformers==5.2.0,此问题将通过 [vllm-" +"project/vllm#30566](https://github.com/vllm-project/vllm/pull/30566) " +"解决,不会包含在 v0.17.0 版本中。" #: ../../source/user_guide/release_notes.md:53 msgid "" "There is a precision issue with Qwen3-Next due to the changed tp weight " "split method. Will fix it in next release." -msgstr "" -"由于 TP 权重切分方法变更,Qwen3-Next 存在精度问题。将在下个版本中修复。" +msgstr "由于 TP 权重切分方法变更,Qwen3-Next 存在精度问题。将在下个版本中修复。" #: ../../source/user_guide/release_notes.md:54 msgid "" @@ -352,7 +379,8 @@ msgid "" "tp 2, the block_size is adjusted to 2048, which means that any prefix " "shorter than 2048 will never be cached." msgstr "" -"在混合模型中,当前前缀缓存命中所需的最小令牌数较大。具体数值与 TP 大小相关,例如,TP 为 2 时,block_size 调整为 2048,这意味着任何短于 2048 的前缀都不会被缓存。" +"在混合模型中,当前前缀缓存命中所需的最小令牌数较大。具体数值与 TP 大小相关,例如,TP 为 2 时,block_size 调整为 " +"2048,这意味着任何短于 2048 的前缀都不会被缓存。" #: ../../source/user_guide/release_notes.md:55 msgid "" @@ -361,7 +389,9 @@ msgid "" "[#7235](https://github.com/vllm-project/vllm-ascend/pull/7235) " "[#7290](https://github.com/vllm-project/vllm-ascend/pull/7290))." msgstr "" -"GLM5 在两节点 PD 混合部署场景中存在一个问题,当并发数超过 8 时推理可能挂起(已在 PR [#7235](https://github.com/vllm-project/vllm-ascend/pull/7235) 和 [#7290](https://github.com/vllm-project/vllm-ascend/pull/7290) 中修复)。" +"GLM5 在两节点 PD 混合部署场景中存在一个问题,当并发数超过 8 时推理可能挂起(已在 PR " +"[#7235](https://github.com/vllm-project/vllm-ascend/pull/7235) 和 " +"[#7290](https://github.com/vllm-project/vllm-ascend/pull/7290) 中修复)。" #: ../../source/user_guide/release_notes.md:57 msgid "v0.16.0rc1 - 2026.03.09" @@ -373,7 +403,8 @@ msgid "" "follow the [official doc](https://docs.vllm.ai/projects/ascend/en/latest)" " to get started." 
msgstr "" -"这是 vLLM Ascend v0.16.0 的第一个候选发布版本。请按照[官方文档](https://docs.vllm.ai/projects/ascend/en/latest)开始使用。" +"这是 vLLM Ascend v0.16.0 " +"的第一个候选发布版本。请按照[官方文档](https://docs.vllm.ai/projects/ascend/en/latest)开始使用。" #: ../../source/user_guide/release_notes.md:63 msgid "" @@ -389,16 +420,16 @@ msgid "" "dimensions. [#6902](https://github.com/vllm-project/vllm-" "ascend/pull/6902)" msgstr "" -"通过参数化硬编码的 MLA 维度,现已支持 GLM5-W8A8 量化。[#6902](https://github.com/vllm-project/vllm-" -"ascend/pull/6902)" +"通过参数化硬编码的 MLA 维度,现已支持 GLM5-W8A8 量化。[#6902](https://github.com/vllm-" +"project/vllm-ascend/pull/6902)" #: ../../source/user_guide/release_notes.md:68 msgid "" "[Experimental] Support FabricMem Mode for ADXL/HIXL interconnect. " "[#6806](https://github.com/vllm-project/vllm-ascend/pull/6806)" msgstr "" -"[实验性] 为 ADXL/HIXL 互连支持 FabricMem 模式。[#6806](https://github.com/vllm-project/vllm-" -"ascend/pull/6806)" +"[实验性] 为 ADXL/HIXL 互连支持 FabricMem 模式。[#6806](https://github.com/vllm-" +"project/vllm-ascend/pull/6806)" #: ../../source/user_guide/release_notes.md:69 msgid "" @@ -414,7 +445,8 @@ msgid "" "experience. [#6968](https://github.com/vllm-project/vllm-" "ascend/pull/6968)" msgstr "" -"NPUWorker Profiler 现在支持 profile_prefix,以提供更好的性能分析体验。[#6968](https://github.com/vllm-project/vllm-" +"NPUWorker Profiler 现在支持 " +"profile_prefix,以提供更好的性能分析体验。[#6968](https://github.com/vllm-project/vllm-" "ascend/pull/6968)" #: ../../source/user_guide/release_notes.md:71 @@ -422,10 +454,11 @@ msgid "" "EPLB profiling now displays expert hotness comparison and time required " "for eplb adjustment. [#6877](https://github.com/vllm-project/vllm-" "ascend/pull/6877) [#7001](https://github.com/vllm-project/vllm-" -"ascend/pull/7001)]" +"ascend/pull/7001)" msgstr "" -"EPLB 性能分析现在会显示专家热度对比以及 eplb 调整所需的时间。[#6877](https://github.com/vllm-project/vllm-" -"ascend/pull/6877) [#7001](https://github.com/vllm-project/vllm-ascend/pull/7001)]" +"EPLB 性能分析现在会显示专家热度对比以及 EPLB 调整所需的时间。[#6877](https://github.com/vllm-" +"project/vllm-ascend/pull/6877) [#7001](https://github.com/vllm-project/vllm-" +"ascend/pull/7001)" #: ../../source/user_guide/release_notes.md:72 msgid "" @@ -440,8 +473,8 @@ msgid "" "Mooncake Layerwise Connector now supports kv_pool. " "[#7032](https://github.com/vllm-project/vllm-ascend/pull/7032)" msgstr "" -"Mooncake Layerwise Connector 现在支持 kv_pool。[#7032](https://github.com/vllm-project/vllm-" -"ascend/pull/7032)" +"Mooncake Layerwise Connector 现在支持 kv_pool。[#7032](https://github.com" +"/vllm-project/vllm-ascend/pull/7032)" #: ../../source/user_guide/release_notes.md:74 msgid "" @@ -470,16 +503,16 @@ msgid "" "Added AscendC casual_conv1d_fn operator for Qwen3-Next. " "[#6661](https://github.com/vllm-project/vllm-ascend/pull/6661)" msgstr "" -"为 Qwen3-Next 添加了 AscendC casual_conv1d_fn 算子。[#6661](https://github.com/vllm-project/vllm-" -"ascend/pull/6661)" +"为 Qwen3-Next 添加了 AscendC casual_conv1d_fn 算子。[#6661](https://github.com" +"/vllm-project/vllm-ascend/pull/6661)" #: ../../source/user_guide/release_notes.md:80 msgid "" "Added Ascend Ops recurrent_gated_delta_rule operator. " "[#6725](https://github.com/vllm-project/vllm-ascend/pull/6725)" msgstr "" -"添加了 Ascend Ops recurrent_gated_delta_rule 算子。[#6725](https://github.com/vllm-project/vllm-" -"ascend/pull/6725)" +"添加了 Ascend Ops recurrent_gated_delta_rule 算子。[#6725](https://github.com" +"/vllm-project/vllm-ascend/pull/6725)" #: ../../source/user_guide/release_notes.md:81 msgid "" @@ -495,16 +528,16 @@ msgid "" "0.59% for Qwen3-VL models. 
[#7017](https://github.com/vllm-project/vllm-" "ascend/pull/7017)" msgstr "" -"更快的卷积计算将 Qwen3-VL 模型的 TTFT 提升了 0.95%,吞吐量提升了 0.59%。[#7017](https://github.com/vllm-project/vllm-" -"ascend/pull/7017)" +"更快的卷积计算将 Qwen3-VL 模型的 TTFT 提升了 0.95%,吞吐量提升了 " +"0.59%。[#7017](https://github.com/vllm-project/vllm-ascend/pull/7017)" #: ../../source/user_guide/release_notes.md:86 msgid "" "Optimize split_qkv_rmsnorm_rope operator. [#6827](https://github.com" "/vllm-project/vllm-ascend/pull/6827)" msgstr "" -"优化 split_qkv_rmsnorm_rope 算子。[#6827](https://github.com/vllm-project/vllm-" -"ascend/pull/6827)" +"优化 split_qkv_rmsnorm_rope 算子。[#6827](https://github.com/vllm-project" +"/vllm-ascend/pull/6827)" #: ../../source/user_guide/release_notes.md:87 msgid "" @@ -512,7 +545,8 @@ msgid "" "ensuring non-overlapping CPU partitions and better resource management. " "[#6945](https://github.com/vllm-project/vllm-ascend/pull/6945)" msgstr "" -"实现全局 CPU 切片并改进 Ascend NPU 的 IRQ 绑定,确保 CPU 分区不重叠并实现更好的资源管理。[#6945](https://github.com/vllm-project/vllm-" +"实现全局 CPU 切片并改进 Ascend NPU 的 IRQ 绑定,确保 CPU " +"分区不重叠并实现更好的资源管理。[#6945](https://github.com/vllm-project/vllm-" "ascend/pull/6945)" #: ../../source/user_guide/release_notes.md:88 @@ -528,16 +562,16 @@ msgid "" "Avoid CPU sync in mrope_positions copy by using full tensor copy. " "[#7014](https://github.com/vllm-project/vllm-ascend/pull/7014)" msgstr "" -"通过使用完整张量拷贝,避免 mrope_positions 拷贝中的 CPU 同步。[#7014](https://github.com/vllm-project/vllm-" -"ascend/pull/7014)" +"通过使用完整张量拷贝,避免 mrope_positions 拷贝中的 CPU 同步。[#7014](https://github.com" +"/vllm-project/vllm-ascend/pull/7014)" #: ../../source/user_guide/release_notes.md:90 msgid "" "Remove H2D synchronization for expert_map in MoE models. " "[#7000](https://github.com/vllm-project/vllm-ascend/pull/7000)" msgstr "" -"移除 MoE 模型中 expert_map 的 H2D 同步。[#7000](https://github.com/vllm-project/vllm-" -"ascend/pull/7000)" +"移除 MoE 模型中 expert_map 的 H2D 同步。[#7000](https://github.com/vllm-project" +"/vllm-ascend/pull/7000)" #: ../../source/user_guide/release_notes.md:92 #: ../../source/user_guide/release_notes.md:199 @@ -554,8 +588,8 @@ msgid "" "not using the official image. [#6897](https://github.com/vllm-project" "/vllm-ascend/pull/6897)" msgstr "" -"CANN 已升级至 8.5.1,如果您未使用官方镜像,请记得手动升级。[#6897](https://github.com/vllm-project/vllm-" -"ascend/pull/6897)" +"CANN 已升级至 8.5.1,如果您未使用官方镜像,请记得手动升级。[#6897](https://github.com/vllm-" +"project/vllm-ascend/pull/6897)" #: ../../source/user_guide/release_notes.md:96 #: ../../source/user_guide/release_notes.md:203 @@ -572,8 +606,8 @@ msgid "" "`enable_sp`. [#6883](https://github.com/vllm-project/vllm-" "ascend/pull/6883)" msgstr "" -"配置选项 `enable_flash_comm_v1` 已重命名回 `enable_sp`。[#6883](https://github.com/vllm-project/vllm-" -"ascend/pull/6883)" +"配置选项 `enable_flash_comm_v1` 已重命名回 `enable_sp`。[#6883](https://github.com" +"/vllm-project/vllm-ascend/pull/6883)" #: ../../source/user_guide/release_notes.md:99 msgid "" @@ -583,7 +617,10 @@ msgid "" "the bug with the remote model id is fixed. 
[#6873](https://github.com" "/vllm-project/vllm-ascend/pull/6873)" msgstr "" -"从模型文件自动检测量化格式的功能已回退,在 v0.16.0rc1 中,我们仍需添加 `--quantization ascend` 来服务由 modelslim 量化的模型。在修复了远程模型 ID 相关的错误后,此功能将在下个版本中重新加入。[#6873](https://github.com/vllm-project/vllm-ascend/pull/6873)" +"从模型文件自动检测量化格式的功能已回退,在 v0.16.0rc1 中,我们仍需添加 `--quantization ascend` 来服务由 " +"modelslim 量化的模型。在修复了远程模型 ID " +"相关的错误后,此功能将在下个版本中重新加入。[#6873](https://github.com/vllm-project/vllm-" +"ascend/pull/6873)" #: ../../source/user_guide/release_notes.md:103 msgid "" @@ -613,19 +650,25 @@ msgstr "" msgid "" "Added GLM4.x multi-node deploy tutorial. [#6872](https://github.com/vllm-" "project/vllm-ascend/pull/6872)" -msgstr "新增 GLM4.x 多节点部署教程。 [#6872](https://github.com/vllm-project/vllm-ascend/pull/6872)" +msgstr "" +"新增 GLM4.x 多节点部署教程。[#6872](https://github.com/vllm-project/vllm-" +"ascend/pull/6872)" #: ../../source/user_guide/release_notes.md:107 msgid "" "Added explanation of 310p special param: max-model-len. " "[#7065](https://github.com/vllm-project/vllm-ascend/pull/7065)" -msgstr "新增 310p 特殊参数 max-model-len 的说明。 [#7065](https://github.com/vllm-project/vllm-ascend/pull/7065)" +msgstr "" +"新增 310p 特殊参数 max-model-len 的说明。[#7065](https://github.com/vllm-project" +"/vllm-ascend/pull/7065)" #: ../../source/user_guide/release_notes.md:111 msgid "" "Fix openEuler Dockerfile error. [#6871](https://github.com/vllm-project" "/vllm-ascend/pull/6871)" -msgstr "修复 openEuler Dockerfile 错误。 [#6871](https://github.com/vllm-project/vllm-ascend/pull/6871)" +msgstr "" +"修复 openEuler Dockerfile 错误。 [#6871](https://github.com/vllm-project/vllm-" +"ascend/pull/6871)" #: ../../source/user_guide/release_notes.md:112 msgid "Many bug fixes including:" @@ -636,170 +679,228 @@ msgid "" "Fix Eagle speculative decoding with Context Parallel enabled. " "[#6981](https://github.com/vllm-project/vllm-ascend/pull/6981) " "[#7079](https://github.com/vllm-project/vllm-ascend/pull/7079)" -msgstr "修复启用上下文并行时 Eagle 推测式解码的问题。 [#6981](https://github.com/vllm-project/vllm-ascend/pull/6981) [#7079](https://github.com/vllm-project/vllm-ascend/pull/7079)" +msgstr "" +"修复启用上下文并行时 Eagle 推测式解码的问题。 [#6981](https://github.com/vllm-project/vllm-" +"ascend/pull/6981) [#7079](https://github.com/vllm-project/vllm-" +"ascend/pull/7079)" #: ../../source/user_guide/release_notes.md:114 msgid "" "Fix LoRA accuracy issue introduced by upstream vLLM changes. " "[#6958](https://github.com/vllm-project/vllm-ascend/pull/6958)" -msgstr "修复上游 vLLM 变更引入的 LoRA 精度问题。 [#6958](https://github.com/vllm-project/vllm-ascend/pull/6958)" +msgstr "" +"修复上游 vLLM 变更引入的 LoRA 精度问题。 [#6958](https://github.com/vllm-project/vllm-" +"ascend/pull/6958)" #: ../../source/user_guide/release_notes.md:115 msgid "" "Fix streaming content-type in load balance proxy server. " "[#6985](https://github.com/vllm-project/vllm-ascend/pull/6985)" -msgstr "修复负载均衡代理服务器中的流式传输 content-type。 [#6985](https://github.com/vllm-project/vllm-ascend/pull/6985)" +msgstr "" +"修复负载均衡代理服务器中的流式传输 content-type。 [#6985](https://github.com/vllm-project" +"/vllm-ascend/pull/6985)" #: ../../source/user_guide/release_notes.md:116 msgid "" "Fix metadata execute error: integer modulo by zero. " "[#6521](https://github.com/vllm-project/vllm-ascend/pull/6521)" -msgstr "修复元数据执行错误:整数除以零取模。 [#6521](https://github.com/vllm-project/vllm-ascend/pull/6521)" +msgstr "" +"修复元数据执行错误:整数除以零取模。 [#6521](https://github.com/vllm-project/vllm-" +"ascend/pull/6521)" #: ../../source/user_guide/release_notes.md:117 msgid "" "Fix triton rope_siso implementation bug. 
[#7082](https://github.com/vllm-" "project/vllm-ascend/pull/7082)" -msgstr "修复 triton rope_siso 实现中的错误。 [#7082](https://github.com/vllm-project/vllm-ascend/pull/7082)" +msgstr "" +"修复 triton rope_siso 实现中的错误。 [#7082](https://github.com/vllm-project/vllm-" +"ascend/pull/7082)" #: ../../source/user_guide/release_notes.md:118 msgid "" "Fix incorrect layer count for MTP models in update_aclgraph_sizes. " "[#7064](https://github.com/vllm-project/vllm-ascend/pull/7064)" -msgstr "修复 update_aclgraph_sizes 中 MTP 模型的层数错误。 [#7064](https://github.com/vllm-project/vllm-ascend/pull/7064)" +msgstr "" +"修复 update_aclgraph_sizes 中 MTP 模型的层数错误。 [#7064](https://github.com/vllm-" +"project/vllm-ascend/pull/7064)" #: ../../source/user_guide/release_notes.md:119 msgid "" "Fix compilation errors for CANN versions subsequent to b020. " "[#7059](https://github.com/vllm-project/vllm-ascend/pull/7059)" -msgstr "修复 b020 之后 CANN 版本的编译错误。 [#7059](https://github.com/vllm-project/vllm-ascend/pull/7059)" +msgstr "" +"修复 b020 之后 CANN 版本的编译错误。 [#7059](https://github.com/vllm-project/vllm-" +"ascend/pull/7059)" #: ../../source/user_guide/release_notes.md:120 msgid "" "Fix quant config support in GLM4.6V. [#7062](https://github.com/vllm-" "project/vllm-ascend/pull/7062)" -msgstr "修复 GLM4.6V 中的量化配置支持。 [#7062](https://github.com/vllm-project/vllm-ascend/pull/7062)" +msgstr "" +"修复 GLM4.6V 中的量化配置支持。 [#7062](https://github.com/vllm-project/vllm-" +"ascend/pull/7062)" #: ../../source/user_guide/release_notes.md:121 msgid "" "Fix parameter ordering bug in _merge_multimodal_embeddings. " "[#7068](https://github.com/vllm-project/vllm-ascend/pull/7068)" -msgstr "修复 _merge_multimodal_embeddings 中的参数顺序错误。 [#7068](https://github.com/vllm-project/vllm-ascend/pull/7068)" +msgstr "" +"修复 _merge_multimodal_embeddings 中的参数顺序错误。 [#7068](https://github.com" +"/vllm-project/vllm-ascend/pull/7068)" #: ../../source/user_guide/release_notes.md:122 msgid "" "Fix fused mc2 bug in EPLB. [#6794](https://github.com/vllm-project/vllm-" "ascend/pull/6794)" -msgstr "修复 EPLB 中的 fused mc2 错误。 [#6794](https://github.com/vllm-project/vllm-ascend/pull/6794)" +msgstr "" +"修复 EPLB 中的 fused mc2 错误。 [#6794](https://github.com/vllm-project/vllm-" +"ascend/pull/6794)" #: ../../source/user_guide/release_notes.md:123 msgid "" "Fix kernel block size for computing slot mapping. " "[#7019](https://github.com/vllm-project/vllm-ascend/pull/7019)" -msgstr "修复计算槽位映射的内核块大小。 [#7019](https://github.com/vllm-project/vllm-ascend/pull/7019)" +msgstr "" +"修复计算槽位映射的内核块大小。 [#7019](https://github.com/vllm-project/vllm-" +"ascend/pull/7019)" #: ../../source/user_guide/release_notes.md:124 msgid "" "Fix layerwise stacking MTP error in P/D disaggregation. " "[#7036](https://github.com/vllm-project/vllm-ascend/pull/7036)" -msgstr "修复 P/D 解耦中逐层堆叠 MTP 的错误。 [#7036](https://github.com/vllm-project/vllm-ascend/pull/7036)" +msgstr "" +"修复 P/D 解耦中逐层堆叠 MTP 的错误。 [#7036](https://github.com/vllm-project/vllm-" +"ascend/pull/7036)" #: ../../source/user_guide/release_notes.md:125 msgid "" "Fix RoPE dimension for npu_rotary_embedding. [#6880](https://github.com" "/vllm-project/vllm-ascend/pull/6880)" -msgstr "修复 npu_rotary_embedding 的 RoPE 维度。 [#6880](https://github.com/vllm-project/vllm-ascend/pull/6880)" +msgstr "" +"修复 npu_rotary_embedding 的 RoPE 维度。 [#6880](https://github.com/vllm-" +"project/vllm-ascend/pull/6880)" #: ../../source/user_guide/release_notes.md:126 msgid "" "Fix Qwen-Omni quantization bugs. 
[#7042](https://github.com/vllm-project" "/vllm-ascend/pull/7042) [#7007](https://github.com/vllm-project/vllm-" "ascend/pull/7007)" -msgstr "修复 Qwen-Omni 量化错误。 [#7042](https://github.com/vllm-project/vllm-ascend/pull/7042) [#7007](https://github.com/vllm-project/vllm-ascend/pull/7007)" +msgstr "" +"修复 Qwen-Omni 量化错误。 [#7042](https://github.com/vllm-project/vllm-" +"ascend/pull/7042) [#7007](https://github.com/vllm-project/vllm-" +"ascend/pull/7007)" #: ../../source/user_guide/release_notes.md:127 msgid "" "Fix GDN layer accuracy in graph mode. [#6822](https://github.com/vllm-" "project/vllm-ascend/pull/6822)" -msgstr "修复图模式下 GDN 层的精度问题。 [#6822](https://github.com/vllm-project/vllm-ascend/pull/6822)" +msgstr "" +"修复图模式下 GDN 层的精度问题。 [#6822](https://github.com/vllm-project/vllm-" +"ascend/pull/6822)" #: ../../source/user_guide/release_notes.md:128 msgid "" "Fix precision bugs for PCP/DCP in PD disaggregate. " "[#6876](https://github.com/vllm-project/vllm-ascend/pull/6876)" -msgstr "修复 PD 解耦中 PCP/DCP 的精度错误。 [#6876](https://github.com/vllm-project/vllm-ascend/pull/6876)" +msgstr "" +"修复 PD 解耦中 PCP/DCP 的精度错误。 [#6876](https://github.com/vllm-project/vllm-" +"ascend/pull/6876)" #: ../../source/user_guide/release_notes.md:129 msgid "" -"Fix MTP in PD disaggregation with fullgraph support for all D-Nodes. " +"Fix MTP in PD disaggregation with full graph support for all D-Nodes. " "[#6948](https://github.com/vllm-project/vllm-ascend/pull/6948)" -msgstr "修复 PD 解耦中的 MTP,为所有 D-Node 提供全图支持。 [#6948](https://github.com/vllm-project/vllm-ascend/pull/6948)" +msgstr "" +"修复 PD 解耦中的 MTP,为所有 D-Node 提供全图支持。 [#6948](https://github.com/vllm-project" +"/vllm-ascend/pull/6948)" #: ../../source/user_guide/release_notes.md:130 msgid "" "Fix GQA model error when enabling both DP and DCP. " "[#7012](https://github.com/vllm-project/vllm-ascend/pull/7012)" -msgstr "修复同时启用 DP 和 DCP 时 GQA 模型的错误。 [#7012](https://github.com/vllm-project/vllm-ascend/pull/7012)" +msgstr "" +"修复同时启用 DP 和 DCP 时 GQA 模型的错误。 [#7012](https://github.com/vllm-project" +"/vllm-ascend/pull/7012)" #: ../../source/user_guide/release_notes.md:131 msgid "" "Fix MTP prefill misclassified as decode edge case. " "[#6835](https://github.com/vllm-project/vllm-ascend/pull/6835)" -msgstr "修复 MTP 预填充被错误分类为解码的边缘情况。 [#6835](https://github.com/vllm-project/vllm-ascend/pull/6835)" +msgstr "" +"修复 MTP 预填充被错误分类为解码的边缘情况。 [#6835](https://github.com/vllm-project/vllm-" +"ascend/pull/6835)" #: ../../source/user_guide/release_notes.md:132 msgid "" "Fix Eagle3 acceptance rate for QuaRot quantized models. " "[#6914](https://github.com/vllm-project/vllm-ascend/pull/6914)" -msgstr "修复 QuaRot 量化模型的 Eagle3 接受率问题。 [#6914](https://github.com/vllm-project/vllm-ascend/pull/6914)" +msgstr "" +"修复 QuaRot 量化模型的 Eagle3 接受率问题。 [#6914](https://github.com/vllm-project" +"/vllm-ascend/pull/6914)" #: ../../source/user_guide/release_notes.md:133 msgid "" "Fix RoPE shape mismatch for MTP models with FlashComm V1 enabled. " "[#6939](https://github.com/vllm-project/vllm-ascend/pull/6939)" -msgstr "修复启用 FlashComm V1 时 MTP 模型的 RoPE 形状不匹配问题。 [#6939](https://github.com/vllm-project/vllm-ascend/pull/6939)" +msgstr "" +"修复启用 FlashComm V1 时 MTP 模型的 RoPE 形状不匹配问题。 [#6939](https://github.com" +"/vllm-project/vllm-ascend/pull/6939)" #: ../../source/user_guide/release_notes.md:134 msgid "" "Fix Qwen2.5VL accuracy issue. 
[#6975](https://github.com/vllm-project" "/vllm-ascend/pull/6975)" -msgstr "修复 Qwen2.5VL 精度问题。 [#6975](https://github.com/vllm-project/vllm-ascend/pull/6975)" +msgstr "" +"修复 Qwen2.5VL 精度问题。 [#6975](https://github.com/vllm-project/vllm-" +"ascend/pull/6975)" #: ../../source/user_guide/release_notes.md:135 msgid "" "Fix MoE forward error with static kernel enabled. " "[#6964](https://github.com/vllm-project/vllm-ascend/pull/6964)" -msgstr "修复启用静态内核时的 MoE 前向传播错误。 [#6964](https://github.com/vllm-project/vllm-ascend/pull/6964)" +msgstr "" +"修复启用静态内核时的 MoE 前向传播错误。 [#6964](https://github.com/vllm-project/vllm-" +"ascend/pull/6964)" #: ../../source/user_guide/release_notes.md:136 msgid "" "Fix muls_add fusion for GLM5 models. [#6928](https://github.com/vllm-" "project/vllm-ascend/pull/6928)" -msgstr "修复 GLM5 模型的 muls_add 融合问题。 [#6928](https://github.com/vllm-project/vllm-ascend/pull/6928)" +msgstr "" +"修复 GLM5 模型的 muls_add 融合问题。 [#6928](https://github.com/vllm-project/vllm-" +"ascend/pull/6928)" #: ../../source/user_guide/release_notes.md:137 msgid "" "Fix GDN layer detection for multimodal models. [#6941](https://github.com" "/vllm-project/vllm-ascend/pull/6941)" -msgstr "修复多模态模型的 GDN 层检测。 [#6941](https://github.com/vllm-project/vllm-ascend/pull/6941)" +msgstr "" +"修复多模态模型的 GDN 层检测。 [#6941](https://github.com/vllm-project/vllm-" +"ascend/pull/6941)" #: ../../source/user_guide/release_notes.md:138 msgid "" "Fix 300I unquant model weight nd2nz error. [#6851](https://github.com" "/vllm-project/vllm-ascend/pull/6851)" -msgstr "修复 300I 非量化模型权重的 nd2nz 错误。 [#6851](https://github.com/vllm-project/vllm-ascend/pull/6851)" +msgstr "" +"修复 300I 非量化模型权重的 nd2nz 错误。 [#6851](https://github.com/vllm-project/vllm-" +"ascend/pull/6851)" #: ../../source/user_guide/release_notes.md:139 msgid "" "Fix CPU binding logic. [#6889](https://github.com/vllm-project/vllm-" "ascend/pull/6889)" -msgstr "修复 CPU 绑定逻辑。 [#6889](https://github.com/vllm-project/vllm-ascend/pull/6889)" +msgstr "" +"修复 CPU 绑定逻辑。 [#6889](https://github.com/vllm-project/vllm-" +"ascend/pull/6889)" #: ../../source/user_guide/release_notes.md:140 msgid "" -"Fix Eagle fullgraph shape capture. [#6846](https://github.com/vllm-" +"Fix Eagle full graph shape capture. [#6846](https://github.com/vllm-" "project/vllm-ascend/pull/6846)" -msgstr "修复 Eagle 全图形状捕获问题。 [#6846](https://github.com/vllm-project/vllm-ascend/pull/6846)" +msgstr "" +"修复 Eagle 全图形状捕获问题。 [#6846](https://github.com/vllm-project/vllm-" +"ascend/pull/6846)" #: ../../source/user_guide/release_notes.md:144 msgid "" @@ -812,7 +913,9 @@ msgid "" "In 4-node A3 PD disaggregation deployment with DeepSeek V3.2, the P-Node " "may hang when benchmarking in high concurrency scenario, e.g., 2K/2K " "tokens with 512 concurrent requests." -msgstr "在使用 DeepSeek V3.2 的 4 节点 A3 PD 解耦部署中,P-Node 在高并发场景(例如,2K/2K tokens 和 512 个并发请求)下进行基准测试时可能出现挂起。" +msgstr "" +"在使用 DeepSeek V3.2 的 4 节点 A3 PD 解耦部署中,P-Node 在高并发场景(例如,2K/2K tokens 和 512 " +"个并发请求)下进行基准测试时可能出现挂起。" #: ../../source/user_guide/release_notes.md:146 #, python-brace-format @@ -820,9 +923,12 @@ msgid "" "MTP with large EP configurations may cause graph capture buffer overflow." " This is a bug need to fix in vLLM, now there is a workaround to avoid " "it: explicitly set `--compilation-config " -"'{\"max_cudagraph_capture_size\": N}'` where `N = max_concurrency × (1 + " +"'{\"max_cudagraph_capture_size\": N}'` where `N = max_concurrency * (1 + " "num_speculative_tokens)`." 
-msgstr "具有大规模 EP 配置的 MTP 可能导致图捕获缓冲区溢出。这是 vLLM 中需要修复的一个错误,目前有一个临时解决方案可以避免此问题:显式设置 `--compilation-config '{\"max_cudagraph_capture_size\": N}'`,其中 `N = max_concurrency × (1 + num_speculative_tokens)`。" +msgstr "" +"具有大规模 EP 配置的 MTP 可能导致图捕获缓冲区溢出。这是 vLLM 中需要修复的一个错误,目前有一个临时解决方案可以避免此问题:显式设置 " +"`--compilation-config '{\"max_cudagraph_capture_size\": N}'`,其中 `N = " +"max_concurrency × (1 + num_speculative_tokens)`。" #: ../../source/user_guide/release_notes.md:148 msgid "v0.15.0rc1 - 2026.02.27" @@ -833,7 +939,9 @@ msgid "" "This is the first release candidate of v0.15.0 for vLLM Ascend. Please " "follow the [official doc](https://docs.vllm.ai/projects/ascend/en/latest)" " to get started." -msgstr "这是 vLLM Ascend v0.15.0 的第一个候选发布版本。请按照[官方文档](https://docs.vllm.ai/projects/ascend/en/latest)开始使用。" +msgstr "" +"这是 vLLM Ascend v0.15.0 " +"的第一个候选发布版本。请按照[官方文档](https://docs.vllm.ai/projects/ascend/en/latest)开始使用。" #: ../../source/user_guide/release_notes.md:154 msgid "" @@ -843,7 +951,12 @@ msgid "" "[#6354](https://github.com/vllm-project/vllm-ascend/pull/6354) " "[#6664](https://github.com/vllm-project/vllm-ascend/pull/6664) " "[#6006](https://github.com/vllm-project/vllm-ascend/pull/6006)" -msgstr "**NPU Graph EX (npugraph_ex) 默认启用**:npugraph_ex 功能现已默认启用,通过集成的 inductor pass 和 MatmulAllReduceAddRMSNorm 融合提供更好的图优化。 [#6354](https://github.com/vllm-project/vllm-ascend/pull/6354) [#6664](https://github.com/vllm-project/vllm-ascend/pull/6664) [#6006](https://github.com/vllm-project/vllm-ascend/pull/6006)" +msgstr "" +"**NPU Graph EX (npugraph_ex) 默认启用**:npugraph_ex 功能现已默认启用,通过集成的 inductor " +"pass 和 MatmulAllReduceAddRMSNorm 融合提供更好的图优化。 [#6354](https://github.com" +"/vllm-project/vllm-ascend/pull/6354) [#6664](https://github.com/vllm-" +"project/vllm-ascend/pull/6664) [#6006](https://github.com/vllm-project" +"/vllm-ascend/pull/6006)" #: ../../source/user_guide/release_notes.md:155 msgid "" @@ -854,14 +967,22 @@ msgid "" "ascend/pull/6641) [#6454](https://github.com/vllm-project/vllm-" "ascend/pull/6454) [#6705](https://github.com/vllm-project/vllm-" "ascend/pull/6705)" -msgstr "**310P MoE 和 W8A8 支持**[实验性]:310P 现在支持 MoE 模型、W8A8 量化和 weightNZ 功能,显著扩展了硬件能力。 [#6530](https://github.com/vllm-project/vllm-ascend/pull/6530) [#6641](https://github.com/vllm-project/vllm-ascend/pull/6641) [#6454](https://github.com/vllm-project/vllm-ascend/pull/6454) [#6705](https://github.com/vllm-project/vllm-ascend/pull/6705)" +msgstr "" +"**310P MoE 和 W8A8 支持**[实验性]:310P 现在支持 MoE 模型、W8A8 量化和 weightNZ " +"功能,显著扩展了硬件能力。 [#6530](https://github.com/vllm-project/vllm-" +"ascend/pull/6530) [#6641](https://github.com/vllm-project/vllm-" +"ascend/pull/6641) [#6454](https://github.com/vllm-project/vllm-" +"ascend/pull/6454) [#6705](https://github.com/vllm-project/vllm-" +"ascend/pull/6705)" #: ../../source/user_guide/release_notes.md:156 msgid "" "**Qwen3-VL-MoE EAGLE Support**: Added EAGLE speculative decoding support " "for Qwen3-VL-MoE model. [#6327](https://github.com/vllm-project/vllm-" "ascend/pull/6327)" -msgstr "**Qwen3-VL-MoE EAGLE 支持**:为 Qwen3-VL-MoE 模型新增 EAGLE 推测式解码支持。 [#6327](https://github.com/vllm-project/vllm-ascend/pull/6327)" +msgstr "" +"**Qwen3-VL-MoE EAGLE 支持**:为 Qwen3-VL-MoE 模型新增 EAGLE 推测式解码支持。 " +"[#6327](https://github.com/vllm-project/vllm-ascend/pull/6327)" #: ../../source/user_guide/release_notes.md:157 msgid "" @@ -873,7 +994,12 @@ msgid "" "project/vllm/pull/34501). 
[#6755](https://github.com/vllm-project/vllm-" "ascend/pull/6755)" msgstr "" -"**Kimi-K2.5 模型支持**:新增对 Kimi-K2.5 模型的支持。**请注意**,vLLM 0.15.0 存在一个与 Kimi-K2.5 相关的已知问题。要修复此问题,请应用上游 `vllm-project/vllm` 仓库的更改,具体来自拉取请求 [#33320](https://github.com/vllm-project/vllm/pull/33320) 和 [#34501](https://github.com/vllm-project/vllm/pull/34501)。[#6755](https://github.com/vllm-project/vllm-ascend/pull/6755)" +"**Kimi-K2.5 模型支持**:新增对 Kimi-K2.5 模型的支持。**请注意**,vLLM 0.15.0 存在一个与 " +"Kimi-K2.5 相关的已知问题。要修复此问题,请应用上游 `vllm-project/vllm` 仓库的更改,具体来自拉取请求 " +"[#33320](https://github.com/vllm-project/vllm/pull/33320) 和 " +"[#34501](https://github.com/vllm-" +"project/vllm/pull/34501)。[#6755](https://github.com/vllm-project/vllm-" +"ascend/pull/6755)" #: ../../source/user_guide/release_notes.md:161 msgid "" @@ -881,14 +1007,16 @@ msgid "" "detected from model files. [#6645](https://github.com/vllm-project/vllm-" "ascend/pull/6645)" msgstr "" -"**自动检测量化格式**:现在可以从模型文件中自动检测量化格式。[#6645](https://github.com/vllm-project/vllm-ascend/pull/6645)" +"**自动检测量化格式**:现在可以从模型文件中自动检测量化格式。[#6645](https://github.com/vllm-project" +"/vllm-ascend/pull/6645)" #: ../../source/user_guide/release_notes.md:162 msgid "" "**GPT-OSS Attention Support**: Added GPT-OSS attention implementation. " "[#5901](https://github.com/vllm-project/vllm-ascend/pull/5901)" msgstr "" -"**GPT-OSS Attention 支持**:新增 GPT-OSS attention 实现。[#5901](https://github.com/vllm-project/vllm-ascend/pull/5901)" +"**GPT-OSS Attention 支持**:新增 GPT-OSS attention " +"实现。[#5901](https://github.com/vllm-project/vllm-ascend/pull/5901)" #: ../../source/user_guide/release_notes.md:163 msgid "" @@ -896,7 +1024,8 @@ msgid "" "SFA architecture. [#6563](https://github.com/vllm-project/vllm-" "ascend/pull/6563)" msgstr "" -"**SFA 架构的 DCP 支持**:为 SFA 架构新增解码上下文并行(DCP)支持。[#6563](https://github.com/vllm-project/vllm-ascend/pull/6563)" +"**SFA 架构的 DCP 支持**:为 SFA 架构新增解码上下文并行(DCP)支持。[#6563](https://github.com" +"/vllm-project/vllm-ascend/pull/6563)" #: ../../source/user_guide/release_notes.md:164 msgid "" @@ -904,7 +1033,8 @@ msgid "" "supports PCP function. [#6627](https://github.com/vllm-project/vllm-" "ascend/pull/6627)" msgstr "" -"**Mooncake 分层连接器 PCP 支持**:Mooncake 分层连接器现在支持 PCP 功能。[#6627](https://github.com/vllm-project/vllm-ascend/pull/6627)" +"**Mooncake 分层连接器 PCP 支持**:Mooncake 分层连接器现在支持 PCP " +"功能。[#6627](https://github.com/vllm-project/vllm-ascend/pull/6627)" #: ../../source/user_guide/release_notes.md:165 msgid "" @@ -912,14 +1042,16 @@ msgid "" "remote PTP size. [#5822](https://github.com/vllm-project/vllm-" "ascend/pull/5822)" msgstr "" -"**Mooncake 连接器远程 PTP 大小**:Mooncake 连接器现在可以获取远程 PTP 大小。[#5822](https://github.com/vllm-project/vllm-ascend/pull/5822)" +"**Mooncake 连接器远程 PTP 大小**:Mooncake 连接器现在可以获取远程 PTP " +"大小。[#5822](https://github.com/vllm-project/vllm-ascend/pull/5822)" #: ../../source/user_guide/release_notes.md:166 msgid "" "**KV Pool Sparse Attention**: KV pool now supports sparse attention. " "[#6339](https://github.com/vllm-project/vllm-ascend/pull/6339)" msgstr "" -"**KV 池稀疏注意力**:KV 池现在支持稀疏注意力。[#6339](https://github.com/vllm-project/vllm-ascend/pull/6339)" +"**KV 池稀疏注意力**:KV 池现在支持稀疏注意力。[#6339](https://github.com/vllm-project/vllm-" +"ascend/pull/6339)" #: ../../source/user_guide/release_notes.md:167 msgid "" @@ -927,14 +1059,16 @@ msgid "" "with AscendC. 
[#6590](https://github.com/vllm-project/vllm-" "ascend/pull/6590)" msgstr "" -"**基于 AscendC 的 Batch Invariant**:使用 AscendC 实现了 batch invariant 特性。[#6590](https://github.com/vllm-project/vllm-ascend/pull/6590)" +"**基于 AscendC 的 Batch Invariant**:使用 AscendC 实现了 batch invariant " +"特性。[#6590](https://github.com/vllm-project/vllm-ascend/pull/6590)" #: ../../source/user_guide/release_notes.md:168 msgid "" "**Routing Replay**: Added routing replay feature. " "[#6696](https://github.com/vllm-project/vllm-ascend/pull/6696)" msgstr "" -"**路由重放**:新增路由重放功能。[#6696](https://github.com/vllm-project/vllm-ascend/pull/6696)" +"**路由重放**:新增路由重放功能。[#6696](https://github.com/vllm-project/vllm-" +"ascend/pull/6696)" #: ../../source/user_guide/release_notes.md:169 msgid "" @@ -942,7 +1076,8 @@ msgid "" "compressed tensors moe w4a8 dynamic weight quantization. " "[#5889](https://github.com/vllm-project/vllm-ascend/pull/5889)" msgstr "" -"**压缩张量 MoE W4A8 动态权重**:新增对压缩张量 MoE W4A8 动态权重量化的支持。[#5889](https://github.com/vllm-project/vllm-ascend/pull/5889)" +"**压缩张量 MoE W4A8 动态权重**:新增对压缩张量 MoE W4A8 " +"动态权重量化的支持。[#5889](https://github.com/vllm-project/vllm-ascend/pull/5889)" #: ../../source/user_guide/release_notes.md:170 msgid "" @@ -950,7 +1085,8 @@ msgid "" "GLM4.7-Flash. [#6492](https://github.com/vllm-project/vllm-" "ascend/pull/6492)" msgstr "" -"**GLM4.7-Flash W8A8 量化**:为 GLM4.7-Flash 新增 W8A8 量化支持。[#6492](https://github.com/vllm-project/vllm-ascend/pull/6492)" +"**GLM4.7-Flash W8A8 量化**:为 GLM4.7-Flash 新增 W8A8 " +"量化支持。[#6492](https://github.com/vllm-project/vllm-ascend/pull/6492)" #: ../../source/user_guide/release_notes.md:171 msgid "" @@ -958,21 +1094,25 @@ msgid "" "supports bf16/float16 gmm1/gmm2 weight and ND format weight. " "[#6393](https://github.com/vllm-project/vllm-ascend/pull/6393)" msgstr "" -"**DispatchGmmCombineDecode 增强**:DispatchGmmCombineDecode 现在支持 bf16/float16 的 gmm1/gmm2 权重以及 ND 格式权重。[#6393](https://github.com/vllm-project/vllm-ascend/pull/6393)" +"**DispatchGmmCombineDecode 增强**:DispatchGmmCombineDecode 现在支持 " +"bf16/float16 的 gmm1/gmm2 权重以及 ND 格式权重。[#6393](https://github.com/vllm-" +"project/vllm-ascend/pull/6393)" #: ../../source/user_guide/release_notes.md:172 msgid "" "**RMSNorm Dynamic Quant Fusion**: Added rmsnorm dynamic quant fusion " "pass. [#6274](https://github.com/vllm-project/vllm-ascend/pull/6274)" msgstr "" -"**RMSNorm 动态量化融合**:新增 rmsnorm 动态量化融合 pass。[#6274](https://github.com/vllm-project/vllm-ascend/pull/6274)" +"**RMSNorm 动态量化融合**:新增 rmsnorm 动态量化融合 pass。[#6274](https://github.com" +"/vllm-project/vllm-ascend/pull/6274)" #: ../../source/user_guide/release_notes.md:173 msgid "" "**Worker Health Check Interface**: Added `check_health` interface for " "worker. [#6681](https://github.com/vllm-project/vllm-ascend/pull/6681)" msgstr "" -"**Worker 健康检查接口**:为 worker 新增 `check_health` 接口。[#6681](https://github.com/vllm-project/vllm-ascend/pull/6681)" +"**Worker 健康检查接口**:为 worker 新增 `check_health` " +"接口。[#6681](https://github.com/vllm-project/vllm-ascend/pull/6681)" #: ../../source/user_guide/release_notes.md:177 msgid "**310P Support Expansion**: Multiple improvements for 310P hardware:" @@ -983,28 +1123,32 @@ msgid "" "Fixed attention accuracy issue on 310P. 
[#6803](https://github.com/vllm-" "project/vllm-ascend/pull/6803)" msgstr "" -"修复了 310P 上的注意力精度问题。[#6803](https://github.com/vllm-project/vllm-ascend/pull/6803)" +"修复了 310P 上的注意力精度问题。[#6803](https://github.com/vllm-project/vllm-" +"ascend/pull/6803)" #: ../../source/user_guide/release_notes.md:179 msgid "" "Added weightNZ feature for 310P with quant or unquant support. " "[#6705](https://github.com/vllm-project/vllm-ascend/pull/6705)" msgstr "" -"为 310P 新增 weightNZ 特性,支持量化或非量化。[#6705](https://github.com/vllm-project/vllm-ascend/pull/6705)" +"为 310P 新增 weightNZ 特性,支持量化或非量化。[#6705](https://github.com/vllm-project" +"/vllm-ascend/pull/6705)" #: ../../source/user_guide/release_notes.md:180 msgid "" "Added addrmsnorm support for 300I DUO. [#6704](https://github.com/vllm-" "project/vllm-ascend/pull/6704)" msgstr "" -"为 300I DUO 新增 addrmsnorm 支持。[#6704](https://github.com/vllm-project/vllm-ascend/pull/6704)" +"为 300I DUO 新增 addrmsnorm 支持。[#6704](https://github.com/vllm-project/vllm-" +"ascend/pull/6704)" #: ../../source/user_guide/release_notes.md:181 msgid "" "310P now supports PrefillCacheHit state. [#6756](https://github.com/vllm-" "project/vllm-ascend/pull/6756)" msgstr "" -"310P 现在支持 PrefillCacheHit 状态。[#6756](https://github.com/vllm-project/vllm-ascend/pull/6756)" +"310P 现在支持 PrefillCacheHit 状态。[#6756](https://github.com/vllm-project" +"/vllm-ascend/pull/6756)" #: ../../source/user_guide/release_notes.md:182 msgid "" @@ -1012,7 +1156,8 @@ msgid "" " A3 policy. [#6686](https://github.com/vllm-project/vllm-" "ascend/pull/6686)" msgstr "" -"**仅 ARM CPU 绑定**:启用了仅 ARM CPU 绑定,采用 NUMA 均衡的 A3 策略。[#6686](https://github.com/vllm-project/vllm-ascend/pull/6686)" +"**仅 ARM CPU 绑定**:启用了仅 ARM CPU 绑定,采用 NUMA 均衡的 A3 " +"策略。[#6686](https://github.com/vllm-project/vllm-ascend/pull/6686)" #: ../../source/user_guide/release_notes.md:183 msgid "" @@ -1020,7 +1165,9 @@ msgid "" "from cos_sin_cache. [#5450](https://github.com/vllm-project/vllm-" "ascend/pull/5450)" msgstr "" -"**Triton Rope 增强**:Triton rope 现在支持从 cos_sin_cache 进行 index_selecting。[#5450](https://github.com/vllm-project/vllm-ascend/pull/5450)" +"**Triton Rope 增强**:Triton rope 现在支持从 cos_sin_cache 进行 " +"index_selecting。[#5450](https://github.com/vllm-project/vllm-" +"ascend/pull/5450)" #: ../../source/user_guide/release_notes.md:184 msgid "" @@ -1028,7 +1175,8 @@ msgid "" "to speed up GQA transfer. [#6366](https://github.com/vllm-project/vllm-" "ascend/pull/6366)" msgstr "" -"**AscendC 融合算子**:新增 AscendC 融合算子 transpose_kv_cache_by_block,以加速 GQA 传输。[#6366](https://github.com/vllm-project/vllm-ascend/pull/6366)" +"**AscendC 融合算子**:新增 AscendC 融合算子 transpose_kv_cache_by_block,以加速 GQA " +"传输。[#6366](https://github.com/vllm-project/vllm-ascend/pull/6366)" #: ../../source/user_guide/release_notes.md:185 msgid "" @@ -1036,7 +1184,8 @@ msgid "" "using partial rope in rotary_embedding. [#6581](https://github.com/vllm-" "project/vllm-ascend/pull/6581)" msgstr "" -"**Rotary_dim 参数**:在 rotary_embedding 中使用 partial rope 时,新增对 rotary_dim 参数的支持。[#6581](https://github.com/vllm-project/vllm-ascend/pull/6581)" +"**Rotary_dim 参数**:在 rotary_embedding 中使用 partial rope 时,新增对 rotary_dim " +"参数的支持。[#6581](https://github.com/vllm-project/vllm-ascend/pull/6581)" #: ../../source/user_guide/release_notes.md:189 msgid "" @@ -1044,7 +1193,9 @@ msgid "" "frequent D2H copy for better multimodal performance. 
" "[#6448](https://github.com/vllm-project/vllm-ascend/pull/6448)" msgstr "" -"**多模态 seq_lens CPU 缓存**:使用 `seq_lens` CPU 缓存以避免频繁的 D2H 拷贝,从而提升多模态性能。[#6448](https://github.com/vllm-project/vllm-ascend/pull/6448)" +"**多模态 seq_lens CPU 缓存**:使用 `seq_lens` CPU 缓存以避免频繁的 D2H " +"拷贝,从而提升多模态性能。[#6448](https://github.com/vllm-project/vllm-" +"ascend/pull/6448)" #: ../../source/user_guide/release_notes.md:190 msgid "" @@ -1053,7 +1204,9 @@ msgid "" "[#6468](https://github.com/vllm-project/vllm-ascend/pull/6468) " "[#6707](https://github.com/vllm-project/vllm-ascend/pull/6707)" msgstr "" -"**DispatchFFNCombine 优化**:优化了 DispatchFFNCombine 内核性能,并解决了因未对齐 UB 访问导致的向量错误。[#6468](https://github.com/vllm-project/vllm-ascend/pull/6468) [#6707](https://github.com/vllm-project/vllm-ascend/pull/6707)" +"**DispatchFFNCombine 优化**:优化了 DispatchFFNCombine 内核性能,并解决了因未对齐 UB " +"访问导致的向量错误。[#6468](https://github.com/vllm-project/vllm-ascend/pull/6468) " +"[#6707](https://github.com/vllm-project/vllm-ascend/pull/6707)" #: ../../source/user_guide/release_notes.md:191 msgid "" @@ -1061,7 +1214,8 @@ msgid "" "DeepSeek V3.2. [#6610](https://github.com/vllm-project/vllm-" "ascend/pull/6610)" msgstr "" -"**DeepSeek V3.2 KVCache 优化**:优化了 DeepSeek V3.2 的 KV 缓存使用。[#6610](https://github.com/vllm-project/vllm-ascend/pull/6610)" +"**DeepSeek V3.2 KVCache 优化**:优化了 DeepSeek V3.2 的 KV " +"缓存使用。[#6610](https://github.com/vllm-project/vllm-ascend/pull/6610)" #: ../../source/user_guide/release_notes.md:192 msgid "" @@ -1069,7 +1223,8 @@ msgid "" "consistent with MoE weight prefetch. [#6629](https://github.com/vllm-" "project/vllm-ascend/pull/6629)" msgstr "" -"**MLA/SFA 权重预取**:重构了 MLA/SFA 权重预取,使其与 MoE 权重预取保持一致。[#6629](https://github.com/vllm-project/vllm-ascend/pull/6629)" +"**MLA/SFA 权重预取**:重构了 MLA/SFA 权重预取,使其与 MoE " +"权重预取保持一致。[#6629](https://github.com/vllm-project/vllm-ascend/pull/6629)" #: ../../source/user_guide/release_notes.md:193 msgid "" @@ -1077,7 +1232,8 @@ msgid "" "with MoE model's prefetching. [#6442](https://github.com/vllm-project" "/vllm-ascend/pull/6442)" msgstr "" -"**MLP 权重预取**:重构了 MLP 权重预取,使其与 MoE 模型的预取保持一致。[#6442](https://github.com/vllm-project/vllm-ascend/pull/6442)" +"**MLP 权重预取**:重构了 MLP 权重预取,使其与 MoE 模型的预取保持一致。[#6442](https://github.com" +"/vllm-project/vllm-ascend/pull/6442)" #: ../../source/user_guide/release_notes.md:194 msgid "" @@ -1085,14 +1241,17 @@ msgid "" " linear_persistent kernel. [#6537](https://github.com/vllm-project/vllm-" "ascend/pull/6537)" msgstr "" -"**自适应块大小选择**:在 linear_persistent 内核中新增自适应块大小选择功能。[#6537](https://github.com/vllm-project/vllm-ascend/pull/6537)" +"**自适应块大小选择**:在 linear_persistent " +"内核中新增自适应块大小选择功能。[#6537](https://github.com/vllm-project/vllm-" +"ascend/pull/6537)" #: ../../source/user_guide/release_notes.md:195 msgid "" "**EPLB Memory Optimization**: Reduced memory used for heat aggregation in" " EPLB. [#6729](https://github.com/vllm-project/vllm-ascend/pull/6729)" msgstr "" -"**EPLB 内存优化**:减少了 EPLB 中用于热度聚合的内存使用。[#6729](https://github.com/vllm-project/vllm-ascend/pull/6729)" +"**EPLB 内存优化**:减少了 EPLB 中用于热度聚合的内存使用。[#6729](https://github.com/vllm-" +"project/vllm-ascend/pull/6729)" #: ../../source/user_guide/release_notes.md:196 msgid "" @@ -1100,21 +1259,25 @@ msgid "" "with memory migration and interrupt core binding functions. 
" "[#6785](https://github.com/vllm-project/vllm-ascend/pull/6785)" msgstr "" -"**内存迁移与中断核心绑定**:改进了绑定逻辑,增加了内存迁移和中断核心绑定功能。[#6785](https://github.com/vllm-project/vllm-ascend/pull/6785)" +"**内存迁移与中断核心绑定**:改进了绑定逻辑,增加了内存迁移和中断核心绑定功能。[#6785](https://github.com/vllm-" +"project/vllm-ascend/pull/6785)" #: ../../source/user_guide/release_notes.md:197 msgid "" "**Triton Stability**: Improved Triton stability on Ascend for large " "grids. [#6301](https://github.com/vllm-project/vllm-ascend/pull/6301)" msgstr "" -"**Triton 稳定性**:提升了 Triton 在 Ascend 上处理大规模网格时的稳定性。[#6301](https://github.com/vllm-project/vllm-ascend/pull/6301)" +"**Triton 稳定性**:提升了 Triton 在 Ascend " +"上处理大规模网格时的稳定性。[#6301](https://github.com/vllm-project/vllm-" +"ascend/pull/6301)" #: ../../source/user_guide/release_notes.md:201 msgid "" "**Mooncake**: Upgraded to v0.3.8.post1. [#6428](https://github.com/vllm-" "project/vllm-ascend/pull/6428)" msgstr "" -"**Mooncake**:升级至 v0.3.8.post1。[#6428](https://github.com/vllm-project/vllm-ascend/pull/6428)" +"**Mooncake**:升级至 v0.3.8.post1。[#6428](https://github.com/vllm-project" +"/vllm-ascend/pull/6428)" #: ../../source/user_guide/release_notes.md:205 msgid "" @@ -1122,35 +1285,41 @@ msgid "" "ProfileExecuteDuration feature. [#6461](https://github.com/vllm-project" "/vllm-ascend/pull/6461)" msgstr "" -"**ProfileExecuteDuration**:清理并弃用了 ProfileExecuteDuration 功能。[#6461](https://github.com/vllm-project/vllm-ascend/pull/6461)" +"**ProfileExecuteDuration**:清理并弃用了 ProfileExecuteDuration " +"功能。[#6461](https://github.com/vllm-project/vllm-ascend/pull/6461)" #: ../../source/user_guide/release_notes.md:206 msgid "" "**Custom rotary_embedding Operator**: Removed custom rotary_embedding " "operator. [#6523](https://github.com/vllm-project/vllm-ascend/pull/6523)" msgstr "" -"**自定义 rotary_embedding 算子**:移除了自定义 rotary_embedding 算子。[#6523](https://github.com/vllm-project/vllm-ascend/pull/6523)" +"**自定义 rotary_embedding 算子**:移除了自定义 rotary_embedding " +"算子。[#6523](https://github.com/vllm-project/vllm-ascend/pull/6523)" #: ../../source/user_guide/release_notes.md:207 msgid "" "**USE_OPTIMIZED_MODEL**: Cleaned up unused env `USE_OPTIMIZED_MODEL`. " "[#6618](https://github.com/vllm-project/vllm-ascend/pull/6618)" msgstr "" -"**USE_OPTIMIZED_MODEL**:清理了未使用的环境变量 `USE_OPTIMIZED_MODEL`。[#6618](https://github.com/vllm-project/vllm-ascend/pull/6618)" +"**USE_OPTIMIZED_MODEL**:清理了未使用的环境变量 " +"`USE_OPTIMIZED_MODEL`。[#6618](https://github.com/vllm-project/vllm-" +"ascend/pull/6618)" #: ../../source/user_guide/release_notes.md:211 msgid "" "Added AI-assisted model-adaptation workflow documentation for vllm-" "ascend. [#6731](https://github.com/vllm-project/vllm-ascend/pull/6731)" msgstr "" -"新增了 vllm-ascend 的 AI 辅助模型适配工作流文档。[#6731](https://github.com/vllm-project/vllm-ascend/pull/6731)" +"新增了 vllm-ascend 的 AI 辅助模型适配工作流文档。[#6731](https://github.com/vllm-project" +"/vllm-ascend/pull/6731)" #: ../../source/user_guide/release_notes.md:212 msgid "" "Added vLLM Ascend development guidelines (AGETNS.md). " "[#6797](https://github.com/vllm-project/vllm-ascend/pull/6797)" msgstr "" -"新增了 vLLM Ascend 开发指南 (AGETNS.md)。[#6797](https://github.com/vllm-project/vllm-ascend/pull/6797)" +"新增了 vLLM Ascend 开发指南 (AGETNS.md)。[#6797](https://github.com/vllm-project" +"/vllm-ascend/pull/6797)" #: ../../source/user_guide/release_notes.md:213 msgid "" @@ -1174,9 +1343,7 @@ msgstr "" msgid "" "Added request forwarding documentation. 
[#6780](https://github.com/vllm-" "project/vllm-ascend/pull/6780)" -msgstr "" -"新增请求转发文档。 [#6780](https://github.com/vllm-project/vllm-" -"ascend/pull/6780)" +msgstr "新增请求转发文档。 [#6780](https://github.com/vllm-project/vllm-ascend/pull/6780)" #: ../../source/user_guide/release_notes.md:216 msgid "" @@ -1190,9 +1357,7 @@ msgstr "" msgid "" "Restructured tutorial documentation. [#6501](https://github.com/vllm-" "project/vllm-ascend/pull/6501)" -msgstr "" -"重构了教程文档结构。 [#6501](https://github.com/vllm-project/vllm-" -"ascend/pull/6501)" +msgstr "重构了教程文档结构。 [#6501](https://github.com/vllm-project/vllm-ascend/pull/6501)" #: ../../source/user_guide/release_notes.md:218 msgid "" @@ -1204,25 +1369,28 @@ msgstr "" #: ../../source/user_guide/release_notes.md:222 msgid "" -"**MTP in PD Fullgraph**: Fixed support for ALL D-Nodes in fullgraph when " -"running MTP in PD deployment. [#5472](https://github.com/vllm-project" -"/vllm-ascend/pull/5472)" +"**MTP in PD Full graph**: Fixed support for ALL D-Nodes in full graph " +"when running MTP in PD deployment. [#5472](https://github.com/vllm-" +"project/vllm-ascend/pull/5472)" msgstr "" -"**PD 全图中的 MTP**:修复了在 PD 部署中运行 MTP 时,全图模式下对所有 D-Nodes 的支持问题。 [#5472](https://github.com/vllm-project/vllm-ascend/pull/5472)" +"**PD 全图中的 MTP**:修复了在 PD 部署中运行 MTP 时,全图模式下对所有 D-Nodes 的支持问题。 " +"[#5472](https://github.com/vllm-project/vllm-ascend/pull/5472)" #: ../../source/user_guide/release_notes.md:223 msgid "" "**DeepSeekV3.1 Accuracy**: Fixed DeepSeekV3.1 accuracy issue. " "[#6805](https://github.com/vllm-project/vllm-ascend/pull/6805)" msgstr "" -"**DeepSeekV3.1 准确性**:修复了 DeepSeekV3.1 的准确性问题。 [#6805](https://github.com/vllm-project/vllm-ascend/pull/6805)" +"**DeepSeekV3.1 准确性**:修复了 DeepSeekV3.1 的准确性问题。 [#6805](https://github.com" +"/vllm-project/vllm-ascend/pull/6805)" #: ../../source/user_guide/release_notes.md:224 msgid "" "**EAGLE Refactor**: Routed MTP to EAGLE except for PCP/DCP+MTP cases. " "[#6349](https://github.com/vllm-project/vllm-ascend/pull/6349)" msgstr "" -"**EAGLE 重构**:将 MTP 路由至 EAGLE,PCP/DCP+MTP 情况除外。 [#6349](https://github.com/vllm-project/vllm-ascend/pull/6349)" +"**EAGLE 重构**:将 MTP 路由至 EAGLE,PCP/DCP+MTP 情况除外。 [#6349](https://github.com" +"/vllm-project/vllm-ascend/pull/6349)" #: ../../source/user_guide/release_notes.md:225 msgid "" @@ -1230,14 +1398,16 @@ msgid "" "vLLM 0.15.0. [#6606](https://github.com/vllm-project/vllm-" "ascend/pull/6606)" msgstr "" -"**推测解码准确性**:修复了 vLLM 0.15.0 中的推测接受率问题。 [#6606](https://github.com/vllm-project/vllm-ascend/pull/6606)" +"**推测解码准确性**:修复了 vLLM 0.15.0 中的推测接受率问题。 [#6606](https://github.com/vllm-" +"project/vllm-ascend/pull/6606)" #: ../../source/user_guide/release_notes.md:226 msgid "" "**PCP/DCP Accuracy**: Fixed accuracy issue in PCP/DCP with speculative " "decoding. 
[#6491](https://github.com/vllm-project/vllm-ascend/pull/6491)" msgstr "" -"**PCP/DCP 准确性**:修复了 PCP/DCP 结合推测解码时的准确性问题。 [#6491](https://github.com/vllm-project/vllm-ascend/pull/6491)" +"**PCP/DCP 准确性**:修复了 PCP/DCP 结合推测解码时的准确性问题。 [#6491](https://github.com" +"/vllm-project/vllm-ascend/pull/6491)" #: ../../source/user_guide/release_notes.md:227 msgid "" @@ -1246,7 +1416,9 @@ msgid "" "/vllm-ascend/pull/6653) [#6528](https://github.com/vllm-project/vllm-" "ascend/pull/6528)" msgstr "" -"**动态 EPLB**:修复了动态 EPLB 无效的缺陷,且 EPLB 不再依赖特定模型。 [#6653](https://github.com/vllm-project/vllm-ascend/pull/6653) [#6528](https://github.com/vllm-project/vllm-ascend/pull/6528)" +"**动态 EPLB**:修复了动态 EPLB 无效的缺陷,且 EPLB 不再依赖特定模型。 [#6653](https://github.com" +"/vllm-project/vllm-ascend/pull/6653) [#6528](https://github.com/vllm-" +"project/vllm-ascend/pull/6528)" #: ../../source/user_guide/release_notes.md:228 msgid "" @@ -1254,7 +1426,8 @@ msgid "" "mooncake backend. [#6498](https://github.com/vllm-project/vllm-" "ascend/pull/6498)" msgstr "" -"**KV 池 Mooncake 后端**:正确初始化了 mooncake 后端的 head_or_tp_rank。 [#6498](https://github.com/vllm-project/vllm-ascend/pull/6498)" +"**KV 池 Mooncake 后端**:正确初始化了 mooncake 后端的 head_or_tp_rank。 " +"[#6498](https://github.com/vllm-project/vllm-ascend/pull/6498)" #: ../../source/user_guide/release_notes.md:229 msgid "" @@ -1262,35 +1435,40 @@ msgid "" "supports recompute scheduler. [#5900](https://github.com/vllm-project" "/vllm-ascend/pull/5900)" msgstr "" -"**分层连接器重计算调度器**:分层连接器现在支持重计算调度器。 [#5900](https://github.com/vllm-project/vllm-ascend/pull/5900)" +"**分层连接器重计算调度器**:分层连接器现在支持重计算调度器。 [#5900](https://github.com/vllm-project" +"/vllm-ascend/pull/5900)" #: ../../source/user_guide/release_notes.md:230 msgid "" "**Memcache Pool**: Fixed service startup failure when memcache pool is " "enabled. [#6229](https://github.com/vllm-project/vllm-ascend/pull/6229)" msgstr "" -"**Memcache 池**:修复了启用 memcache 池时服务启动失败的问题。 [#6229](https://github.com/vllm-project/vllm-ascend/pull/6229)" +"**Memcache 池**:修复了启用 memcache 池时服务启动失败的问题。 [#6229](https://github.com" +"/vllm-project/vllm-ascend/pull/6229)" #: ../../source/user_guide/release_notes.md:231 msgid "" "**AddRMSNormQuant**: Fixed AddRMSNormQuant not taking effect. " "[#6620](https://github.com/vllm-project/vllm-ascend/pull/6620)" msgstr "" -"**AddRMSNormQuant**:修复了 AddRMSNormQuant 未生效的问题。 [#6620](https://github.com/vllm-project/vllm-ascend/pull/6620)" +"**AddRMSNormQuant**:修复了 AddRMSNormQuant 未生效的问题。 " +"[#6620](https://github.com/vllm-project/vllm-ascend/pull/6620)" #: ../../source/user_guide/release_notes.md:232 msgid "" "**Pooling Code**: Fixed pooling code issues and updated usage guide. " "[#6126](https://github.com/vllm-project/vllm-ascend/pull/6126)" msgstr "" -"**池化代码**:修复了池化代码问题并更新了使用指南。 [#6126](https://github.com/vllm-project/vllm-ascend/pull/6126)" +"**池化代码**:修复了池化代码问题并更新了使用指南。 [#6126](https://github.com/vllm-project/vllm-" +"ascend/pull/6126)" #: ../../source/user_guide/release_notes.md:233 msgid "" "**Context Parallel**: Fixed and unified the PD request discrimination " "logic. 
[#5939](https://github.com/vllm-project/vllm-ascend/pull/5939)" msgstr "" -"**上下文并行**:修复并统一了 PD 请求判别逻辑。 [#5939](https://github.com/vllm-project/vllm-ascend/pull/5939)" +"**上下文并行**:修复并统一了 PD 请求判别逻辑。 [#5939](https://github.com/vllm-project/vllm-" +"ascend/pull/5939)" #: ../../source/user_guide/release_notes.md:234 msgid "" @@ -1299,7 +1477,9 @@ msgid "" "/vllm-ascend/pull/6513) [#6430](https://github.com/vllm-project/vllm-" "ascend/pull/6430)" msgstr "" -"**npugraph_ex**:修复了重复模式问题,并为 allreduce rmsnorm 融合通道添加了额外检查。 [#6513](https://github.com/vllm-project/vllm-ascend/pull/6513) [#6430](https://github.com/vllm-project/vllm-ascend/pull/6430)" +"**npugraph_ex**:修复了重复模式问题,并为 allreduce rmsnorm 融合通道添加了额外检查。 " +"[#6513](https://github.com/vllm-project/vllm-ascend/pull/6513) " +"[#6430](https://github.com/vllm-project/vllm-ascend/pull/6430)" #: ../../source/user_guide/release_notes.md:235 msgid "" @@ -1307,7 +1487,8 @@ msgid "" "vLLM v0.14.1. [#6286](https://github.com/vllm-project/vllm-" "ascend/pull/6286)" msgstr "" -"**RecomputeScheduler**:修复了 RecomputeScheduler 与 vLLM v0.14.1 的兼容性问题。 [#6286](https://github.com/vllm-project/vllm-ascend/pull/6286)" +"**RecomputeScheduler**:修复了 RecomputeScheduler 与 vLLM v0.14.1 的兼容性问题。 " +"[#6286](https://github.com/vllm-project/vllm-ascend/pull/6286)" #: ../../source/user_guide/release_notes.md:237 msgid "v0.13.0 - 2026.02.06" @@ -1319,7 +1500,8 @@ msgid "" "[official doc](https://docs.vllm.ai/projects/ascend/en/v0.13.0/) to get " "started." msgstr "" -"这是 vLLM Ascend v0.13.0 的最终版本。请按照[官方文档](https://docs.vllm.ai/projects/ascend/en/v0.13.0/)开始使用。" +"这是 vLLM Ascend v0.13.0 " +"的最终版本。请按照[官方文档](https://docs.vllm.ai/projects/ascend/en/v0.13.0/)开始使用。" #: ../../source/user_guide/release_notes.md:243 msgid "**Model Support**" @@ -1335,7 +1517,12 @@ msgid "" "ascend/pull/4191) [#4805](https://github.com/vllm-project/vllm-" "ascend/pull/4805)" msgstr "" -"**DeepSeek-R1 & DeepSeek-V3.2**:[实验性]性能优化和异步调度增强。 [#3631](https://github.com/vllm-project/vllm-ascend/pull/3631) [#3900](https://github.com/vllm-project/vllm-ascend/pull/3900) [#3908](https://github.com/vllm-project/vllm-ascend/pull/3908) [#4191](https://github.com/vllm-project/vllm-ascend/pull/4191) [#4805](https://github.com/vllm-project/vllm-ascend/pull/4805)" +"**DeepSeek-R1 & DeepSeek-V3.2**:[实验性]性能优化和异步调度增强。 " +"[#3631](https://github.com/vllm-project/vllm-ascend/pull/3631) " +"[#3900](https://github.com/vllm-project/vllm-ascend/pull/3900) " +"[#3908](https://github.com/vllm-project/vllm-ascend/pull/3908) " +"[#4191](https://github.com/vllm-project/vllm-ascend/pull/4191) " +"[#4805](https://github.com/vllm-project/vllm-ascend/pull/4805)" #: ../../source/user_guide/release_notes.md:246 msgid "" @@ -1353,7 +1540,17 @@ msgid "" "ascend/pull/4477) [#4770](https://github.com/vllm-project/vllm-" "ascend/pull/4770)" msgstr "" -"**Qwen3-Next**:[实验性]全面支持 Qwen3-Next 系列模型,包括 80B-A3B-Instruct,支持全图模式、MTP、量化(W8A8)、NZ 优化和分块预填充。修复了多个准确性和稳定性问题。 [#3450](https://github.com/vllm-project/vllm-ascend/pull/3450) [#3572](https://github.com/vllm-project/vllm-ascend/pull/3572) [#3428](https://github.com/vllm-project/vllm-ascend/pull/3428) [#3918](https://github.com/vllm-project/vllm-ascend/pull/3918) [#4058](https://github.com/vllm-project/vllm-ascend/pull/4058) [#4245](https://github.com/vllm-project/vllm-ascend/pull/4245) [#4070](https://github.com/vllm-project/vllm-ascend/pull/4070) [#4477](https://github.com/vllm-project/vllm-ascend/pull/4477) [#4770](https://github.com/vllm-project/vllm-ascend/pull/4770)" +"**Qwen3-Next**:[实验性]全面支持 
Qwen3-Next 系列模型,包括 80B-A3B-" +"Instruct,支持全图模式、MTP、量化(W8A8)、NZ 优化和分块预填充。修复了多个准确性和稳定性问题。 " +"[#3450](https://github.com/vllm-project/vllm-ascend/pull/3450) " +"[#3572](https://github.com/vllm-project/vllm-ascend/pull/3572) " +"[#3428](https://github.com/vllm-project/vllm-ascend/pull/3428) " +"[#3918](https://github.com/vllm-project/vllm-ascend/pull/3918) " +"[#4058](https://github.com/vllm-project/vllm-ascend/pull/4058) " +"[#4245](https://github.com/vllm-project/vllm-ascend/pull/4245) " +"[#4070](https://github.com/vllm-project/vllm-ascend/pull/4070) " +"[#4477](https://github.com/vllm-project/vllm-ascend/pull/4477) " +"[#4770](https://github.com/vllm-project/vllm-ascend/pull/4770)" #: ../../source/user_guide/release_notes.md:247 msgid "" @@ -1362,21 +1559,25 @@ msgid "" "/vllm-ascend/pull/3796) [#3964](https://github.com/vllm-project/vllm-" "ascend/pull/3964)" msgstr "" -"**InternVL**:新增对 InternVL 模型的支持,包含全面的端到端测试和准确性评估。 [#3796](https://github.com/vllm-project/vllm-ascend/pull/3796) [#3964](https://github.com/vllm-project/vllm-ascend/pull/3964)" +"**InternVL**:新增对 InternVL 模型的支持,包含全面的端到端测试和准确性评估。 " +"[#3796](https://github.com/vllm-project/vllm-ascend/pull/3796) " +"[#3964](https://github.com/vllm-project/vllm-ascend/pull/3964)" #: ../../source/user_guide/release_notes.md:248 msgid "" "**LongCat-Flash**: [Experimental]Added support for LongCat-Flash model. " "[#3833](https://github.com/vllm-project/vllm-ascend/pull/3833)" msgstr "" -"**LongCat-Flash**:[实验性]新增对 LongCat-Flash 模型的支持。 [#3833](https://github.com/vllm-project/vllm-ascend/pull/3833)" +"**LongCat-Flash**:[实验性]新增对 LongCat-Flash 模型的支持。 " +"[#3833](https://github.com/vllm-project/vllm-ascend/pull/3833)" #: ../../source/user_guide/release_notes.md:249 msgid "" "**minimax_m2**: [Experimental]Added support for minimax_m2 model. " "[#5624](https://github.com/vllm-project/vllm-ascend/pull/5624)" msgstr "" -"**minimax_m2**:[实验性]新增对 minimax_m2 模型的支持。 [#5624](https://github.com/vllm-project/vllm-ascend/pull/5624)" +"**minimax_m2**:[实验性]新增对 minimax_m2 模型的支持。 [#5624](https://github.com" +"/vllm-project/vllm-ascend/pull/5624)" #: ../../source/user_guide/release_notes.md:250 msgid "" @@ -1384,7 +1585,8 @@ msgid "" "attention and Whisper models. 
[#5592](https://github.com/vllm-project" "/vllm-ascend/pull/5592)" msgstr "" -"**Whisper 与交叉注意力**:[实验性]新增对交叉注意力和 Whisper 模型的支持。 [#5592](https://github.com/vllm-project/vllm-ascend/pull/5592)" +"**Whisper 与交叉注意力**:[实验性]新增对交叉注意力和 Whisper 模型的支持。 " +"[#5592](https://github.com/vllm-project/vllm-ascend/pull/5592)" #: ../../source/user_guide/release_notes.md:251 msgid "" @@ -1396,14 +1598,20 @@ msgid "" "[#6057](https://github.com/vllm-project/vllm-ascend/pull/6057) " "[#6146](https://github.com/vllm-project/vllm-ascend/pull/6146)" msgstr "" -"**池化模型**:[实验性]新增对池化模型的支持,包含 PCP 适配,并修复了多个与池化相关的缺陷。 [#3122](https://github.com/vllm-project/vllm-ascend/pull/3122) [#4143](https://github.com/vllm-project/vllm-ascend/pull/4143) [#6056](https://github.com/vllm-project/vllm-ascend/pull/6056) [#6057](https://github.com/vllm-project/vllm-ascend/pull/6057) [#6146](https://github.com/vllm-project/vllm-ascend/pull/6146)" +"**池化模型**:[实验性]新增对池化模型的支持,包含 PCP 适配,并修复了多个与池化相关的缺陷。 " +"[#3122](https://github.com/vllm-project/vllm-ascend/pull/3122) " +"[#4143](https://github.com/vllm-project/vllm-ascend/pull/4143) " +"[#6056](https://github.com/vllm-project/vllm-ascend/pull/6056) " +"[#6057](https://github.com/vllm-project/vllm-ascend/pull/6057) " +"[#6146](https://github.com/vllm-project/vllm-ascend/pull/6146)" #: ../../source/user_guide/release_notes.md:252 msgid "" "**PanguUltraMoE**: [Experimental]Added support for PanguUltraMoE model. " "[#4615](https://github.com/vllm-project/vllm-ascend/pull/4615)" msgstr "" -"**PanguUltraMoE**:[实验性]新增对 PanguUltraMoE 模型的支持。 [#4615](https://github.com/vllm-project/vllm-ascend/pull/4615)" +"**PanguUltraMoE**:[实验性]新增对 PanguUltraMoE 模型的支持。 " +"[#4615](https://github.com/vllm-project/vllm-ascend/pull/4615)" #: ../../source/user_guide/release_notes.md:254 msgid "**Core Features**" @@ -1424,15 +1632,17 @@ msgid "" "[#4183](https://github.com/vllm-project/vllm-ascend/pull/4183) " "[#5672](https://github.com/vllm-project/vllm-ascend/pull/5672)" msgstr "" -"**上下文并行 (PCP/DCP)**: [实验性] 新增对预填充上下文并行 (PCP) 和解码上下文并行 (DCP) 的全面支持,集成了 ACLGraph、MTP、分块预填充、MLAPO 和 Mooncake 连接器。此为实验性功能,欢迎反馈。" -"[#3260](https://github.com/vllm-project/vllm-ascend/pull/3260) " -"[#3731](https://github.com/vllm-project/vllm-ascend/pull/3731) " -"[#3801](https://github.com/vllm-project/vllm-ascend/pull/3801) " -"[#3980](https://github.com/vllm-project/vllm-ascend/pull/3980) " -"[#4066](https://github.com/vllm-project/vllm-ascend/pull/4066) " -"[#4098](https://github.com/vllm-project/vllm-ascend/pull/4098) " -"[#4183](https://github.com/vllm-project/vllm-ascend/pull/4183) " -"[#5672](https://github.com/vllm-project/vllm-ascend/pull/5672)" +"**上下文并行 (PCP/DCP)**: [实验性] 新增对预填充上下文并行 (PCP) 和解码上下文并行 (DCP) 的全面支持,集成了 " +"ACLGraph、MTP、分块预填充、MLAPO 和 Mooncake " +"连接器。此为实验性功能,欢迎反馈。[#3260](https://github.com/vllm-project/vllm-" +"ascend/pull/3260) [#3731](https://github.com/vllm-project/vllm-" +"ascend/pull/3731) [#3801](https://github.com/vllm-project/vllm-" +"ascend/pull/3801) [#3980](https://github.com/vllm-project/vllm-" +"ascend/pull/3980) [#4066](https://github.com/vllm-project/vllm-" +"ascend/pull/4066) [#4098](https://github.com/vllm-project/vllm-" +"ascend/pull/4098) [#4183](https://github.com/vllm-project/vllm-" +"ascend/pull/4183) [#5672](https://github.com/vllm-project/vllm-" +"ascend/pull/5672)" #: ../../source/user_guide/release_notes.md:257 msgid "" @@ -1447,14 +1657,15 @@ msgid "" "ascend/pull/3894) [#5118](https://github.com/vllm-project/vllm-" "ascend/pull/5118)" msgstr "" -"**全图模式 (ACLGraph)**: [实验性] 增强了全图模式,支持 
GQA,进行了内存优化,统一了 ACLGraph 与 Torchair 之间的逻辑,并提升了稳定性。" -"[#3560](https://github.com/vllm-project/vllm-ascend/pull/3560) " -"[#3970](https://github.com/vllm-project/vllm-ascend/pull/3970) " -"[#3812](https://github.com/vllm-project/vllm-ascend/pull/3812) " -"[#3879](https://github.com/vllm-project/vllm-ascend/pull/3879) " -"[#3888](https://github.com/vllm-project/vllm-ascend/pull/3888) " -"[#3894](https://github.com/vllm-project/vllm-ascend/pull/3894) " -"[#5118](https://github.com/vllm-project/vllm-ascend/pull/5118)" +"**全图模式 (ACLGraph)**: [实验性] 增强了全图模式,支持 GQA,进行了内存优化,统一了 ACLGraph 与 Torchair" +" 之间的逻辑,并提升了稳定性。[#3560](https://github.com/vllm-project/vllm-" +"ascend/pull/3560) [#3970](https://github.com/vllm-project/vllm-" +"ascend/pull/3970) [#3812](https://github.com/vllm-project/vllm-" +"ascend/pull/3812) [#3879](https://github.com/vllm-project/vllm-" +"ascend/pull/3879) [#3888](https://github.com/vllm-project/vllm-" +"ascend/pull/3888) [#3894](https://github.com/vllm-project/vllm-" +"ascend/pull/3894) [#5118](https://github.com/vllm-project/vllm-" +"ascend/pull/5118)" #: ../../source/user_guide/release_notes.md:258 msgid "" @@ -1473,17 +1684,18 @@ msgid "" "ascend/pull/4770) [#5477](https://github.com/vllm-project/vllm-" "ascend/pull/5477)" msgstr "" -"**多令牌预测 (MTP)**: 显著改进了 MTP 支持,包括针对 DeepSeek 的分块预填充、量化支持、全图模式、PCP/DCP 集成和异步调度。MTP 现在在大多数情况下可用,推荐使用。" -"[#2711](https://github.com/vllm-project/vllm-ascend/pull/2711) " -"[#2713](https://github.com/vllm-project/vllm-ascend/pull/2713) " -"[#3620](https://github.com/vllm-project/vllm-ascend/pull/3620) " -"[#3845](https://github.com/vllm-project/vllm-ascend/pull/3845) " -"[#3910](https://github.com/vllm-project/vllm-ascend/pull/3910) " -"[#3915](https://github.com/vllm-project/vllm-ascend/pull/3915) " -"[#4102](https://github.com/vllm-project/vllm-ascend/pull/4102) " -"[#4111](https://github.com/vllm-project/vllm-ascend/pull/4111) " -"[#4770](https://github.com/vllm-project/vllm-ascend/pull/4770) " -"[#5477](https://github.com/vllm-project/vllm-ascend/pull/5477)" +"**多令牌预测 (MTP)**: 显著改进了 MTP 支持,包括针对 DeepSeek 的分块预填充、量化支持、全图模式、PCP/DCP " +"集成和异步调度。MTP 现在在大多数情况下可用,推荐使用。[#2711](https://github.com/vllm-project" +"/vllm-ascend/pull/2711) [#2713](https://github.com/vllm-project/vllm-" +"ascend/pull/2713) [#3620](https://github.com/vllm-project/vllm-" +"ascend/pull/3620) [#3845](https://github.com/vllm-project/vllm-" +"ascend/pull/3845) [#3910](https://github.com/vllm-project/vllm-" +"ascend/pull/3910) [#3915](https://github.com/vllm-project/vllm-" +"ascend/pull/3915) [#4102](https://github.com/vllm-project/vllm-" +"ascend/pull/4102) [#4111](https://github.com/vllm-project/vllm-" +"ascend/pull/4111) [#4770](https://github.com/vllm-project/vllm-" +"ascend/pull/4770) [#5477](https://github.com/vllm-project/vllm-" +"ascend/pull/5477)" #: ../../source/user_guide/release_notes.md:259 msgid "" @@ -1493,10 +1705,10 @@ msgid "" "ascend/pull/4893) [#5804](https://github.com/vllm-project/vllm-" "ascend/pull/5804)" msgstr "" -"**Eagle 推测解码**: Eagle 推测解码现在可与全图模式协同工作,且更加稳定。" -"[#5118](https://github.com/vllm-project/vllm-ascend/pull/5118) " -"[#4893](https://github.com/vllm-project/vllm-ascend/pull/4893) " -"[#5804](https://github.com/vllm-project/vllm-ascend/pull/5804)" +"**Eagle 推测解码**: Eagle 推测解码现在可与全图模式协同工作,且更加稳定。[#5118](https://github.com" +"/vllm-project/vllm-ascend/pull/5118) [#4893](https://github.com/vllm-" +"project/vllm-ascend/pull/4893) [#5804](https://github.com/vllm-project" +"/vllm-ascend/pull/5804)" #: ../../source/user_guide/release_notes.md:260 msgid 
"" @@ -1508,8 +1720,8 @@ msgid "" "[#5008](https://github.com/vllm-project/vllm-ascend/pull/5008) " "[#3072](https://github.com/vllm-project/vllm-ascend/pull/3072)" msgstr "" -"**PD 解耦**: 将 ADXL 引擎设置为解耦预填充的默认后端,提升了性能和稳定性。为 DeepSeek 解码节点增加了 KV NZ 功能支持。" -"[#3761](https://github.com/vllm-project/vllm-ascend/pull/3761) " +"**PD 解耦**: 将 ADXL 引擎设置为解耦预填充的默认后端,提升了性能和稳定性。为 DeepSeek 解码节点增加了 KV NZ " +"功能支持。[#3761](https://github.com/vllm-project/vllm-ascend/pull/3761) " "[#3950](https://github.com/vllm-project/vllm-ascend/pull/3950) " "[#5008](https://github.com/vllm-project/vllm-ascend/pull/5008) " "[#3072](https://github.com/vllm-project/vllm-ascend/pull/3072)" @@ -1525,12 +1737,13 @@ msgid "" "ascend/pull/4183) [#5303](https://github.com/vllm-project/vllm-" "ascend/pull/5303)" msgstr "" -"**KV 池 & Mooncake**: 增强了 KV 池,支持用于 PCP/DCP 的 Mooncake 连接器、多输入后缀,并提升了 Layerwise 连接器的性能。" -"[#3690](https://github.com/vllm-project/vllm-ascend/pull/3690) " -"[#3752](https://github.com/vllm-project/vllm-ascend/pull/3752) " -"[#3849](https://github.com/vllm-project/vllm-ascend/pull/3849) " -"[#4183](https://github.com/vllm-project/vllm-ascend/pull/4183) " -"[#5303](https://github.com/vllm-project/vllm-ascend/pull/5303)" +"**KV 池 & Mooncake**: 增强了 KV 池,支持用于 PCP/DCP 的 Mooncake 连接器、多输入后缀,并提升了 " +"Layerwise 连接器的性能。[#3690](https://github.com/vllm-project/vllm-" +"ascend/pull/3690) [#3752](https://github.com/vllm-project/vllm-" +"ascend/pull/3752) [#3849](https://github.com/vllm-project/vllm-" +"ascend/pull/3849) [#4183](https://github.com/vllm-project/vllm-" +"ascend/pull/4183) [#5303](https://github.com/vllm-project/vllm-" +"ascend/pull/5303)" #: ../../source/user_guide/release_notes.md:262 msgid "" @@ -1538,8 +1751,9 @@ msgid "" " stable with many bug fixes. Mix placement now works. " "[#6086](https://github.com/vllm-project/vllm-ascend/pull/6086)" msgstr "" -"**EPLB (弹性预填充负载均衡)**: [实验性] EPLB 现在更加稳定,修复了许多错误。混合放置现已可用。" -"[#6086](https://github.com/vllm-project/vllm-ascend/pull/6086)" +"**EPLB (弹性预填充负载均衡)**: [实验性] EPLB " +"现在更加稳定,修复了许多错误。混合放置现已可用。[#6086](https://github.com/vllm-project/vllm-" +"ascend/pull/6086)" #: ../../source/user_guide/release_notes.md:263 msgid "" @@ -1549,10 +1763,11 @@ msgid "" "/vllm-ascend/pull/3986) [#3763](https://github.com/vllm-project/vllm-" "ascend/pull/3763)" msgstr "" -"**纯解码模式**: 在 full_decode_only 模式下增加了对 Qwen3-Next 和 DeepSeekv32 的支持,并修复了相关错误。" -"[#3949](https://github.com/vllm-project/vllm-ascend/pull/3949) " -"[#3986](https://github.com/vllm-project/vllm-ascend/pull/3986) " -"[#3763](https://github.com/vllm-project/vllm-ascend/pull/3763)" +"**纯解码模式**: 在 full_decode_only 模式下增加了对 Qwen3-Next 和 DeepSeekv32 " +"的支持,并修复了相关错误。[#3949](https://github.com/vllm-project/vllm-" +"ascend/pull/3949) [#3986](https://github.com/vllm-project/vllm-" +"ascend/pull/3986) [#3763](https://github.com/vllm-project/vllm-" +"ascend/pull/3763)" #: ../../source/user_guide/release_notes.md:264 msgid "" @@ -1560,16 +1775,17 @@ msgid "" "V2, the next generation of vLLM. It will be used by default in future " "releases. 
[#5210](https://github.com/vllm-project/vllm-ascend/pull/5210)" msgstr "" -"**Model Runner V2**: [实验性] 新增对下一代 vLLM 的 Model Runner V2 的基本支持。它将在未来的版本中默认启用。" -"[#5210](https://github.com/vllm-project/vllm-ascend/pull/5210)" +"**Model Runner V2**: [实验性] 新增对下一代 vLLM 的 Model Runner V2 " +"的基本支持。它将在未来的版本中默认启用。[#5210](https://github.com/vllm-project/vllm-" +"ascend/pull/5210)" #: ../../source/user_guide/release_notes.md:268 msgid "" "**W8A16 Quantization**: [Experimental]Added new W8A16 quantization method" " support. [#4541](https://github.com/vllm-project/vllm-ascend/pull/4541)" msgstr "" -"**W8A16 量化**: [实验性] 新增对 W8A16 量化方法的支持。" -"[#4541](https://github.com/vllm-project/vllm-ascend/pull/4541)" +"**W8A16 量化**: [实验性] 新增对 W8A16 量化方法的支持。[#4541](https://github.com/vllm-" +"project/vllm-ascend/pull/4541)" #: ../../source/user_guide/release_notes.md:269 msgid "" @@ -1577,8 +1793,8 @@ msgid "" "Offloading. [#4411](https://github.com/vllm-project/vllm-" "ascend/pull/4411)" msgstr "" -"**UCM 连接器**: [实验性] 新增用于 KV 缓存卸载的 UCMConnector。" -"[#4411](https://github.com/vllm-project/vllm-ascend/pull/4411)" +"**UCM 连接器**: [实验性] 新增用于 KV 缓存卸载的 UCMConnector。[#4411](https://github.com" +"/vllm-project/vllm-ascend/pull/4411)" #: ../../source/user_guide/release_notes.md:270 msgid "" @@ -1586,8 +1802,8 @@ msgid "" "invariant feature. [#5517](https://github.com/vllm-project/vllm-" "ascend/pull/5517)" msgstr "" -"**批次不变性**: [实验性] 实现了批次不变性功能的基本框架。" -"[#5517](https://github.com/vllm-project/vllm-ascend/pull/5517)" +"**批次不变性**: [实验性] 实现了批次不变性功能的基本框架。[#5517](https://github.com/vllm-project" +"/vllm-ascend/pull/5517)" #: ../../source/user_guide/release_notes.md:271 msgid "" @@ -1595,8 +1811,9 @@ msgid "" "disable_padded_drafter_batch support in Eagle. [#4893](https://github.com" "/vllm-project/vllm-ascend/pull/4893)" msgstr "" -"**采样**: 增强了采样功能,在 Eagle 中支持 async_scheduler 和 disable_padded_drafter_batch。" -"[#4893](https://github.com/vllm-project/vllm-ascend/pull/4893)" +"**采样**: 增强了采样功能,在 Eagle 中支持 async_scheduler 和 " +"disable_padded_drafter_batch。[#4893](https://github.com/vllm-project" +"/vllm-ascend/pull/4893)" #: ../../source/user_guide/release_notes.md:275 msgid "**Custom Operators**: Added multiple custom operators including:" @@ -1607,16 +1824,14 @@ msgid "" "Fused matmul/reduce-scatter kernel [#3693](https://github.com/vllm-" "project/vllm-ascend/pull/3693)" msgstr "" -"融合的 matmul/reduce-scatter 内核 " -"[#3693](https://github.com/vllm-project/vllm-ascend/pull/3693)" +"融合的 matmul/reduce-scatter 内核 [#3693](https://github.com/vllm-project" +"/vllm-ascend/pull/3693)" #: ../../source/user_guide/release_notes.md:277 msgid "" "mrope fusion op [#3708](https://github.com/vllm-project/vllm-" "ascend/pull/3708)" -msgstr "" -"mrope 融合算子 " -"[#3708](https://github.com/vllm-project/vllm-ascend/pull/3708)" +msgstr "mrope 融合算子 [#3708](https://github.com/vllm-project/vllm-ascend/pull/3708)" #: ../../source/user_guide/release_notes.md:278 msgid "" @@ -1631,8 +1846,8 @@ msgid "" "l2norm triton kernel [#4595](https://github.com/vllm-project/vllm-" "ascend/pull/4595)" msgstr "" -"l2norm triton 内核 " -"[#4595](https://github.com/vllm-project/vllm-ascend/pull/4595)" +"l2norm triton 内核 [#4595](https://github.com/vllm-project/vllm-" +"ascend/pull/4595)" #: ../../source/user_guide/release_notes.md:280 msgid "RejectSampler, MoeInitRoutingCustom, DispatchFFNCombine custom ops" @@ -1645,8 +1860,8 @@ msgid "" "project/vllm-ascend/pull/5077) [#4168](https://github.com/vllm-project" "/vllm-ascend/pull/4168)" msgstr "" -"**算子融合**: 新增了支持 SP 的 
AddRmsnormQuant 融合模式以及用于量化的 inductor 融合。" -"[#5077](https://github.com/vllm-project/vllm-ascend/pull/5077) " +"**算子融合**: 新增了支持 SP 的 AddRmsnormQuant 融合模式以及用于量化的 inductor " +"融合。[#5077](https://github.com/vllm-project/vllm-ascend/pull/5077) " "[#4168](https://github.com/vllm-project/vllm-ascend/pull/4168)" #: ../../source/user_guide/release_notes.md:282 @@ -1655,8 +1870,8 @@ msgid "" "maintainability. [#3769](https://github.com/vllm-project/vllm-" "ascend/pull/3769)" msgstr "" -"**MLA/SFA**: 将 SFA 重构为 MLA 架构,以提高可维护性。" -"[#3769](https://github.com/vllm-project/vllm-ascend/pull/3769)" +"**MLA/SFA**: 将 SFA 重构为 MLA 架构,以提高可维护性。[#3769](https://github.com/vllm-" +"project/vllm-ascend/pull/3769)" #: ../../source/user_guide/release_notes.md:283 msgid "" @@ -1666,8 +1881,10 @@ msgid "" "[FAQs](https://docs.vllm.ai/projects/ascend/en/v0.13.0/faqs.html) to " "enable it. [#4025](https://github.com/vllm-project/vllm-ascend/pull/4025)" msgstr "" -"**FIA 算子**: 适配了具有 flash decoding 功能的 npu_fused_infer_attention_score。为优化小批量场景下的性能,此注意力算子现已可用。请参阅 [常见问题](https://docs.vllm.ai/projects/ascend/en/v0.13.0/faqs.html) 中的第 22 项以启用它。" -"[#4025](https://github.com/vllm-project/vllm-ascend/pull/4025)" +"**FIA 算子**: 适配了具有 flash decoding 功能的 " +"npu_fused_infer_attention_score。为优化小批量场景下的性能,此注意力算子现已可用。请参阅 " +"[常见问题](https://docs.vllm.ai/projects/ascend/en/v0.13.0/faqs.html) 中的第 22 " +"项以启用它。[#4025](https://github.com/vllm-project/vllm-ascend/pull/4025)" #: ../../source/user_guide/release_notes.md:284 msgid "" @@ -1675,8 +1892,8 @@ msgid "" "enables for CANN 8.5. [#6039](https://github.com/vllm-project/vllm-" "ascend/pull/6039)" msgstr "" -"**CANN 8.5 支持**: 在启用 FIA 算子后,为 CANN 8.5 移除了 CP 冗余变量。" -"[#6039](https://github.com/vllm-project/vllm-ascend/pull/6039)" +"**CANN 8.5 支持**: 在启用 FIA 算子后,为 CANN 8.5 移除了 CP " +"冗余变量。[#6039](https://github.com/vllm-project/vllm-ascend/pull/6039)" #: ../../source/user_guide/release_notes.md:288 msgid "" @@ -1692,8 +1909,8 @@ msgid "" "/vllm-ascend/pull/4805) [#2713](https://github.com/vllm-project/vllm-" "ascend/pull/2713)" msgstr "" -"**DeepSeek 性能**: [实验性] 通过消除异步调度中的 HD 同步以及优化 MTP 的内存使用,提升了 DeepSeek V3.2 的性能。" -"[#4805](https://github.com/vllm-project/vllm-ascend/pull/4805) " +"**DeepSeek 性能**: [实验性] 通过消除异步调度中的 HD 同步以及优化 MTP 的内存使用,提升了 DeepSeek V3.2 " +"的性能。[#4805](https://github.com/vllm-project/vllm-ascend/pull/4805) " "[#2713](https://github.com/vllm-project/vllm-ascend/pull/2713)" #: ../../source/user_guide/release_notes.md:291 @@ -1704,10 +1921,10 @@ msgid "" "ascend/pull/5984) [#5765](https://github.com/vllm-project/vllm-" "ascend/pull/5765)" msgstr "" -"**Qwen3-Next 性能**: [实验性] 通过 Triton 算子和优化提升了性能。" -"[#5664](https://github.com/vllm-project/vllm-ascend/pull/5664) " -"[#5984](https://github.com/vllm-project/vllm-ascend/pull/5984) " -"[#5765](https://github.com/vllm-project/vllm-ascend/pull/5765)" +"**Qwen3-Next 性能**: [实验性] 通过 Triton 算子和优化提升了性能。[#5664](https://github.com" +"/vllm-project/vllm-ascend/pull/5664) [#5984](https://github.com/vllm-" +"project/vllm-ascend/pull/5984) [#5765](https://github.com/vllm-project" +"/vllm-ascend/pull/5765)" #: ../../source/user_guide/release_notes.md:292 msgid "" @@ -1718,8 +1935,8 @@ msgid "" "ascend/pull/4458) [#5848](https://github.com/vllm-project/vllm-" "ascend/pull/5848)" msgstr "" -"**FlashComm**: 增强了 FlashComm v2 优化,包括 o_shared linear 和通信域修复。" -"[#3232](https://github.com/vllm-project/vllm-ascend/pull/3232) " +"**FlashComm**: 增强了 FlashComm v2 优化,包括 o_shared linear " +"和通信域修复。[#3232](https://github.com/vllm-project/vllm-ascend/pull/3232) " 
"[#4188](https://github.com/vllm-project/vllm-ascend/pull/4188) " "[#4458](https://github.com/vllm-project/vllm-ascend/pull/4458) " "[#5848](https://github.com/vllm-project/vllm-ascend/pull/5848)" @@ -1731,8 +1948,8 @@ msgid "" "ascend/pull/3738) [#5329](https://github.com/vllm-project/vllm-" "ascend/pull/5329)" msgstr "" -"**MoE 优化**: 针对 MoE 模型优化了 all2allv,并增强了 all-reduce 跳过逻辑。" -"[#3738](https://github.com/vllm-project/vllm-ascend/pull/3738) " +"**MoE 优化**: 针对 MoE 模型优化了 all2allv,并增强了 all-reduce " +"跳过逻辑。[#3738](https://github.com/vllm-project/vllm-ascend/pull/3738) " "[#5329](https://github.com/vllm-project/vllm-ascend/pull/5329)" #: ../../source/user_guide/release_notes.md:294 @@ -1744,10 +1961,11 @@ msgid "" "[#3778](https://github.com/vllm-project/vllm-ascend/pull/3778) " "[#5390](https://github.com/vllm-project/vllm-ascend/pull/5390)" msgstr "" -"**注意力优化**: 将注意力更新流移出循环,为长序列优化将 BSND 格式转换为 TND 格式,并在注意力切换到 transpose_batchmatmul 后移除了转置步骤。" -"[#3848](https://github.com/vllm-project/vllm-ascend/pull/3848) " -"[#3778](https://github.com/vllm-project/vllm-ascend/pull/3778) " -"[#5390](https://github.com/vllm-project/vllm-ascend/pull/5390)" +"**注意力优化**: 将注意力更新流移出循环,为长序列优化将 BSND 格式转换为 TND 格式,并在注意力切换到 " +"transpose_batchmatmul 后移除了转置步骤。[#3848](https://github.com/vllm-project" +"/vllm-ascend/pull/3848) [#3778](https://github.com/vllm-project/vllm-" +"ascend/pull/3778) [#5390](https://github.com/vllm-project/vllm-" +"ascend/pull/5390)" #: ../../source/user_guide/release_notes.md:295 msgid "" @@ -1755,8 +1973,8 @@ msgid "" "Allgather EP. [#3420](https://github.com/vllm-project/vllm-" "ascend/pull/3420)" msgstr "" -"**量化性能**: 在 Allgather EP 中将量化操作移至 allgather 之前。" -"[#3420](https://github.com/vllm-project/vllm-ascend/pull/3420)" +"**量化性能**:在 Allgather EP 中将量化操作移至 allgather 之前。[#3420](https://github.com" +"/vllm-project/vllm-ascend/pull/3420)" #: ../../source/user_guide/release_notes.md:296 msgid "" @@ -1781,8 +1999,8 @@ msgid "" "ascend/pull/4113) [#4233](https://github.com/vllm-project/vllm-" "ascend/pull/4233)" msgstr "" -"**异步调度**:修复了异步复制问题,并消除了异步调度中的挂起现象。[#4113](https://github.com/vllm-project/vllm-" -"ascend/pull/4113) [#4233](https://github.com/vllm-project/vllm-" +"**异步调度**:修复了异步复制问题,并消除了异步调度中的挂起现象。[#4113](https://github.com/vllm-project" +"/vllm-ascend/pull/4113) [#4233](https://github.com/vllm-project/vllm-" "ascend/pull/4233)" #: ../../source/user_guide/release_notes.md:299 @@ -1792,9 +2010,9 @@ msgid "" "project/vllm-ascend/pull/4063) [#3677](https://github.com/vllm-project" "/vllm-ascend/pull/3677)" msgstr "" -"**内存操作**:移除了冗余的 D2H 操作,并删除了 model_runner 中的冗余操作。[#4063](https://github.com/vllm-" -"project/vllm-ascend/pull/4063) [#3677](https://github.com/vllm-project/vllm-" -"ascend/pull/3677)" +"**内存操作**:移除了冗余的 D2H 操作,并删除了 model_runner " +"中的冗余操作。[#4063](https://github.com/vllm-project/vllm-ascend/pull/4063) " +"[#3677](https://github.com/vllm-project/vllm-ascend/pull/3677)" #: ../../source/user_guide/release_notes.md:300 msgid "" @@ -1802,8 +2020,9 @@ msgid "" "performance gain. [#5918](https://github.com/vllm-project/vllm-" "ascend/pull/5918)" msgstr "" -"**Rope 嵌入**:使用 Triton 内核优化了 rope embedding,带来了巨大的性能提升。[#5918](https://github.com/vllm-" -"project/vllm-ascend/pull/5918)" +"**Rope 嵌入**:使用 Triton 内核优化了 rope " +"embedding,带来了巨大的性能提升。[#5918](https://github.com/vllm-project/vllm-" +"ascend/pull/5918)" #: ../../source/user_guide/release_notes.md:301 msgid "" @@ -1811,8 +2030,8 @@ msgid "" "constraint. 
[#6098](https://github.com/vllm-project/vllm-" "ascend/pull/6098)" msgstr "" -"**采样**:新增支持无 top_k 约束的高级 apply_top_k_top_p 功能。[#6098](https://github.com/vllm-" -"project/vllm-ascend/pull/6098)" +"**采样**:新增支持无 top_k 约束的高级 apply_top_k_top_p 功能。[#6098](https://github.com" +"/vllm-project/vllm-ascend/pull/6098)" #: ../../source/user_guide/release_notes.md:302 msgid "" @@ -1820,8 +2039,8 @@ msgid "" "for better performance. [#6204](https://github.com/vllm-project/vllm-" "ascend/pull/6204)" msgstr "" -"**多模态**:在 AscendMMEncoderAttention 中并行化 Q/K/V 填充以提升性能。[#6204](https://github.com/vllm-" -"project/vllm-ascend/pull/6204)" +"**多模态**:在 AscendMMEncoderAttention 中并行化 Q/K/V " +"填充以提升性能。[#6204](https://github.com/vllm-project/vllm-ascend/pull/6204)" #: ../../source/user_guide/release_notes.md:306 msgid "" @@ -1835,8 +2054,7 @@ msgstr "" msgid "" "**torch-npu**: Upgraded to 2.8.0.post2. It's installed in the docker " "container by default." -msgstr "" -"**torch-npu**:升级至 2.8.0.post2。默认已在 Docker 容器中安装。" +msgstr "**torch-npu**:升级至 2.8.0.post2。默认已在 Docker 容器中安装。" #: ../../source/user_guide/release_notes.md:308 msgid "" @@ -1851,23 +2069,22 @@ msgid "" "**vLLM**: Upgraded to 0.13.0 and dropped 0.12.0 support. " "[#5146](https://github.com/vllm-project/vllm-ascend/pull/5146)" msgstr "" -"**vLLM**:升级至 0.13.0 并停止支持 0.12.0。[#5146](https://github.com/vllm-project/vllm-" -"ascend/pull/5146)" +"**vLLM**:升级至 0.13.0 并停止支持 0.12.0。[#5146](https://github.com/vllm-project" +"/vllm-ascend/pull/5146)" #: ../../source/user_guide/release_notes.md:310 msgid "" "**Transformers**: Upgraded to >= 4.57.4 [#5250](https://github.com/vllm-" "project/vllm-ascend/pull/5250)" msgstr "" -"**Transformers**:升级至 >= 4.57.4 [#5250](https://github.com/vllm-project/vllm-" -"ascend/pull/5250)" +"**Transformers**:升级至 >= 4.57.4 [#5250](https://github.com/vllm-project" +"/vllm-ascend/pull/5250)" #: ../../source/user_guide/release_notes.md:314 msgid "" "**CPUOffloadingConnector** is deprecated. We'll remove it in the next " "release. It'll be replaced by CPUOffload feature from vLLM in the future." -msgstr "" -"**CPUOffloadingConnector** 已弃用。我们将在下一个版本中移除它。未来将由 vLLM 的 CPUOffload 功能替代。" +msgstr "**CPUOffloadingConnector** 已弃用。我们将在下一个版本中移除它。未来将由 vLLM 的 CPUOffload 功能替代。" #: ../../source/user_guide/release_notes.md:315 msgid "" @@ -1876,7 +2093,8 @@ msgid "" " is deprecated." msgstr "" "**ProfileExecuteDuration** " -"[功能](https://docs.vllm.ai/projects/ascend/en/v0.13.0/developer_guide/performance_and_debug/profile_execute_duration.html) 已弃用。" +"[功能](https://docs.vllm.ai/projects/ascend/en/v0.13.0/developer_guide/performance_and_debug/profile_execute_duration.html)" +" 已弃用。" #: ../../source/user_guide/release_notes.md:316 msgid "" @@ -1901,14 +2119,18 @@ msgid "" "always enabled together. [#5272](https://github.com/vllm-project/vllm-" "ascend/pull/5272)" msgstr "" -"**VLLM_ASCEND_ENABLE_DENSE_OPTIMIZE** 已被移除,建议使用 `VLLM_ASCEND_ENABLE_PREFETCH_MLP` 替代,因为它们之前总是一起启用。[#5272](https://github.com/vllm-project/vllm-ascend/pull/5272)" +"**VLLM_ASCEND_ENABLE_DENSE_OPTIMIZE** 已被移除,建议使用 " +"`VLLM_ASCEND_ENABLE_PREFETCH_MLP` " +"替代,因为它们之前总是一起启用。[#5272](https://github.com/vllm-project/vllm-" +"ascend/pull/5272)" #: ../../source/user_guide/release_notes.md:319 msgid "" "**VLLM_ENABLE_FUSED_EXPERTS_ALLGATHER_EP** is dropped now. 
" "[#5270](https://github.com/vllm-project/vllm-ascend/pull/5270)" msgstr "" -"**VLLM_ENABLE_FUSED_EXPERTS_ALLGATHER_EP** 现已被移除。[#5270](https://github.com/vllm-project/vllm-ascend/pull/5270)" +"**VLLM_ENABLE_FUSED_EXPERTS_ALLGATHER_EP** " +"现已被移除。[#5270](https://github.com/vllm-project/vllm-ascend/pull/5270)" #: ../../source/user_guide/release_notes.md:320 msgid "" @@ -1917,14 +2139,17 @@ msgid "" "to set it to 2 if you make sure it works for your case. " "[#4878](https://github.com/vllm-project/vllm-ascend/pull/4878)" msgstr "" -"对于浮点权重的情况,**VLLM_ASCEND_ENABLE_NZ** 已被禁用,因为我们注意到在某些浮点场景下性能不佳。如果您确定它适用于您的情况,可以将其设置为 2。[#4878](https://github.com/vllm-project/vllm-ascend/pull/4878)" +"对于浮点权重的情况,**VLLM_ASCEND_ENABLE_NZ** " +"已被禁用,因为我们注意到在某些浮点场景下性能不佳。如果您确定它适用于您的情况,可以将其设置为 " +"2。[#4878](https://github.com/vllm-project/vllm-ascend/pull/4878)" #: ../../source/user_guide/release_notes.md:321 msgid "" "**chunked_prefill_for_mla** in `additional_config` is dropped now. " "[#5296](https://github.com/vllm-project/vllm-ascend/pull/5296)" msgstr "" -"`additional_config` 中的 **chunked_prefill_for_mla** 现已被移除。[#5296](https://github.com/vllm-project/vllm-ascend/pull/5296)" +"`additional_config` 中的 **chunked_prefill_for_mla** " +"现已被移除。[#5296](https://github.com/vllm-project/vllm-ascend/pull/5296)" #: ../../source/user_guide/release_notes.md:322 msgid "" @@ -1932,14 +2157,17 @@ msgid "" "and the type is changed from `dict` to `string`. " "[#5296](https://github.com/vllm-project/vllm-ascend/pull/5296)" msgstr "" -"`additional_config` 中的 **dump_config** 已重命名为 `dump_config_path`,类型也从 `dict` 更改为 `string`。[#5296](https://github.com/vllm-project/vllm-ascend/pull/5296)" +"`additional_config` 中的 **dump_config** 已重命名为 `dump_config_path`,类型也从 " +"`dict` 更改为 `string`。[#5296](https://github.com/vllm-project/vllm-" +"ascend/pull/5296)" #: ../../source/user_guide/release_notes.md:323 msgid "" "**--task parameter** for embedding models is deprecated. " "[#5257](https://github.com/vllm-project/vllm-ascend/pull/5257)" msgstr "" -"用于嵌入模型的 **--task 参数** 已弃用。[#5257](https://github.com/vllm-project/vllm-ascend/pull/5257)" +"用于嵌入模型的 **--task 参数** 已弃用。[#5257](https://github.com/vllm-project/vllm-" +"ascend/pull/5257)" #: ../../source/user_guide/release_notes.md:324 msgid "" @@ -1948,7 +2176,8 @@ msgid "" "Please note that this feature will cost more memory. If you are memory " "sensitive, please set it to False." msgstr "" -"**VLLM_ASCEND_ENABLE_MLAPO** 环境变量的值将在下一个版本中默认设置为 True。它将在解码节点默认启用。请注意,此功能会消耗更多内存。如果您对内存敏感,请将其设置为 False。" +"**VLLM_ASCEND_ENABLE_MLAPO** 环境变量的值将在下一个版本中默认设置为 " +"True。它将在解码节点默认启用。请注意,此功能会消耗更多内存。如果您对内存敏感,请将其设置为 False。" #: ../../source/user_guide/release_notes.md:328 msgid "" @@ -1972,7 +2201,8 @@ msgid "" " it. If you hit OOM problem again, please submit an issue. " "[#5136](https://github.com/vllm-project/vllm-ascend/pull/5136)" msgstr "" -"**OOM 修复**:VL 模型上的 OOM 错误现已修复。我们将持续观察。如果您再次遇到 OOM 问题,请提交 issue。[#5136](https://github.com/vllm-project/vllm-ascend/pull/5136)" +"**OOM 修复**:VL 模型上的 OOM 错误现已修复。我们将持续观察。如果您再次遇到 OOM 问题,请提交 " +"issue。[#5136](https://github.com/vllm-project/vllm-ascend/pull/5136)" #: ../../source/user_guide/release_notes.md:335 msgid "" @@ -1980,42 +2210,49 @@ msgid "" " batched inferring. 
[#4932](https://github.com/vllm-project/vllm-" "ascend/pull/4932)" msgstr "" -"**Qwen3-Next-MTP 准确性**:修复了 Qwen3-Next-MTP 在批量推理时的准确性错误。[#4932](https://github.com/vllm-project/vllm-ascend/pull/4932)" +"**Qwen3-Next-MTP 准确性**:修复了 Qwen3-Next-MTP " +"在批量推理时的准确性错误。[#4932](https://github.com/vllm-project/vllm-" +"ascend/pull/4932)" #: ../../source/user_guide/release_notes.md:336 msgid "" "**ZMQ Bug Fix**: Fixed zmq send/receive failed bug. " "[#5503](https://github.com/vllm-project/vllm-ascend/pull/5503)" msgstr "" -"**ZMQ 错误修复**:修复了 zmq 发送/接收失败的 bug。[#5503](https://github.com/vllm-project/vllm-ascend/pull/5503)" +"**ZMQ 错误修复**:修复了 zmq 发送/接收失败的 bug。[#5503](https://github.com/vllm-project" +"/vllm-ascend/pull/5503)" #: ../../source/user_guide/release_notes.md:337 msgid "" "**Weight Transpose**: Fixed weight transpose in RL scenarios. " "[#5567](https://github.com/vllm-project/vllm-ascend/pull/5567)" msgstr "" -"**权重转置**:修复了 RL 场景中的权重转置问题。[#5567](https://github.com/vllm-project/vllm-ascend/pull/5567)" +"**权重转置**:修复了 RL 场景中的权重转置问题。[#5567](https://github.com/vllm-project/vllm-" +"ascend/pull/5567)" #: ../../source/user_guide/release_notes.md:338 msgid "" "**Eagle3 SP**: Adapted SP to eagle3. [#5562](https://github.com/vllm-" "project/vllm-ascend/pull/5562)" msgstr "" -"**Eagle3 SP**:使 SP 适配 eagle3。[#5562](https://github.com/vllm-project/vllm-ascend/pull/5562)" +"**Eagle3 SP**:使 SP 适配 eagle3。[#5562](https://github.com/vllm-project" +"/vllm-ascend/pull/5562)" #: ../../source/user_guide/release_notes.md:339 msgid "" -"**GLM4.6 MTP**: GLM4.6 now supports MTP with fullgraph. " +"**GLM4.6 MTP**: GLM4.6 now supports MTP with full graph. " "[#5460](https://github.com/vllm-project/vllm-ascend/pull/5460)" msgstr "" -"**GLM4.6 MTP**:GLM4.6 现在支持使用全图的 MTP。[#5460](https://github.com/vllm-project/vllm-ascend/pull/5460)" +"**GLM4.6 MTP**:GLM4.6 现在支持使用全图的 MTP。[#5460](https://github.com/vllm-" +"project/vllm-ascend/pull/5460)" #: ../../source/user_guide/release_notes.md:340 msgid "" "**Flashcomm2 Oshard**: Flashcomm2 now works with oshard generalized " "feature. [#4723](https://github.com/vllm-project/vllm-ascend/pull/4723)" msgstr "" -"**Flashcomm2 Oshard**:Flashcomm2 现在可与 oshard 通用化功能协同工作。[#4723](https://github.com/vllm-project/vllm-ascend/pull/4723)" +"**Flashcomm2 Oshard**:Flashcomm2 现在可与 oshard " +"通用化功能协同工作。[#4723](https://github.com/vllm-project/vllm-ascend/pull/4723)" #: ../../source/user_guide/release_notes.md:341 msgid "" @@ -2023,7 +2260,8 @@ msgid "" "expert overlap. [#5962](https://github.com/vllm-project/vllm-" "ascend/pull/5962)" msgstr "" -"**细粒度共享专家重叠**:支持细粒度的共享专家重叠。[#5962](https://github.com/vllm-project/vllm-ascend/pull/5962)" +"**细粒度共享专家重叠**:支持细粒度的共享专家重叠。[#5962](https://github.com/vllm-project/vllm-" +"ascend/pull/5962)" #: ../../source/user_guide/release_notes.md:345 msgid "" @@ -2032,23 +2270,23 @@ msgid "" " it in the next post release. [#6302](https://github.com/vllm-project" "/vllm-ascend/pull/6302)" msgstr "" -"由于 `transformers` 包的升级,某些模型的量化权重(如 `qwen2.5vl`、`gemma3`、`minimax`)可能无法工作。我们将在下一个后续版本中修复此问题。[#6302](https://github.com/vllm-project/vllm-ascend/pull/6302)" +"由于 `transformers` 包的升级,某些模型的量化权重(如 " +"`qwen2.5vl`、`gemma3`、`minimax`)可能无法工作。我们将在下一个后续版本中修复此问题。[#6302](https://github.com" +"/vllm-project/vllm-ascend/pull/6302)" #: ../../source/user_guide/release_notes.md:346 msgid "" "The performance of `Qwen3-32B` will not be good with 128K input case, " "it's suggested to enable pcp&dcp feature for this case. This will be " "improved in the next CANN release." 
-msgstr "" -"`Qwen3-32B` 在 128K 输入长度场景下的性能可能不佳,建议为此场景启用 pcp&dcp 功能。这将在下一个 CANN 版本中得到改进。" +msgstr "`Qwen3-32B` 在 128K 输入长度场景下的性能可能不佳,建议为此场景启用 pcp&dcp 功能。这将在下一个 CANN 版本中得到改进。" #: ../../source/user_guide/release_notes.md:347 msgid "" "The performance of `Qwen3-235B`, `Qwen3-480B` under prefill-decode " "scenario and EP=32 scenario is not good as expect. We'll improve it in " "the next post release." -msgstr "" -"`Qwen3-235B`、`Qwen3-480B` 在预填充-解码场景和 EP=32 场景下的性能未达预期。我们将在下一个后续版本中改进。" +msgstr "`Qwen3-235B`、`Qwen3-480B` 在预填充-解码场景和 EP=32 场景下的性能未达预期。我们将在下一个后续版本中改进。" #: ../../source/user_guide/release_notes.md:348 msgid "" @@ -2056,7 +2294,8 @@ msgid "" "the tp size for decode node is great than 1. `TP=1` doesn't work. This " "will be fixed in the next CANN release." msgstr "" -"在预填充-解码场景下部署 deepseek3.1 时,请确保解码节点的 tp 大小大于 1。`TP=1` 无法工作。这将在下一个 CANN 版本中修复。" +"在预填充-解码场景下部署 deepseek3.1 时,请确保解码节点的 tp 大小大于 1。`TP=1` 无法工作。这将在下一个 CANN " +"版本中修复。" #: ../../source/user_guide/release_notes.md:350 msgid "v0.14.0rc1 - 2026.01.26" @@ -2070,7 +2309,10 @@ msgid "" "We just list the differences from v0.13.0rc2. If you are upgrading from " "v0.13.0rc1, please read both v0.14.0rc1 and v0.13.0rc2 release notes." msgstr "" -"这是 vLLM Ascend v0.14.0 的第一个候选版本。请按照[官方文档](https://docs.vllm.ai/projects/ascend/en/latest)开始使用。此版本包含了 v0.13.0rc2 中的所有更改。因此我们仅列出与 v0.13.0rc2 的差异。如果您是从 v0.13.0rc1 升级,请同时阅读 v0.14.0rc1 和 v0.13.0rc2 的发布说明。" +"这是 vLLM Ascend v0.14.0 " +"的第一个候选版本。请按照[官方文档](https://docs.vllm.ai/projects/ascend/en/latest)开始使用。此版本包含了" +" v0.13.0rc2 中的所有更改。因此我们仅列出与 v0.13.0rc2 的差异。如果您是从 v0.13.0rc1 升级,请同时阅读 " +"v0.14.0rc1 和 v0.13.0rc2 的发布说明。" #: ../../source/user_guide/release_notes.md:356 msgid "" @@ -2079,15 +2321,16 @@ msgid "" "support for 310P. [#5776](https://github.com/vllm-project/vllm-" "ascend/pull/5776)" msgstr "" -"现已恢复对 310P 的支持。在此版本中,仅支持基础密集模型和 VL 模型,且使用 Eager 模式。我们将持续改进和维护对 310P 的支持。[#5776](https://github.com/vllm-project/vllm-ascend/pull/5776)" +"现已恢复对 310P 的支持。在此版本中,仅支持基础密集模型和 VL 模型,且使用 Eager 模式。我们将持续改进和维护对 310P " +"的支持。[#5776](https://github.com/vllm-project/vllm-ascend/pull/5776)" #: ../../source/user_guide/release_notes.md:357 msgid "" "Support compressed tensors moe w8a8-int8 quantization. " "[#5718](https://github.com/vllm-project/vllm-ascend/pull/5718)" msgstr "" -"支持压缩张量的 MoE w8a8-int8 量化。 " -"[#5718](https://github.com/vllm-project/vllm-ascend/pull/5718)" +"支持压缩张量的 MoE w8a8-int8 量化。 [#5718](https://github.com/vllm-project/vllm-" +"ascend/pull/5718)" #: ../../source/user_guide/release_notes.md:358 msgid "" @@ -2102,8 +2345,8 @@ msgid "" "Support Eagle3 speculative decoding for Qwen3vl. " "[#4848](https://github.com/vllm-project/vllm-ascend/pull/4848)" msgstr "" -"支持 Qwen3vl 的 Eagle3 推测式解码。 " -"[#4848](https://github.com/vllm-project/vllm-ascend/pull/4848)" +"支持 Qwen3vl 的 Eagle3 推测式解码。 [#4848](https://github.com/vllm-project/vllm-" +"ascend/pull/4848)" #: ../../source/user_guide/release_notes.md:363 msgid "" @@ -2126,8 +2369,8 @@ msgid "" "Add support of new W4A4_LAOS_DYNAMIC quantization method. 
" "[#5143](https://github.com/vllm-project/vllm-ascend/pull/5143)" msgstr "" -"新增对 W4A4_LAOS_DYNAMIC 量化方法的支持。 " -"[#5143](https://github.com/vllm-project/vllm-ascend/pull/5143)" +"新增对 W4A4_LAOS_DYNAMIC 量化方法的支持。 [#5143](https://github.com/vllm-project" +"/vllm-ascend/pull/5143)" #: ../../source/user_guide/release_notes.md:369 msgid "" @@ -2136,10 +2379,10 @@ msgid "" "[#5984](https://github.com/vllm-project/vllm-ascend/pull/5984) " "[#5765](https://github.com/vllm-project/vllm-ascend/pull/5765)" msgstr "" -"Qwen3-next 的性能已得到提升。 " -"[#5664](https://github.com/vllm-project/vllm-ascend/pull/5664) " -"[#5984](https://github.com/vllm-project/vllm-ascend/pull/5984) " -"[#5765](https://github.com/vllm-project/vllm-ascend/pull/5765)" +"Qwen3-next 的性能已得到提升。 [#5664](https://github.com/vllm-project/vllm-" +"ascend/pull/5664) [#5984](https://github.com/vllm-project/vllm-" +"ascend/pull/5984) [#5765](https://github.com/vllm-project/vllm-" +"ascend/pull/5765)" #: ../../source/user_guide/release_notes.md:370 msgid "" @@ -2154,8 +2397,8 @@ msgid "" "Merge Q/K split to simplify AscendApplyRotaryEmb for better performance. " "[#5799](https://github.com/vllm-project/vllm-ascend/pull/5799)" msgstr "" -"合并 Q/K 拆分以简化 AscendApplyRotaryEmb,从而提升性能。 " -"[#5799](https://github.com/vllm-project/vllm-ascend/pull/5799)" +"合并 Q/K 拆分以简化 AscendApplyRotaryEmb,从而提升性能。 [#5799](https://github.com" +"/vllm-project/vllm-ascend/pull/5799)" #: ../../source/user_guide/release_notes.md:372 msgid "" @@ -2163,25 +2406,25 @@ msgid "" "`fuse_allreduce_rms=True` in `--additional_config` to enable it. " "[#5034](https://github.com/vllm-project/vllm-ascend/pull/5034)" msgstr "" -"新增 Matmul Allreduce Rmsnorm 融合 Pass。默认禁用。在 `--additional_config` " -"中设置 `fuse_allreduce_rms=True` 以启用它。 " -"[#5034](https://github.com/vllm-project/vllm-ascend/pull/5034)" +"新增 Matmul Allreduce Rmsnorm 融合 Pass。默认禁用。在 `--additional_config` 中设置 " +"`fuse_allreduce_rms=True` 以启用它。 [#5034](https://github.com/vllm-project" +"/vllm-ascend/pull/5034)" #: ../../source/user_guide/release_notes.md:373 msgid "" "Optimize rope embedding with triton kernel for huge performance gain. " "[#5918](https://github.com/vllm-project/vllm-ascend/pull/5918)" msgstr "" -"使用 triton kernel 优化 rope embedding,以获得巨大的性能提升。 " -"[#5918](https://github.com/vllm-project/vllm-ascend/pull/5918)" +"使用 triton kernel 优化 rope embedding,以获得巨大的性能提升。 [#5918](https://github.com" +"/vllm-project/vllm-ascend/pull/5918)" #: ../../source/user_guide/release_notes.md:374 msgid "" "support advanced apply_top_k_top_p without top_k constraint. " "[#6098](https://github.com/vllm-project/vllm-ascend/pull/6098)" msgstr "" -"支持无 top_k 约束的高级 apply_top_k_top_p。 " -"[#6098](https://github.com/vllm-project/vllm-ascend/pull/6098)" +"支持无 top_k 约束的高级 apply_top_k_top_p。 [#6098](https://github.com/vllm-" +"project/vllm-ascend/pull/6098)" #: ../../source/user_guide/release_notes.md:375 msgid "" @@ -2197,16 +2440,16 @@ msgid "" "model runner v2 support triton of penalty. [#5854](https://github.com" "/vllm-project/vllm-ascend/pull/5854)" msgstr "" -"model runner v2 支持 triton 惩罚。 [#5854](https://github.com/vllm-project/vllm-" -"ascend/pull/5854)" +"model runner v2 支持 triton 惩罚。 [#5854](https://github.com/vllm-project" +"/vllm-ascend/pull/5854)" #: ../../source/user_guide/release_notes.md:380 msgid "" "model runner v2 support eagle spec decoding. 
[#5840](https://github.com" "/vllm-project/vllm-ascend/pull/5840)" msgstr "" -"model runner v2 支持 eagle 推测式解码。 [#5840](https://github.com/vllm-" -"project/vllm-ascend/pull/5840)" +"model runner v2 支持 eagle 推测式解码。 [#5840](https://github.com/vllm-project" +"/vllm-ascend/pull/5840)" #: ../../source/user_guide/release_notes.md:381 msgid "" @@ -2225,7 +2468,8 @@ msgid "" "it to False. [#5952](https://github.com/vllm-project/vllm-" "ascend/pull/5952)" msgstr "" -"`VLLM_ASCEND_ENABLE_MLAPO` 默认设置为 `True`。在 PD 部署场景的解码节点上会自动启用。请注意,此功能会消耗更多内存。如果您对内存敏感,请将其设置为 False。 " +"`VLLM_ASCEND_ENABLE_MLAPO` 默认设置为 `True`。在 PD " +"部署场景的解码节点上会自动启用。请注意,此功能会消耗更多内存。如果您对内存敏感,请将其设置为 False。 " "[#5952](https://github.com/vllm-project/vllm-ascend/pull/5952)" #: ../../source/user_guide/release_notes.md:383 @@ -2260,7 +2504,8 @@ msgid "" " The old ones are removed in this release." msgstr "" "EPLB 配置选项已移至 [additional " -"config](https://docs.vllm.ai/projects/ascend/en/latest/user_guide/configuration/additional_config.html) 中的 `eplb_config`。旧选项在此版本中已被移除。" +"config](https://docs.vllm.ai/projects/ascend/en/latest/user_guide/configuration/additional_config.html)" +" 中的 `eplb_config`。旧选项在此版本中已被移除。" #: ../../source/user_guide/release_notes.md:393 msgid "" @@ -2270,10 +2515,25 @@ msgid "" "[#5928](https://github.com/vllm-project/vllm-ascend/pull/5928)" msgstr "" "分析器环境变量,如 `VLLM_TORCH_PROFILER_DIR` 和 " -"`VLLM_TORCH_PROFILER_WITH_PROFILE_MEMORY`,现已不适用于 vLLM Ascend。请改用 vLLM 的 `--profiler-config` 参数。 " -"[#5928](https://github.com/vllm-project/vllm-ascend/pull/5928)" +"`VLLM_TORCH_PROFILER_WITH_PROFILE_MEMORY`,现已不适用于 vLLM Ascend。请改用 vLLM 的 " +"`--profiler-config` 参数。 [#5928](https://github.com/vllm-project/vllm-" +"ascend/pull/5928)" #: ../../source/user_guide/release_notes.md:395 +#: ../../source/user_guide/release_notes.md:505 +#: ../../source/user_guide/release_notes.md:542 +#: ../../source/user_guide/release_notes.md:591 +#: ../../source/user_guide/release_notes.md:646 +#: ../../source/user_guide/release_notes.md:748 +#: ../../source/user_guide/release_notes.md:802 +#: ../../source/user_guide/release_notes.md:847 +#: ../../source/user_guide/release_notes.md:881 +#: ../../source/user_guide/release_notes.md:936 +#: ../../source/user_guide/release_notes.md:1050 +#: ../../source/user_guide/release_notes.md:1086 +#: ../../source/user_guide/release_notes.md:1140 +#: ../../source/user_guide/release_notes.md:1384 +#: ../../source/user_guide/release_notes.md:1416 msgid "Known Issues" msgstr "已知问题" @@ -2284,7 +2544,9 @@ msgid "" "into your local vLLM code. This known issue will be fixed in vLLM in the " "next release." msgstr "" -"如果您有时遇到来自 `EngineCore` 进程的 pickle 错误,请将 [PR](https://github.com/vllm-project/vllm/pull/32022) cherry-pick 到您的本地 vLLM 代码中。此已知问题将在 vLLM 的下一个版本中修复。" +"如果您有时遇到来自 `EngineCore` 进程的 pickle 错误,请将 [PR](https://github.com/vllm-" +"project/vllm/pull/32022) cherry-pick 到您的本地 vLLM 代码中。此已知问题将在 vLLM " +"的下一个版本中修复。" #: ../../source/user_guide/release_notes.md:399 msgid "v0.13.0rc2 - 2026.01.24" @@ -2299,7 +2561,9 @@ msgid "" "Any feedback is welcome to help us to improve the final version of " "v0.13.0." 
msgstr "" -"这是 vLLM Ascend v0.13.0 的第二个候选版本。在此 rc 版本中,我们修复了大量错误并提升了多个模型的性能。请按照 [官方文档](https://docs.vllm.ai/projects/ascend/en/v0.13.0/) 开始使用。欢迎任何反馈以帮助我们改进 v0.13.0 的最终版本。" +"这是 vLLM Ascend v0.13.0 的第二个候选版本。在此 rc 版本中,我们修复了大量错误并提升了多个模型的性能。请按照 " +"[官方文档](https://docs.vllm.ai/projects/ascend/en/v0.13.0/) " +"开始使用。欢迎任何反馈以帮助我们改进 v0.13.0 的最终版本。" #: ../../source/user_guide/release_notes.md:405 msgid "" @@ -2308,7 +2572,8 @@ msgid "" " significantly. A lot of bugs have been fixed and the performance has " "been improved for DeepSeek3.1/3.2, Qwen3 Dense/MOE models." msgstr "" -"在此版本中,我们主要关注质量和性能的改进。推测式解码、图模式、上下文并行和 EPLB 都得到了显著提升。修复了大量错误,并提升了 DeepSeek3.1/3.2、Qwen3 Dense/MOE 模型的性能。" +"在此版本中,我们主要关注质量和性能的改进。推测式解码、图模式、上下文并行和 EPLB 都得到了显著提升。修复了大量错误,并提升了 " +"DeepSeek3.1/3.2、Qwen3 Dense/MOE 模型的性能。" #: ../../source/user_guide/release_notes.md:409 msgid "" @@ -2323,36 +2588,36 @@ msgid "" "Eagle spec decode feature now works with full graph mode. " "[#5118](https://github.com/vllm-project/vllm-ascend/pull/5118)" msgstr "" -"Eagle 推测式解码功能现在可与全图模式协同工作。 " -"[#5118](https://github.com/vllm-project/vllm-ascend/pull/5118)" +"Eagle 推测式解码功能现在可与全图模式协同工作。 [#5118](https://github.com/vllm-project/vllm-" +"ascend/pull/5118)" #: ../../source/user_guide/release_notes.md:411 msgid "" "Context Parallel(PCP&DCP) feature is more stable now. And it works for " "most case. Please try it out." -msgstr "" -"上下文并行(PCP&DCP)功能现在更加稳定,适用于大多数情况。请尝试使用。" +msgstr "上下文并行(PCP&DCP)功能现在更加稳定,适用于大多数情况。请尝试使用。" #: ../../source/user_guide/release_notes.md:412 msgid "" "MTP and eagle spec decode feature now works in most cases. And it's " "suggested to use them in most cases." -msgstr "" -"MTP 和 eagle 推测式解码功能现在在大多数情况下都能工作。建议在大多数情况下使用它们。" +msgstr "MTP 和 eagle 推测式解码功能现在在大多数情况下都能工作。建议在大多数情况下使用它们。" #: ../../source/user_guide/release_notes.md:413 msgid "" "EPLB feature more stable now. Many bugs have been fixed. Mix placement " "works now [#6086](https://github.com/vllm-project/vllm-ascend/pull/6086)" msgstr "" -"EPLB 功能现在更加稳定。修复了许多错误。混合放置现已可用 [#6086](https://github.com/vllm-project/vllm-ascend/pull/6086)" +"EPLB 功能现在更加稳定。修复了许多错误。混合放置现已可用 [#6086](https://github.com/vllm-project" +"/vllm-ascend/pull/6086)" #: ../../source/user_guide/release_notes.md:414 msgid "" "Support kv nz feature for DeepSeek decode node in disagg-prefill scenario" " [#3072](https://github.com/vllm-project/vllm-ascend/pull/3072)" msgstr "" -"支持解耦-预填充场景下 DeepSeek 解码节点的 kv nz 功能 [#3072](https://github.com/vllm-project/vllm-ascend/pull/3072)" +"支持解耦-预填充场景下 DeepSeek 解码节点的 kv nz 功能 [#3072](https://github.com/vllm-" +"project/vllm-ascend/pull/3072)" #: ../../source/user_guide/release_notes.md:416 msgid "Model Support" @@ -2379,7 +2644,7 @@ msgid "" "Support for cross-attention and whisper models [#5592](https://github.com" "/vllm-project/vllm-ascend/pull/5592)" msgstr "" -"支持交叉注意力和 whisper 模型 [#5592](https://github.com/vllm-project/vllm-" +"支持交叉注意力和 Whisper 模型 [#5592](https://github.com/vllm-project/vllm-" "ascend/pull/5592)" #: ../../source/user_guide/release_notes.md:424 @@ -2388,14 +2653,15 @@ msgid "" "the performance of models. Such as `RejectSampler`, " "`MoeInitRoutingCustom`, `DispatchFFNCombine` and so on." 
msgstr "" -"此版本中添加了许多自定义算子和 triton kernel 以加速模型性能。例如 `RejectSampler`、`MoeInitRoutingCustom`、`DispatchFFNCombine` 等。" +"此版本中添加了许多自定义算子和 Triton 内核以加速模型性能,例如 `RejectSampler`、`MoeInitRoutingCustom`、`DispatchFFNCombine` 等。" #: ../../source/user_guide/release_notes.md:425 msgid "" "Improved the performance of Layerwise Connector " "[#5303](https://github.com/vllm-project/vllm-ascend/pull/5303)" msgstr "" -"提升了 Layerwise Connector 的性能 [#5303](https://github.com/vllm-project/vllm-ascend/pull/5303)" +"提升了 Layerwise Connector 的性能 [#5303](https://github.com/vllm-project/vllm-" +"ascend/pull/5303)" #: ../../source/user_guide/release_notes.md:429 msgid "" @@ -2403,71 +2669,79 @@ msgid "" "vLLM. It will be used by default in the future release. " "[#5210](https://github.com/vllm-project/vllm-ascend/pull/5210)" msgstr "" -"基础支持 Model Runner v2。Model Runner V2 是 vLLM 的下一代版本,将在未来的版本中默认使用。" -"[#5210](https://github.com/vllm-project/vllm-ascend/pull/5210)" +"基础支持 Model Runner v2。Model Runner V2 是 vLLM 的下一代版本,将在未来的版本中默认使用。[#5210](https://github.com/vllm-project/vllm-ascend/pull/5210)" #: ../../source/user_guide/release_notes.md:430 msgid "" "Fixed a bug that the zmq send/receive may failed " "[#5503](https://github.com/vllm-project/vllm-ascend/pull/5503)" msgstr "" -"修复了 zmq 发送/接收可能失败的 bug [#5503](https://github.com/vllm-project/vllm-ascend/pull/5503)" +"修复了 ZMQ 发送/接收可能失败的 bug [#5503](https://github.com/vllm-project/vllm-" +"ascend/pull/5503)" #: ../../source/user_guide/release_notes.md:431 msgid "" "Supported to use full-graph with Qwen3-Next-MTP " "[#5477](https://github.com/vllm-project/vllm-ascend/pull/5477)" msgstr "" -"支持 Qwen3-Next-MTP 使用全图模式 [#5477](https://github.com/vllm-project/vllm-ascend/pull/5477)" +"支持 Qwen3-Next-MTP 使用全图模式 [#5477](https://github.com/vllm-project/vllm-" +"ascend/pull/5477)" #: ../../source/user_guide/release_notes.md:432 msgid "" "Fix weight transpose in RL scenarios [#5567](https://github.com/vllm-" "project/vllm-ascend/pull/5567)" msgstr "" -"修复强化学习场景中的权重转置问题 [#5567](https://github.com/vllm-project/vllm-ascend/pull/5567)" +"修复强化学习场景中的权重转置问题 [#5567](https://github.com/vllm-project/vllm-" +"ascend/pull/5567)" #: ../../source/user_guide/release_notes.md:433 msgid "" "Adapted SP to eagle3 [#5562](https://github.com/vllm-project/vllm-" "ascend/pull/5562)" msgstr "" -"适配 SP 以支持 eagle3 [#5562](https://github.com/vllm-project/vllm-ascend/pull/5562)" +"适配 SP 以支持 Eagle3 [#5562](https://github.com/vllm-project/vllm-" +"ascend/pull/5562)" #: ../../source/user_guide/release_notes.md:434 msgid "" "Context Parallel(PCP&DCP) support mlapo [#5672](https://github.com/vllm-" "project/vllm-ascend/pull/5672)" msgstr "" -"上下文并行(PCP&DCP)支持 mlapo [#5672](https://github.com/vllm-project/vllm-ascend/pull/5672)" +"上下文并行(PCP&DCP)支持 MLAPO [#5672](https://github.com/vllm-project/vllm-" +"ascend/pull/5672)" #: ../../source/user_guide/release_notes.md:435 msgid "" -"GLM4.6 support mtp with fullgraph [#5460](https://github.com/vllm-project" -"/vllm-ascend/pull/5460)" +"GLM4.6 support mtp with full graph [#5460](https://github.com/vllm-" +"project/vllm-ascend/pull/5460)" msgstr "" -"GLM4.6 支持使用全图模式的 mtp [#5460](https://github.com/vllm-project/vllm-ascend/pull/5460)" +"GLM4.6 支持使用全图模式的 MTP [#5460](https://github.com/vllm-project/vllm-" +"ascend/pull/5460)" #: ../../source/user_guide/release_notes.md:436 msgid "" "Flashcomm2 now works with oshard generalized feature " "[#4723](https://github.com/vllm-project/vllm-ascend/pull/4723)" msgstr "" -"Flashcomm2 现在可与 oshard 通用化功能协同工作 
[#4723](https://github.com/vllm-project/vllm-ascend/pull/4723)" +"Flashcomm2 现在可与 OShard 通用化功能协同工作 [#4723](https://github.com/vllm-project" +"/vllm-ascend/pull/4723)" #: ../../source/user_guide/release_notes.md:437 msgid "" "Support setting tp=1 for the Eagle draft model [#5804](https://github.com" "/vllm-project/vllm-ascend/pull/5804)" msgstr "" -"支持为 Eagle 草稿模型设置 tp=1 [#5804](https://github.com/vllm-project/vllm-ascend/pull/5804)" +"支持为 Eagle 草稿模型设置 tp=1 [#5804](https://github.com/vllm-project/vllm-" +"ascend/pull/5804)" #: ../../source/user_guide/release_notes.md:438 msgid "" "Flashcomm1 feature now works with qwen3-vl [#5848](https://github.com" "/vllm-project/vllm-ascend/pull/5848)" msgstr "" -"Flashcomm1 功能现在可与 qwen3-vl 协同工作 [#5848](https://github.com/vllm-project/vllm-ascend/pull/5848)" +"Flashcomm1 功能现在可与 Qwen3-VL 协同工作 [#5848](https://github.com/vllm-project" +"/vllm-ascend/pull/5848)" #: ../../source/user_guide/release_notes.md:439 msgid "" @@ -2486,7 +2760,8 @@ msgid "" "will not be installed by default. Please install it by hand from [pypi " "mirror](https://mirrors.huaweicloud.com/ascend/repos/pypi/torch-npu/)." msgstr "" -"torch-npu 已升级至 2.8.0.post1。请注意,此 post 版本默认不会安装,请从 [pypi 镜像](https://mirrors.huaweicloud.com/ascend/repos/pypi/torch-npu/)手动安装。" +"torch-npu 已升级至 2.8.0.post1。请注意,此 post 版本默认不会安装,请从 [PyPI " +"镜像](https://mirrors.huaweicloud.com/ascend/repos/pypi/torch-npu/)手动安装。" #: ../../source/user_guide/release_notes.md:445 msgid "triton-ascend is upgraded to 3.2.0" @@ -2505,7 +2780,9 @@ msgid "" "config](https://docs.vllm.ai/projects/ascend/en/latest/user_guide/configuration/additional_config.html)." " The old ones will be removed in the next release." msgstr "" -"eplb 配置选项已移至 [附加配置](https://docs.vllm.ai/projects/ascend/en/latest/user_guide/configuration/additional_config.html) 中的 `eplb_config`。旧选项将在下一个版本中移除。" +"EPLB 配置选项已移至 " +"[附加配置](https://docs.vllm.ai/projects/ascend/en/latest/user_guide/configuration/additional_config.html)" +" 中的 `eplb_config`。旧选项将在下一个版本中移除。" #: ../../source/user_guide/release_notes.md:451 msgid "" @@ -2513,7 +2790,9 @@ msgid "" "ascend/blob/v0.13.0rc2/docs/source/developer_guide/performance_and_debug/profile_execute_duration.md)" " is deprecated. It's replaced by `ObservabilityConfig` from vLLM." msgstr "" -"`ProfileExecuteDuration` [功能](https://github.com/vllm-project/vllm-ascend/blob/v0.13.0rc2/docs/source/developer_guide/performance_and_debug/profile_execute_duration.md) 已弃用。它已被 vLLM 的 `ObservabilityConfig` 取代。" +"`ProfileExecuteDuration` [功能](https://github.com/vllm-project/vllm-" +"ascend/blob/v0.13.0rc2/docs/source/developer_guide/performance_and_debug/profile_execute_duration.md)" +" 已弃用。它已被 vLLM 的 `ObservabilityConfig` 取代。" #: ../../source/user_guide/release_notes.md:452 msgid "" @@ -2522,7 +2801,8 @@ msgid "" "Please note that this feature will cost more memory. If you are memory " "sensitive, please set it to False." msgstr "" -"`VLLM_ASCEND_ENABLE_MLAPO` 环境变量的值将在下一个版本中默认设置为 True。它将在解码节点默认启用。请注意,此功能会消耗更多内存。如果您对内存敏感,请将其设置为 False。" +"`VLLM_ASCEND_ENABLE_MLAPO` 环境变量的值将在下一个版本中默认设置为 " +"True。它将在解码节点默认启用。请注意,此功能会消耗更多内存。如果您对内存敏感,请将其设置为 False。" #: ../../source/user_guide/release_notes.md:454 msgid "v0.13.0rc1 - 2025.12.27" @@ -2536,7 +2816,9 @@ msgid "" "Please follow the [official " "doc](https://docs.vllm.ai/projects/ascend/en/latest) to get started." 
msgstr "" -"这是 vLLM Ascend v0.13.0 的第一个候选版本。在此版本中,我们修复了大量错误,提升了性能并增加了功能支持。欢迎任何反馈以帮助我们改进 vLLM Ascend。请遵循[官方文档](https://docs.vllm.ai/projects/ascend/en/latest)开始使用。" +"这是 vLLM Ascend v0.13.0 " +"的第一个候选版本。在此版本中,我们修复了大量错误,提升了性能并增加了功能支持。欢迎任何反馈以帮助我们改进 vLLM " +"Ascend。请遵循[官方文档](https://docs.vllm.ai/projects/ascend/en/latest)开始使用。" #: ../../source/user_guide/release_notes.md:460 msgid "" @@ -2544,7 +2826,8 @@ msgid "" "[tutorials](https://github.com/vllm-project/vllm-" "ascend/blob/v0.13.0rc1/docs/source/tutorials/DeepSeek-V3.2.md)" msgstr "" -"提升了 DeepSeek V3.2 的性能,请参考[教程](https://github.com/vllm-project/vllm-ascend/blob/v0.13.0rc1/docs/source/tutorials/DeepSeek-V3.2.md)" +"提升了 DeepSeek V3.2 的性能,请参考[教程](https://github.com/vllm-project/vllm-" +"ascend/blob/v0.13.0rc1/docs/source/tutorials/DeepSeek-V3.2.md)" #: ../../source/user_guide/release_notes.md:461 msgid "" @@ -2553,7 +2836,9 @@ msgid "" "refer to [tutorials](https://github.com/vllm-project/vllm-" "ascend/blob/v0.13.0rc1/docs/source/tutorials/Qwen3-Next.md)" msgstr "" -"现已支持带分块预填充的 Qwen3-Next MTP [#4770](https://github.com/vllm-project/vllm-ascend/pull/4770),请参考[教程](https://github.com/vllm-project/vllm-ascend/blob/v0.13.0rc1/docs/source/tutorials/Qwen3-Next.md)" +"现已支持带分块预填充的 Qwen3-Next MTP [#4770](https://github.com/vllm-project/vllm-" +"ascend/pull/4770),请参考[教程](https://github.com/vllm-project/vllm-" +"ascend/blob/v0.13.0rc1/docs/source/tutorials/Qwen3-Next.md)" #: ../../source/user_guide/release_notes.md:462 msgid "" @@ -2562,56 +2847,62 @@ msgid "" " feedback. please refer to [context parallel feature " "guide](https://docs.vllm.ai/projects/ascend/en/latest/user_guide/feature_guide/context_parallel.html)" msgstr "" -"[实验性] 支持预填充上下文并行和解码上下文并行,但请注意,目前这是一个实验性功能,欢迎任何反馈。请参考[上下文并行功能指南](https://docs.vllm.ai/projects/ascend/en/latest/user_guide/feature_guide/context_parallel.html)" +"[实验性] " +"支持预填充上下文并行和解码上下文并行,但请注意,目前这是一个实验性功能,欢迎任何反馈。请参考[上下文并行功能指南](https://docs.vllm.ai/projects/ascend/en/latest/user_guide/feature_guide/context_parallel.html)" #: ../../source/user_guide/release_notes.md:466 msgid "" "Support openPangu Ultra MoE [4615](https://github.com/vllm-project/vllm-" "ascend/pull/4615)" msgstr "" -"支持 openPangu Ultra MoE [4615](https://github.com/vllm-project/vllm-ascend/pull/4615)" +"支持 OpenPangu Ultra MoE [4615](https://github.com/vllm-project/vllm-" +"ascend/pull/4615)" #: ../../source/user_guide/release_notes.md:467 msgid "" "A new quantization method W8A16 is supported now. " "[#4541](https://github.com/vllm-project/vllm-ascend/pull/4541)" msgstr "" -"现已支持新的量化方法 W8A16 [#4541](https://github.com/vllm-project/vllm-ascend/pull/4541)" +"现已支持新的量化方法 W8A16 [#4541](https://github.com/vllm-project/vllm-" +"ascend/pull/4541)" #: ../../source/user_guide/release_notes.md:468 msgid "" "Cross-machine Disaggregated Prefill is supported now. " "[#5008](https://github.com/vllm-project/vllm-ascend/pull/5008)" -msgstr "" -"现已支持跨机解耦预填充 [#5008](https://github.com/vllm-project/vllm-ascend/pull/5008)" +msgstr "现已支持跨机解耦预填充 [#5008](https://github.com/vllm-project/vllm-ascend/pull/5008)" #: ../../source/user_guide/release_notes.md:469 msgid "" "Add UCMConnector for KV Cache Offloading. 
[#4411](https://github.com" "/vllm-project/vllm-ascend/pull/4411)" msgstr "" -"为 KV Cache 卸载添加 UCMConnector [#4411](https://github.com/vllm-project/vllm-ascend/pull/4411)" +"为 KV Cache 卸载添加 UCMConnector [#4411](https://github.com/vllm-project" +"/vllm-ascend/pull/4411)" #: ../../source/user_guide/release_notes.md:470 msgid "" "Support async_scheduler and disable_padded_drafter_batch in eagle. " "[#4893](https://github.com/vllm-project/vllm-ascend/pull/4893)" msgstr "" -"在 eagle 中支持 async_scheduler 和 disable_padded_drafter_batch [#4893](https://github.com/vllm-project/vllm-ascend/pull/4893)" +"在 Eagle 中支持 async_scheduler 和 disable_padded_drafter_batch " +"[#4893](https://github.com/vllm-project/vllm-ascend/pull/4893)" #: ../../source/user_guide/release_notes.md:471 msgid "" "Support pcp + mtp in full graph mode. [#4572](https://github.com/vllm-" "project/vllm-ascend/pull/4572)" msgstr "" -"在全图模式下支持 pcp + mtp [#4572](https://github.com/vllm-project/vllm-ascend/pull/4572)" +"在全图模式下支持 PCP + MTP [#4572](https://github.com/vllm-project/vllm-" +"ascend/pull/4572)" #: ../../source/user_guide/release_notes.md:472 msgid "" "Enhance all-reduce skipping logic for MoE models in NPUModelRunner " "[#5329](https://github.com/vllm-project/vllm-ascend/pull/5329)" msgstr "" -"增强 NPUModelRunner 中 MoE 模型的 all-reduce 跳过逻辑 [#5329](https://github.com/vllm-project/vllm-ascend/pull/5329)" +"增强 NPUModelRunner 中 MoE 模型的 All-Reduce 跳过逻辑 [#5329](https://github.com" +"/vllm-project/vllm-ascend/pull/5329)" #: ../../source/user_guide/release_notes.md:476 msgid "Some general performance improvement:" @@ -2622,7 +2913,8 @@ msgid "" "Add l2norm triton kernel [#4595](https://github.com/vllm-project/vllm-" "ascend/pull/4595)" msgstr "" -"添加 l2norm triton 内核 [#4595](https://github.com/vllm-project/vllm-ascend/pull/4595)" +"添加 L2Norm Triton 内核 [#4595](https://github.com/vllm-project/vllm-" +"ascend/pull/4595)" #: ../../source/user_guide/release_notes.md:479 msgid "" @@ -2630,14 +2922,16 @@ msgid "" " in graph mode. [#5077](https://github.com/vllm-project/vllm-" "ascend/pull/5077)" msgstr "" -"为带 SP 的 AddRmsnormQuant 添加新模式,该模式仅在图模式下生效 [#5077](https://github.com/vllm-project/vllm-ascend/pull/5077)" +"为带 SP 的 AddRmsnormQuant 添加新模式,该模式仅在图模式下生效 [#5077](https://github.com" +"/vllm-project/vllm-ascend/pull/5077)" #: ../../source/user_guide/release_notes.md:480 msgid "" "Add async exponential while model executing. [#4501](https://github.com" "/vllm-project/vllm-ascend/pull/4501)" msgstr "" -"在模型执行时添加异步指数 [#4501](https://github.com/vllm-project/vllm-ascend/pull/4501)" +"在模型执行时添加异步指数 [#4501](https://github.com/vllm-project/vllm-" +"ascend/pull/4501)" #: ../../source/user_guide/release_notes.md:481 msgid "" @@ -2645,7 +2939,8 @@ msgid "" "transpose_batchmatmul [#5390](https://github.com/vllm-project/vllm-" "ascend/pull/5390)" msgstr "" -"移除注意力后的转置步骤,并切换到 transpose_batchmatmul [#5390](https://github.com/vllm-project/vllm-ascend/pull/5390)" +"移除注意力后的转置步骤,并切换到 transpose_batchmatmul [#5390](https://github.com/vllm-" +"project/vllm-ascend/pull/5390)" #: ../../source/user_guide/release_notes.md:482 msgid "" @@ -2654,7 +2949,8 @@ msgid "" " in [FAQs](https://docs.vllm.ai/projects/ascend/en/latest/faqs.html) to " "enable it." 
msgstr "" -"为优化小批量场景下的性能,提供了一个带有 flash decoding 功能的注意力算子,请参考[常见问题解答](https://docs.vllm.ai/projects/ascend/en/latest/faqs.html)中的第22项来启用它。" +"为优化小批量场景下的性能,提供了一个带有 Flash Decoding " +"功能的注意力算子,请参考[常见问题解答](https://docs.vllm.ai/projects/ascend/en/latest/faqs.html)中的第22项来启用它。" #: ../../source/user_guide/release_notes.md:484 #: ../../source/user_guide/release_notes.md:522 @@ -2671,35 +2967,40 @@ msgid "" "hit OOM problem again, please submit an issue. [#5136](https://github.com" "/vllm-project/vllm-ascend/pull/5136)" msgstr "" -"VL 模型上的 OOM 错误现已修复。我们将持续观察,如果您再次遇到 OOM 问题,请提交 issue [#5136](https://github.com/vllm-project/vllm-ascend/pull/5136)" +"VL 模型上的 OOM 错误现已修复。我们将持续观察,如果您再次遇到 OOM 问题,请提交 issue " +"[#5136](https://github.com/vllm-project/vllm-ascend/pull/5136)" #: ../../source/user_guide/release_notes.md:487 msgid "" "Fixed an accuracy bug of Qwen3-Next-MTP when batched inferring. " "[#4932](https://github.com/vllm-project/vllm-ascend/pull/4932)" msgstr "" -"修复了 Qwen3-Next-MTP 在批量推理时的精度错误 [#4932](https://github.com/vllm-project/vllm-ascend/pull/4932)" +"修复了 Qwen3-Next-MTP 在批量推理时的精度错误 [#4932](https://github.com/vllm-project" +"/vllm-ascend/pull/4932)" #: ../../source/user_guide/release_notes.md:488 msgid "" "Fix npu-cpu offloading interface change bug. [#5290](https://github.com" "/vllm-project/vllm-ascend/pull/5290)" msgstr "" -"修复 npu-cpu 卸载接口变更导致的错误 [#5290](https://github.com/vllm-project/vllm-ascend/pull/5290)" +"修复 npu-cpu 卸载接口变更导致的错误 [#5290](https://github.com/vllm-project/vllm-" +"ascend/pull/5290)" #: ../../source/user_guide/release_notes.md:489 msgid "" "Fix MHA model runtime error in aclgraph mode [#5397](https://github.com" "/vllm-project/vllm-ascend/pull/5397)" msgstr "" -"修复 MHA 模型在 aclgraph 模式下的运行时错误 [#5397](https://github.com/vllm-project/vllm-ascend/pull/5397)" +"修复 MHA 模型在 aclgraph 模式下的运行时错误 [#5397](https://github.com/vllm-project" +"/vllm-ascend/pull/5397)" #: ../../source/user_guide/release_notes.md:490 msgid "" "Fix unsuitable moe_comm_type under ep=1 scenario " "[#5388](https://github.com/vllm-project/vllm-ascend/pull/5388)" msgstr "" -"修复 ep=1 场景下不合适的 moe_comm_type 设置 [#5388](https://github.com/vllm-project/vllm-ascend/pull/5388)" +"修复 ep=1 场景下不合适的 moe_comm_type 设置 [#5388](https://github.com/vllm-project" +"/vllm-ascend/pull/5388)" #: ../../source/user_guide/release_notes.md:494 msgid "" @@ -2708,7 +3009,9 @@ msgid "" "be enabled together. [#5272](https://github.com/vllm-project/vllm-" "ascend/pull/5272)" msgstr "" -"`VLLM_ASCEND_ENABLE_DENSE_OPTIMIZE` 已被移除,建议使用 `VLLM_ASCEND_ENABLE_PREFETCH_MLP` 替代,因为它们通常一起启用 [#5272](https://github.com/vllm-project/vllm-ascend/pull/5272)" +"`VLLM_ASCEND_ENABLE_DENSE_OPTIMIZE` 已被移除,建议使用 " +"`VLLM_ASCEND_ENABLE_PREFETCH_MLP` 替代,因为它们通常一起启用 " +"[#5272](https://github.com/vllm-project/vllm-ascend/pull/5272)" #: ../../source/user_guide/release_notes.md:495 msgid "" @@ -2725,7 +3028,8 @@ msgid "" "set it to 2 if you make sure it works for your case. " "[#4878](https://github.com/vllm-project/vllm-ascend/pull/4878)" msgstr "" -"对于浮点权重的情况,`VLLM_ASCEND_ENABLE_NZ` 已被禁用,因为我们注意到在某些浮点场景下性能不佳。如果您确认它适用于您的场景,可以将其设置为2。 " +"对于浮点权重的情况,`VLLM_ASCEND_ENABLE_NZ` " +"已被禁用,因为我们注意到在某些浮点场景下性能不佳。如果您确认它适用于您的场景,可以将其设置为2。 " "[#4878](https://github.com/vllm-project/vllm-ascend/pull/4878)" #: ../../source/user_guide/release_notes.md:497 @@ -2742,24 +3046,25 @@ msgid "" " the type is change from `dict` to `string`. 
[#5296](https://github.com" "/vllm-project/vllm-ascend/pull/5296)" msgstr "" -"`additional_config` 中的 `dump_config` 已重命名为 `dump_config_path`,其类型也从 `dict` 更改为 `string`。 " -"[#5296](https://github.com/vllm-project/vllm-ascend/pull/5296)" +"`additional_config` 中的 `dump_config` 已重命名为 `dump_config_path`,其类型也从 " +"`dict` 更改为 `string`。 [#5296](https://github.com/vllm-project/vllm-" +"ascend/pull/5296)" #: ../../source/user_guide/release_notes.md:502 msgid "" "vLLM version has been upgraded to 0.13.0 and drop 0.12.0 support. " "[#5146](https://github.com/vllm-project/vllm-ascend/pull/5146)" msgstr "" -"vLLM 版本已升级至 0.13.0,并停止支持 0.12.0。 " -"[#5146](https://github.com/vllm-project/vllm-ascend/pull/5146)" +"vLLM 版本已升级至 0.13.0,并停止支持 0.12.0。 [#5146](https://github.com/vllm-project" +"/vllm-ascend/pull/5146)" #: ../../source/user_guide/release_notes.md:503 msgid "" "Transformer version has been upgraded >= 4.57.3 " "[#5250](https://github.com/vllm-project/vllm-ascend/pull/5250)" msgstr "" -"Transformer 版本已升级至 >= 4.57.3 " -"[#5250](https://github.com/vllm-project/vllm-ascend/pull/5250)" +"Transformer 版本已升级至 >= 4.57.3 [#5250](https://github.com/vllm-project" +"/vllm-ascend/pull/5250)" #: ../../source/user_guide/release_notes.md:507 msgid "" @@ -2767,7 +3072,8 @@ msgid "" "`gpu-memory-utilization` according to the doc to run Qwen3-Next. We'll " "improve it in the next release" msgstr "" -"Qwen3-Next 目前不支持长序列场景,运行 Qwen3-Next 时需根据文档限制 `gpu-memory-utilization`。我们将在下一个版本中改进此问题。" +"Qwen3-Next 目前不支持长序列场景,运行 Qwen3-Next 时需根据文档限制 `gpu-memory-" +"utilization`。我们将在下一个版本中改进此问题。" #: ../../source/user_guide/release_notes.md:508 msgid "" @@ -2801,64 +3107,71 @@ msgid "" "release note will only contain the important change and note from " "v0.11.0rc3." msgstr "" -"我们很高兴地宣布 vLLM Ascend v0.11.0 版本发布。这是 v0.11.0 的正式版本。请按照 [官方文档](https://docs.vllm.ai/projects/ascend/en/v0.11.0) 开始使用。如有需要,我们未来会考虑发布后续版本。本版本说明仅包含自 v0.11.0rc3 以来的重要变更和注意事项。" +"我们很高兴地宣布 vLLM Ascend v0.11.0 版本发布。这是 v0.11.0 的正式版本。请按照 " +"[官方文档](https://docs.vllm.ai/projects/ascend/en/v0.11.0) " +"开始使用。如有需要,我们未来会考虑发布后续版本。本版本说明仅包含自 v0.11.0rc3 以来的重要变更和注意事项。" #: ../../source/user_guide/release_notes.md:517 msgid "" "Improved the performance for deepseek 3/3.1. [#3995](https://github.com" "/vllm-project/vllm-ascend/pull/3995)" msgstr "" -"提升了 deepseek 3/3.1 的性能。 [#3995](https://github.com/vllm-project/vllm-ascend/pull/3995)" +"提升了 deepseek 3/3.1 的性能。 [#3995](https://github.com/vllm-project/vllm-" +"ascend/pull/3995)" #: ../../source/user_guide/release_notes.md:518 msgid "" "Fixed the accuracy bug for qwen3-vl. [#4811](https://github.com/vllm-" "project/vllm-ascend/pull/4811)" msgstr "" -"修复了 qwen3-vl 的精度问题。 [#4811](https://github.com/vllm-project/vllm-ascend/pull/4811)" +"修复了 qwen3-vl 的精度问题。 [#4811](https://github.com/vllm-project/vllm-" +"ascend/pull/4811)" #: ../../source/user_guide/release_notes.md:519 msgid "" "Improved the performance of sample. [#4153](https://github.com/vllm-" "project/vllm-ascend/pull/4153)" -msgstr "" -"提升了采样性能。 [#4153](https://github.com/vllm-project/vllm-ascend/pull/4153)" +msgstr "提升了采样性能。 [#4153](https://github.com/vllm-project/vllm-ascend/pull/4153)" #: ../../source/user_guide/release_notes.md:520 msgid "" "Eagle3 is back now. 
[#4721](https://github.com/vllm-project/vllm-" "ascend/pull/4721)" msgstr "" -"Eagle3 现已恢复支持。 [#4721](https://github.com/vllm-project/vllm-ascend/pull/4721)" +"Eagle3 现已恢复支持。 [#4721](https://github.com/vllm-project/vllm-" +"ascend/pull/4721)" #: ../../source/user_guide/release_notes.md:524 msgid "" "Improved the performance for kimi-k2. [#4555](https://github.com/vllm-" "project/vllm-ascend/pull/4555)" msgstr "" -"提升了 kimi-k2 的性能。 [#4555](https://github.com/vllm-project/vllm-ascend/pull/4555)" +"提升了 kimi-k2 的性能。 [#4555](https://github.com/vllm-project/vllm-" +"ascend/pull/4555)" #: ../../source/user_guide/release_notes.md:525 msgid "" "Fixed a quantization bug for deepseek3.2-exp. [#4797](https://github.com" "/vllm-project/vllm-ascend/pull/4797)" msgstr "" -"修复了 deepseek3.2-exp 的一个量化问题。 [#4797](https://github.com/vllm-project/vllm-ascend/pull/4797)" +"修复了 deepseek3.2-exp 的一个量化问题。 [#4797](https://github.com/vllm-project" +"/vllm-ascend/pull/4797)" #: ../../source/user_guide/release_notes.md:526 msgid "" "Fixed qwen3-vl-moe bug under high concurrency. [#4658](https://github.com" "/vllm-project/vllm-ascend/pull/4658)" msgstr "" -"修复了 qwen3-vl-moe 在高并发下的问题。 [#4658](https://github.com/vllm-project/vllm-ascend/pull/4658)" +"修复了 qwen3-vl-moe 在高并发下的问题。 [#4658](https://github.com/vllm-project/vllm-" +"ascend/pull/4658)" #: ../../source/user_guide/release_notes.md:527 msgid "" "Fixed an accuracy bug for Prefill Decode disaggregation case. " "[#4437](https://github.com/vllm-project/vllm-ascend/pull/4437)" msgstr "" -"修复了 Prefill Decode 解耦场景下的一个精度问题。 " -"[#4437](https://github.com/vllm-project/vllm-ascend/pull/4437)" +"修复了 Prefill Decode 解耦场景下的一个精度问题。 [#4437](https://github.com/vllm-project" +"/vllm-ascend/pull/4437)" #: ../../source/user_guide/release_notes.md:528 msgid "" @@ -2866,15 +3179,17 @@ msgid "" "ascend/pull/4576) [#4777](https://github.com/vllm-project/vllm-" "ascend/pull/4777)" msgstr "" -"修复了 EPLB 的一些问题。 [#4576](https://github.com/vllm-project/vllm-ascend/pull/4576) [#4777](https://github.com/vllm-project/vllm-ascend/pull/4777)" +"修复了 EPLB 的一些问题。 [#4576](https://github.com/vllm-project/vllm-" +"ascend/pull/4576) [#4777](https://github.com/vllm-project/vllm-" +"ascend/pull/4777)" #: ../../source/user_guide/release_notes.md:529 msgid "" "Fixed the version incompatibility issue for openEuler docker image. " "[#4745](https://github.com/vllm-project/vllm-ascend/pull/4745)" msgstr "" -"修复了 openEuler docker 镜像的版本兼容性问题。 " -"[#4745](https://github.com/vllm-project/vllm-ascend/pull/4745)" +"修复了 openEuler docker 镜像的版本兼容性问题。 [#4745](https://github.com/vllm-project" +"/vllm-ascend/pull/4745)" #: ../../source/user_guide/release_notes.md:531 msgid "Deprecation announcement" @@ -2904,7 +3219,9 @@ msgid "" "/torch-npu/). So it's hard to add it to auto dependence. Please install " "it by yourself." msgstr "" -"torch-npu 已升级至 2.7.1.post1。请注意,该软件包已推送至 [pypi 镜像](https://mirrors.huaweicloud.com/ascend/repos/pypi/torch-npu/)。因此很难将其添加到自动依赖中。请自行安装。" +"torch-npu 已升级至 2.7.1.post1。请注意,该软件包已推送至 [pypi " +"镜像](https://mirrors.huaweicloud.com/ascend/repos/pypi/torch-" +"npu/)。因此很难将其添加到自动依赖中。请自行安装。" #: ../../source/user_guide/release_notes.md:540 msgid "CANN is upgraded to 8.3.rc2." @@ -2915,29 +3232,25 @@ msgid "" "Qwen3-Next doesn't support expert parallel and MTP features in this " "release. And it'll be oom if the input is too long. 
We'll improve it in " "the next release" -msgstr "" -"Qwen3-Next 在此版本中不支持专家并行和 MTP 功能。如果输入过长,将会出现内存不足(OOM)。我们将在下一个版本中改进此问题。" +msgstr "Qwen3-Next 在此版本中不支持专家并行和 MTP 功能。如果输入过长,将会出现内存不足(OOM)。我们将在下一个版本中改进此问题。" #: ../../source/user_guide/release_notes.md:545 msgid "" "Deepseek 3.2 only work with torchair graph mode in this release. We'll " "make it work with aclgraph mode in the next release." -msgstr "" -"Deepseek 3.2 在此版本中仅支持 torchair graph 模式。我们将在下一个版本中使其支持 aclgraph 模式。" +msgstr "Deepseek 3.2 在此版本中仅支持 torchair graph 模式。我们将在下一个版本中使其支持 aclgraph 模式。" #: ../../source/user_guide/release_notes.md:546 msgid "" "Qwen2-audio doesn't work by default. Temporary solution is to set `--gpu-" "memory-utilization` to a suitable value, such as 0.8." -msgstr "" -"Qwen2-audio 默认无法工作。临时解决方案是将 `--gpu-memory-utilization` 设置为合适的值,例如 0.8。" +msgstr "Qwen2-audio 默认无法工作。临时解决方案是将 `--gpu-memory-utilization` 设置为合适的值,例如 0.8。" #: ../../source/user_guide/release_notes.md:547 msgid "" "CPU bind feature doesn't work if more than one vLLM instance is running " "on the same node." -msgstr "" -"如果在同一节点上运行多个 vLLM 实例,CPU 绑定功能将无法工作。" +msgstr "如果在同一节点上运行多个 vLLM 实例,CPU 绑定功能将无法工作。" #: ../../source/user_guide/release_notes.md:549 msgid "v0.12.0rc1 - 2025.12.13" @@ -2951,7 +3264,9 @@ msgid "" "Please follow the [official " "doc](https://docs.vllm.ai/projects/ascend/en/latest) to get started." msgstr "" -"这是 vLLM Ascend v0.12.0 的第一个候选版本。我们在本次发布中修复了大量问题,提升了性能并增加了功能支持。欢迎任何反馈以帮助我们改进 vLLM Ascend。请按照 [官方文档](https://docs.vllm.ai/projects/ascend/en/latest) 开始使用。" +"这是 vLLM Ascend v0.12.0 " +"的第一个候选版本。我们在本次发布中修复了大量问题,提升了性能并增加了功能支持。欢迎任何反馈以帮助我们改进 vLLM Ascend。请按照 " +"[官方文档](https://docs.vllm.ai/projects/ascend/en/latest) 开始使用。" #: ../../source/user_guide/release_notes.md:555 msgid "" @@ -2961,21 +3276,21 @@ msgid "" "ascend/blob/v0.12.0rc1/docs/source/tutorials/DeepSeek-V3.2.md) to start " "using it." msgstr "" -"DeepSeek 3.2 已稳定且性能得到提升。在此版本中,您无需安装任何其他软件包。请按照 [官方教程](https://github.com/vllm-project/vllm-ascend/blob/v0.12.0rc1/docs/source/tutorials/DeepSeek-V3.2.md) 开始使用。" +"DeepSeek 3.2 已稳定且性能得到提升。在此版本中,您无需安装任何其他软件包。请按照 [官方教程](https://github.com" +"/vllm-project/vllm-" +"ascend/blob/v0.12.0rc1/docs/source/tutorials/DeepSeek-V3.2.md) 开始使用。" #: ../../source/user_guide/release_notes.md:556 msgid "" "Async scheduler is more stable and ready to enable now. Please set " "`--async-scheduling` to enable it." -msgstr "" -"异步调度器现已更加稳定并可以启用。请设置 `--async-scheduling` 来启用它。" +msgstr "异步调度器现已更加稳定并可以启用。请设置 `--async-scheduling` 来启用它。" #: ../../source/user_guide/release_notes.md:557 msgid "" "More new models, such as Qwen3-omni, DeepSeek OCR, PaddleOCR, OpenCUA are" " supported now." -msgstr "" -"现已支持更多新模型,例如 Qwen3-omni、DeepSeek OCR、PaddleOCR、OpenCUA。" +msgstr "现已支持更多新模型,例如 Qwen3-omni、DeepSeek OCR、PaddleOCR、OpenCUA。" #: ../../source/user_guide/release_notes.md:559 #: ../../source/user_guide/release_notes.md:629 @@ -3001,6 +3316,7 @@ msgid "Core" msgstr "核心" #: ../../source/user_guide/release_notes.md:561 +#, python-brace-format msgid "" "[Experimental] Full decode only graph mode is supported now. Although it " "is not enabled by default, we suggest to enable it by `--compilation-" @@ -3008,7 +3324,9 @@ msgid "" "know if you hit any error. We'll keep improve it and enable it by default" " in next few release." 
msgstr "" -"[实验性] 现已支持全解码专用图模式。虽然默认未启用,但我们建议在大多数情况下通过 `--compilation-config '{\"cudagraph_mode\":\"FULL_DECODE_ONLY\"}'` 启用它。如果您遇到任何错误,请告知我们。我们将持续改进,并在未来几个版本中默认启用。" +"[实验性] 现已支持全解码专用图模式。虽然默认未启用,但我们建议在大多数情况下通过 `--compilation-config " +"'{\"cudagraph_mode\":\"FULL_DECODE_ONLY\"}'` " +"启用它。如果您遇到任何错误,请告知我们。我们将持续改进,并在未来几个版本中默认启用。" #: ../../source/user_guide/release_notes.md:562 msgid "" @@ -3021,7 +3339,12 @@ msgid "" " If you're running vLLM Ascend with X86, you need to build triton ascend " "by yourself from [source](https://gitcode.com/Ascend/triton-ascend)" msgstr "" -"新增了大量 triton 内核。vLLM Ascend 的性能,特别是 Qwen3-Next 和 DeepSeek 3.2 的性能得到了提升。请注意,triton 默认未安装和启用,但我们建议在大多数情况下启用它。您可以从 [软件包链接](https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/triton_ascend-3.2.0.dev2025110717-cp311-cp311-manylinux_2_27_aarch64.whl) 手动下载并安装。如果您在 X86 架构上运行 vLLM Ascend,则需要从 [源代码](https://gitcode.com/Ascend/triton-ascend) 自行构建 triton ascend。" +"新增了大量 triton 内核。vLLM Ascend 的性能,特别是 Qwen3-Next 和 DeepSeek 3.2 " +"的性能得到了提升。请注意,triton 默认未安装和启用,但我们建议在大多数情况下启用它。您可以从 [软件包链接](https://vllm-" +"ascend.obs.cn-north-4.myhuaweicloud.com/vllm-" +"ascend/triton_ascend-3.2.0.dev2025110717-cp311-cp311-manylinux_2_27_aarch64.whl)" +" 手动下载并安装。如果您在 X86 架构上运行 vLLM Ascend,则需要从 [源代码](https://gitcode.com/Ascend" +"/triton-ascend) 自行构建 triton ascend。" #: ../../source/user_guide/release_notes.md:563 msgid "" @@ -3029,28 +3352,26 @@ msgid "" "from this release vLLM Ascend only works with custom ops built. So we " "removed the env `COMPILE_CUSTOM_KERNELS`. You can not set it to 0 now." msgstr "" -"新增了大量 Ascend 算子以提升性能。这意味着从本版本开始,vLLM Ascend 仅在使用自定义算子构建时才能工作。因此我们移除了环境变量 `COMPILE_CUSTOM_KERNELS`。您现在无法再将其设置为 0。" +"新增了大量 Ascend 算子以提升性能。这意味着从本版本开始,vLLM Ascend 仅在使用自定义算子构建时才能工作。因此我们移除了环境变量 " +"`COMPILE_CUSTOM_KERNELS`。您现在无法再将其设置为 0。" #: ../../source/user_guide/release_notes.md:564 msgid "" "speculative decode method `MTP` is more stable now. It can be enabled " "with most case and decode token number can be 1,2,3." -msgstr "" -"推测解码方法 `MTP` 现已更加稳定。它可以在大多数情况下启用,解码令牌数可以是 1、2、3。" +msgstr "推测解码方法 `MTP` 现已更加稳定。它可以在大多数情况下启用,解码令牌数可以是 1、2、3。" #: ../../source/user_guide/release_notes.md:565 msgid "" "speculative decode method `suffix` is supported now. Thanks for the " "contribution from China Merchants Bank." -msgstr "" -"现已支持推测解码方法 `suffix`。感谢招商银行的贡献。" +msgstr "现已支持推测解码方法 `suffix`。感谢招商银行的贡献。" #: ../../source/user_guide/release_notes.md:566 msgid "" "llm-comppressor quantization tool with W8A8 works now. You can now deploy" " the model with W8A8 quantization from this tool directly." -msgstr "" -"支持 W8A8 的 llm-comppressor 量化工具现已可用。您现在可以直接使用此工具部署经过 W8A8 量化的模型。" +msgstr "支持 W8A8 的 llm-comppressor 量化工具现已可用。您现在可以直接使用此工具部署经过 W8A8 量化的模型。" #: ../../source/user_guide/release_notes.md:567 msgid "W4A4 quantization works now." @@ -3063,7 +3384,10 @@ msgid "" "/vllm-project/vllm-ascend/pull/3004) [#3334](https://github.com/vllm-" "project/vllm-ascend/pull/3334)" msgstr "" -"支持论文 [flashcomm](https://arxiv.org/pdf/2412.04964) 中的 flashcomm1 和 flashcomm2 特性 [#3004](https://github.com/vllm-project/vllm-ascend/pull/3004) [#3334](https://github.com/vllm-project/vllm-ascend/pull/3334)" +"支持论文 [flashcomm](https://arxiv.org/pdf/2412.04964) 中的 flashcomm1 和 " +"flashcomm2 特性 [#3004](https://github.com/vllm-project/vllm-" +"ascend/pull/3004) [#3334](https://github.com/vllm-project/vllm-" +"ascend/pull/3334)" #: ../../source/user_guide/release_notes.md:569 msgid "Pooling model, such as bge, reranker, etc. 
are supported now" @@ -3074,8 +3398,7 @@ msgid "" "Official doc has been improved. we refactored the tutorial to make it " "more clear. The user guide and developer guide is more complete now. " "We'll keep improving it." -msgstr "" -"官方文档已得到改进。我们重构了教程,使其更加清晰。用户指南和开发者指南现在更加完整。我们将持续改进。" +msgstr "官方文档已得到改进。我们重构了教程,使其更加清晰。用户指南和开发者指南现在更加完整。我们将持续改进。" #: ../../source/user_guide/release_notes.md:574 msgid "[Experimental] Mooncake layerwise connector is supported now." @@ -3087,7 +3410,9 @@ msgid "" "pool](https://docs.vllm.ai/projects/ascend/en/latest/developer_guide/Design_Documents/KV_Cache_Pool_Guide.html)" " feature is added" msgstr "" -"[实验性] 新增 [KV 缓存池](https://docs.vllm.ai/projects/ascend/en/latest/developer_guide/Design_Documents/KV_Cache_Pool_Guide.html) 功能" +"[实验性] 新增 [KV " +"缓存池](https://docs.vllm.ai/projects/ascend/en/latest/developer_guide/Design_Documents/KV_Cache_Pool_Guide.html)" +" 功能" #: ../../source/user_guide/release_notes.md:576 msgid "" @@ -3096,7 +3421,9 @@ msgid "" "tutorial](https://docs.vllm.ai/projects/ascend/en/latest/user_guide/feature_guide/graph_mode.html" "#using-xlitegraph) to start using it." msgstr "" -"[实验性] 引入新的图模式 `xlite`。它在某些模型上表现良好。请按照[官方教程](https://docs.vllm.ai/projects/ascend/en/latest/user_guide/feature_guide/graph_mode.html#using-xlitegraph)开始使用。" +"[实验性] 引入新的图模式 " +"`xlite`。它在某些模型上表现良好。请按照[官方教程](https://docs.vllm.ai/projects/ascend/en/latest/user_guide/feature_guide/graph_mode.html" +"#using-xlitegraph)开始使用。" #: ../../source/user_guide/release_notes.md:577 msgid "" @@ -3107,15 +3434,20 @@ msgstr "已移除 LLMdatadist kv 连接器。请改用 mooncake 连接器。" #: ../../source/user_guide/release_notes.md:578 msgid "" "Ascend scheduler is removed. `--additional-config {\"ascend_scheduler\": " -"{\"enabled\": true}` doesn't work anymore." -msgstr "已移除 Ascend 调度器。`--additional-config {\"ascend_scheduler\": {\"enabled\": true}` 不再生效。" +"{\"enabled\": true}}` doesn't work anymore." +msgstr "" +"已移除 Ascend 调度器。`--additional-config {\"ascend_scheduler\": {\"enabled\": " +"true}}` 不再生效。" #: ../../source/user_guide/release_notes.md:579 +#, python-brace-format msgid "" "Torchair graph mode is removed. `--additional-config " "{\"torchair_graph_config\": {\"enabled\": true}}` doesn't work anymore. " "Please use aclgraph instead." -msgstr "已移除 Torchair 图模式。`--additional-config {\"torchair_graph_config\": {\"enabled\": true}}` 不再生效。请改用 aclgraph。" +msgstr "" +"已移除 Torchair 图模式。`--additional-config {\"torchair_graph_config\": " +"{\"enabled\": true}}` 不再生效。请改用 aclgraph。" #: ../../source/user_guide/release_notes.md:580 msgid "" @@ -3134,7 +3466,8 @@ msgid "" "doc](https://docs.vllm.ai/projects/ascend/en/latest/developer_guide/performance_and_debug/msprobe_guide.html)" " to get started." msgstr "" -"新增 msprobe 工具,帮助用户检查模型精度。请按照[官方文档](https://docs.vllm.ai/projects/ascend/en/latest/developer_guide/performance_and_debug/msprobe_guide.html)开始使用。" +"新增 msprobe " +"工具,帮助用户检查模型精度。请按照[官方文档](https://docs.vllm.ai/projects/ascend/en/latest/developer_guide/performance_and_debug/msprobe_guide.html)开始使用。" #: ../../source/user_guide/release_notes.md:583 msgid "" @@ -3144,7 +3477,9 @@ msgid "" "ascend/blob/v0.12.0rc1/docs/source/developer_guide/performance_and_debug/service_profiling_guide.md)" " to get started." 
msgstr "" -"新增 msserviceprofiler 工具,帮助用户分析模型性能。请按照[官方文档](https://github.com/vllm-project/vllm-ascend/blob/v0.12.0rc1/docs/source/developer_guide/performance_and_debug/service_profiling_guide.md)开始使用。" +"新增 msserviceprofiler 工具,帮助用户分析模型性能。请按照[官方文档](https://github.com/vllm-" +"project/vllm-" +"ascend/blob/v0.12.0rc1/docs/source/developer_guide/performance_and_debug/service_profiling_guide.md)开始使用。" #: ../../source/user_guide/release_notes.md:585 msgid "Upgrade Note" @@ -3159,9 +3494,9 @@ msgstr "vLLM Ascend 自维护的建模文件已被移除,相关的 Python 入 #: ../../source/user_guide/release_notes.md:588 msgid "" -"CANN is upgraded to 8.3.RC2, Pytorch and torch-npu are upgraded to 2.8.0." +"CANN is upgraded to 8.3.RC2, PyTorch and torch-npu are upgraded to 2.8.0." " Don't forget to install them." -msgstr "CANN 已升级至 8.3.RC2,Pytorch 和 torch-npu 已升级至 2.8.0。请勿忘记安装。" +msgstr "CANN 已升级至 8.3.RC2,PyTorch 和 torch-npu 已升级至 2.8.0。请勿忘记安装。" #: ../../source/user_guide/release_notes.md:589 msgid "Python 3.9 support is dropped to keep the same with vLLM v0.12.0" @@ -3172,27 +3507,37 @@ msgid "" "DeepSeek 3/3.1 and Qwen3 doesn't work with FULL_DECODE_ONLY graph mode. " "We'll fix it in next release. [#4990](https://github.com/vllm-project" "/vllm-ascend/pull/4990)" -msgstr "DeepSeek 3/3.1 和 Qwen3 在 FULL_DECODE_ONLY 图模式下无法工作。我们将在下个版本修复此问题。[#4990](https://github.com/vllm-project/vllm-ascend/pull/4990)" +msgstr "" +"DeepSeek 3/3.1 和 Qwen3 在 FULL_DECODE_ONLY " +"图模式下无法工作。我们将在下个版本修复此问题。[#4990](https://github.com/vllm-project/vllm-" +"ascend/pull/4990)" #: ../../source/user_guide/release_notes.md:594 msgid "" "Hunyuan OCR doesn't work. We'll fix it in the next release. " "[#4989](https://github.com/vllm-project/vllm-ascend/pull/4989) " "[#4992](https://github.com/vllm-project/vllm-ascend/pull/4992)" -msgstr "Hunyuan OCR 无法工作。我们将在下个版本修复此问题。[#4989](https://github.com/vllm-project/vllm-ascend/pull/4989) [#4992](https://github.com/vllm-project/vllm-ascend/pull/4992)" +msgstr "" +"Hunyuan OCR 无法工作。我们将在下个版本修复此问题。[#4989](https://github.com/vllm-project" +"/vllm-ascend/pull/4989) [#4992](https://github.com/vllm-project/vllm-" +"ascend/pull/4992)" #: ../../source/user_guide/release_notes.md:595 msgid "" "DeepSeek 3.2 doesn't work with chat template. It because that vLLM " "v0.12.0 doesn't support it. We'll support in the next v0.13.0rc1 version." -msgstr "DeepSeek 3.2 无法与聊天模板协同工作。这是因为 vLLM v0.12.0 不支持它。我们将在下一个 v0.13.0rc1 版本中提供支持。" +msgstr "" +"DeepSeek 3.2 无法与聊天模板协同工作。这是因为 vLLM v0.12.0 不支持它。我们将在下一个 v0.13.0rc1 " +"版本中提供支持。" #: ../../source/user_guide/release_notes.md:596 msgid "" "DeepSeek 3.2 doesn't work with high concurrency in some case. We'll fix " "it in next release. [#4996](https://github.com/vllm-project/vllm-" "ascend/pull/4996)" -msgstr "DeepSeek 3.2 在某些情况下无法在高并发下工作。我们将在下个版本修复此问题。[#4996](https://github.com/vllm-project/vllm-ascend/pull/4996)" +msgstr "" +"DeepSeek 3.2 在某些情况下无法在高并发下工作。我们将在下个版本修复此问题。[#4996](https://github.com" +"/vllm-project/vllm-ascend/pull/4996)" #: ../../source/user_guide/release_notes.md:597 msgid "" @@ -3200,14 +3545,19 @@ msgid "" "that `VLLM_ASCEND_ENABLE_NZ` is enabled by default. Please set " "`VLLM_ASCEND_ENABLE_NZ=0` to disable it. We'll add the auto detection " "mechanism in next release." 
-msgstr "我们注意到 bf16/fp16 模型性能不佳,主要是因为 `VLLM_ASCEND_ENABLE_NZ` 默认启用。请设置 `VLLM_ASCEND_ENABLE_NZ=0` 来禁用它。我们将在下个版本添加自动检测机制。" +msgstr "" +"我们注意到 bf16/fp16 模型性能不佳,主要是因为 `VLLM_ASCEND_ENABLE_NZ` 默认启用。请设置 " +"`VLLM_ASCEND_ENABLE_NZ=0` 来禁用它。我们将在下个版本添加自动检测机制。" #: ../../source/user_guide/release_notes.md:598 msgid "" "speculative decode method `suffix` doesn't work. We'll fix it in next " "release. You can pick this commit to fix the issue: " "[#5010](https://github.com/vllm-project/vllm-ascend/pull/5010)" -msgstr "推测解码方法 `suffix` 无法工作。我们将在下个版本修复此问题。你可以选取此提交来修复问题:[#5010](https://github.com/vllm-project/vllm-ascend/pull/5010)" +msgstr "" +"推测解码方法 `suffix` " +"无法工作。我们将在下个版本修复此问题。你可以选取此提交来修复问题:[#5010](https://github.com/vllm-project" +"/vllm-ascend/pull/5010)" #: ../../source/user_guide/release_notes.md:600 msgid "v0.11.0rc3 - 2025.12.03" @@ -3219,7 +3569,9 @@ msgid "" "quality reasons, we released a new rc before the official release. Thanks" " for all your feedback. Please follow the [official " "doc](https://docs.vllm.ai/projects/ascend/en/v0.11.0) to get started." -msgstr "这是 vLLM Ascend v0.11.0 的第三个候选发布版本。出于质量考虑,我们在正式发布前发布了新的 rc 版本。感谢您的所有反馈。请按照[官方文档](https://docs.vllm.ai/projects/ascend/en/v0.11.0)开始使用。" +msgstr "" +"这是 vLLM Ascend v0.11.0 的第三个候选发布版本。出于质量考虑,我们在正式发布前发布了新的 rc " +"版本。感谢您的所有反馈。请按照[官方文档](https://docs.vllm.ai/projects/ascend/en/v0.11.0)开始使用。" #: ../../source/user_guide/release_notes.md:607 msgid "" @@ -3227,50 +3579,67 @@ msgid "" "is a temporary solution. If you find the performance becomes bad, please " "let us know. We'll keep improving it. [#4495](https://github.com/vllm-" "project/vllm-ascend/pull/4495)" -msgstr "禁用 NZ 权重加载器以加速稠密模型。请注意,这是一个临时解决方案。如果您发现性能变差,请告知我们。我们将持续改进。[#4495](https://github.com/vllm-project/vllm-ascend/pull/4495)" +msgstr "" +"禁用 NZ " +"权重加载器以加速稠密模型。请注意,这是一个临时解决方案。如果您发现性能变差,请告知我们。我们将持续改进。[#4495](https://github.com" +"/vllm-project/vllm-ascend/pull/4495)" #: ../../source/user_guide/release_notes.md:608 msgid "" "mooncake is installed in official docker image now. You can use it " "directly in container now. [#4506](https://github.com/vllm-project/vllm-" "ascend/pull/4506)" -msgstr "mooncake 现已安装在官方 Docker 镜像中。您现在可以直接在容器中使用它。[#4506](https://github.com/vllm-project/vllm-ascend/pull/4506)" +msgstr "" +"mooncake 现已安装在官方 Docker 镜像中。您现在可以直接在容器中使用它。[#4506](https://github.com" +"/vllm-project/vllm-ascend/pull/4506)" #: ../../source/user_guide/release_notes.md:612 msgid "" "Fix an OOM issue for moe models. 
[#4367](https://github.com/vllm-project" "/vllm-ascend/pull/4367)" -msgstr "修复了 MoE 模型的 OOM 问题。[#4367](https://github.com/vllm-project/vllm-ascend/pull/4367)" +msgstr "" +"修复了 MoE 模型的 OOM 问题。[#4367](https://github.com/vllm-project/vllm-" +"ascend/pull/4367)" #: ../../source/user_guide/release_notes.md:613 msgid "" "Fix hang issue of multimodal model when running with DP>1 " "[#4393](https://github.com/vllm-project/vllm-ascend/pull/4393)" -msgstr "修复了多模态模型在 DP>1 运行时挂起的问题 [#4393](https://github.com/vllm-project/vllm-ascend/pull/4393)" +msgstr "" +"修复了多模态模型在 DP>1 运行时挂起的问题 [#4393](https://github.com/vllm-project/vllm-" +"ascend/pull/4393)" #: ../../source/user_guide/release_notes.md:614 msgid "" "Fix some bugs for EPLB [#4416](https://github.com/vllm-project/vllm-" "ascend/pull/4416)" -msgstr "修复了 EPLB 的一些错误 [#4416](https://github.com/vllm-project/vllm-ascend/pull/4416)" +msgstr "" +"修复了 EPLB 的一些错误 [#4416](https://github.com/vllm-project/vllm-" +"ascend/pull/4416)" #: ../../source/user_guide/release_notes.md:615 msgid "" "Fix bug for mtp>1 + lm_head_tp>1 case [#4360](https://github.com/vllm-" "project/vllm-ascend/pull/4360)" -msgstr "修复了 mtp>1 + lm_head_tp>1 情况下的错误 [#4360](https://github.com/vllm-project/vllm-ascend/pull/4360)" +msgstr "" +"修复了 mtp>1 + lm_head_tp>1 情况下的错误 [#4360](https://github.com/vllm-project" +"/vllm-ascend/pull/4360)" #: ../../source/user_guide/release_notes.md:616 msgid "" "Fix a accuracy issue when running vLLM serve for long time. " "[#4117](https://github.com/vllm-project/vllm-ascend/pull/4117)" -msgstr "修复了长时间运行 vLLM 服务时的精度问题。[#4117](https://github.com/vllm-project/vllm-ascend/pull/4117)" +msgstr "" +"修复了长时间运行 vLLM 服务时的精度问题。[#4117](https://github.com/vllm-project/vllm-" +"ascend/pull/4117)" #: ../../source/user_guide/release_notes.md:617 msgid "" "Fix a function bug when running qwen2.5 vl under high concurrency. " "[#4553](https://github.com/vllm-project/vllm-ascend/pull/4553)" -msgstr "修复了在高并发下运行 qwen2.5 vl 时的功能错误。[#4553](https://github.com/vllm-project/vllm-ascend/pull/4553)" +msgstr "" +"修复了在高并发下运行 qwen2.5 vl 时的功能错误。[#4553](https://github.com/vllm-project" +"/vllm-ascend/pull/4553)" #: ../../source/user_guide/release_notes.md:619 msgid "v0.11.0rc2 - 2025.11.21" @@ -3283,56 +3652,76 @@ msgid "" "feedback. We'll keep working on bug fix and performance improvement. The " "v0.11.0 official release will come soon. Please follow the [official " "doc](https://docs.vllm.ai/projects/ascend/en/v0.11.0) to get started." -msgstr "这是 vLLM Ascend v0.11.0 的第二个候选发布版本。在此版本中,我们修复了许多错误以提升质量。感谢您的所有反馈。我们将继续致力于错误修复和性能改进。v0.11.0 正式版即将发布。请按照[官方文档](https://docs.vllm.ai/projects/ascend/en/v0.11.0)开始使用。" +msgstr "" +"这是 vLLM Ascend v0.11.0 " +"的第二个候选发布版本。在此版本中,我们修复了许多错误以提升质量。感谢您的所有反馈。我们将继续致力于错误修复和性能改进。v0.11.0 " +"正式版即将发布。请按照[官方文档](https://docs.vllm.ai/projects/ascend/en/v0.11.0)开始使用。" #: ../../source/user_guide/release_notes.md:625 msgid "" "CANN is upgraded to 8.3.RC2. [#4332](https://github.com/vllm-project" "/vllm-ascend/pull/4332)" -msgstr "CANN 已升级至 8.3.RC2。[#4332](https://github.com/vllm-project/vllm-ascend/pull/4332)" +msgstr "" +"CANN 已升级至 8.3.RC2。[#4332](https://github.com/vllm-project/vllm-" +"ascend/pull/4332)" #: ../../source/user_guide/release_notes.md:626 msgid "" "Ngram spec decode method is back now. 
[#4092](https://github.com/vllm-" "project/vllm-ascend/pull/4092)" -msgstr "Ngram 推测解码方法现已恢复。[#4092](https://github.com/vllm-project/vllm-ascend/pull/4092)" +msgstr "" +"Ngram 推测解码方法现已恢复。[#4092](https://github.com/vllm-project/vllm-" +"ascend/pull/4092)" #: ../../source/user_guide/release_notes.md:627 msgid "" "The performance of aclgraph is improved by updating default capture size." " [#4205](https://github.com/vllm-project/vllm-ascend/pull/4205)" -msgstr "通过更新默认捕获大小,提升了 aclgraph 的性能。[#4205](https://github.com/vllm-project/vllm-ascend/pull/4205)" +msgstr "" +"通过更新默认捕获大小,提升了 aclgraph 的性能。[#4205](https://github.com/vllm-project/vllm-" +"ascend/pull/4205)" #: ../../source/user_guide/release_notes.md:631 msgid "" "Speed up vLLM startup time. [#4099](https://github.com/vllm-project/vllm-" "ascend/pull/4099)" -msgstr "加速了 vLLM 启动时间。[#4099](https://github.com/vllm-project/vllm-ascend/pull/4099)" +msgstr "" +"加速了 vLLM 启动时间。[#4099](https://github.com/vllm-project/vllm-" +"ascend/pull/4099)" #: ../../source/user_guide/release_notes.md:632 msgid "" "Kimi k2 with quantization works now. [#4190](https://github.com/vllm-" "project/vllm-ascend/pull/4190)" -msgstr "量化后的 Kimi k2 现已可以工作。[#4190](https://github.com/vllm-project/vllm-ascend/pull/4190)" +msgstr "" +"量化后的 Kimi k2 现已可以工作。[#4190](https://github.com/vllm-project/vllm-" +"ascend/pull/4190)" #: ../../source/user_guide/release_notes.md:633 msgid "" "Fix a bug for qwen3-next. It's more stable now. " "[#4025](https://github.com/vllm-project/vllm-ascend/pull/4025)" -msgstr "修复了 qwen3-next 的一个错误。现在它更稳定了。[#4025](https://github.com/vllm-project/vllm-ascend/pull/4025)" +msgstr "" +"修复了 qwen3-next 的一个错误。现在它更稳定了。[#4025](https://github.com/vllm-project" +"/vllm-ascend/pull/4025)" #: ../../source/user_guide/release_notes.md:637 msgid "" "Fix an issue for full decode only mode. Full graph mode is more stable " "now. [#4106](https://github.com/vllm-project/vllm-ascend/pull/4106) " "[#4282](https://github.com/vllm-project/vllm-ascend/pull/4282)" -msgstr "修复了仅全解码模式的一个问题。全图模式现在更稳定了。[#4106](https://github.com/vllm-project/vllm-ascend/pull/4106) [#4282](https://github.com/vllm-project/vllm-ascend/pull/4282)" +msgstr "" +"修复了仅全解码模式的一个问题。全图模式现在更稳定了。[#4106](https://github.com/vllm-project/vllm-" +"ascend/pull/4106) [#4282](https://github.com/vllm-project/vllm-" +"ascend/pull/4282)" #: ../../source/user_guide/release_notes.md:638 msgid "" "Fix a allgather ops bug for DeepSeek V3 series models. " "[#3711](https://github.com/vllm-project/vllm-ascend/pull/3711)" -msgstr "修复了 DeepSeek V3 系列模型的 allgather 操作错误。[#3711](https://github.com/vllm-project/vllm-ascend/pull/3711)" +msgstr "" +"修复了 DeepSeek V3 系列模型的 allgather 操作错误。[#3711](https://github.com/vllm-" +"project/vllm-ascend/pull/3711)" #: ../../source/user_guide/release_notes.md:639 msgid "" @@ -3349,8 +3738,8 @@ msgid "" "Fix a bug that vl model doesn't work on x86 machine. " "[#4285](https://github.com/vllm-project/vllm-ascend/pull/4285)" msgstr "" -"修复了 VL 模型在 x86 机器上无法工作的错误。 " -"[#4285](https://github.com/vllm-project/vllm-ascend/pull/4285)" +"修复了 VL 模型在 x86 机器上无法工作的错误。 [#4285](https://github.com/vllm-project/vllm-" +"ascend/pull/4285)" #: ../../source/user_guide/release_notes.md:641 msgid "" @@ -3366,8 +3755,8 @@ msgid "" "Add a check that to ensure EPLB only support w8a8 method for quantization" " case. 
[#4315](https://github.com/vllm-project/vllm-ascend/pull/4315)" msgstr "" -"添加检查以确保 EPLB 在量化场景下仅支持 w8a8 方法。 " -"[#4315](https://github.com/vllm-project/vllm-ascend/pull/4315)" +"添加检查以确保 EPLB 在量化场景下仅支持 w8a8 方法。 [#4315](https://github.com/vllm-project" +"/vllm-ascend/pull/4315)" #: ../../source/user_guide/release_notes.md:643 msgid "" @@ -3383,8 +3772,8 @@ msgid "" "Audio required library is installed in container. " "[#4324](https://github.com/vllm-project/vllm-ascend/pull/4324)" msgstr "" -"容器中已安装音频所需的库。 " -"[#4324](https://github.com/vllm-project/vllm-ascend/pull/4324)" +"容器中已安装音频所需的库。 [#4324](https://github.com/vllm-project/vllm-" +"ascend/pull/4324)" #: ../../source/user_guide/release_notes.md:648 msgid "" @@ -3400,15 +3789,14 @@ msgid "" "`response_format` parameter is not supported yet. We'll support it soon. " "[#4175](https://github.com/vllm-project/vllm-ascend/pull/4175)" msgstr "" -"`response_format` 参数暂不支持,我们将很快提供支持。 " -"[#4175](https://github.com/vllm-project/vllm-ascend/pull/4175)" +"`response_format` 参数暂不支持,我们将很快提供支持。 [#4175](https://github.com/vllm-" +"project/vllm-ascend/pull/4175)" #: ../../source/user_guide/release_notes.md:650 msgid "" "cpu bind feature doesn't work for multi instance case(Such as multi DP on" " one node). We'll fix it in the next release." -msgstr "" -"CPU 绑定功能在多实例场景(例如单节点上多个 DP)下无效。我们将在下一个版本中修复此问题。" +msgstr "CPU 绑定功能在多实例场景(例如单节点上多个 DP)下无效。我们将在下一个版本中修复此问题。" #: ../../source/user_guide/release_notes.md:652 msgid "v0.11.0rc1 - 2025.11.10" @@ -3423,7 +3811,9 @@ msgid "" "release it in the next few days. Any feedback is welcome to help us to " "improve v0.11.0." msgstr "" -"这是 vLLM Ascend v0.11.0 的第一个候选发布版本。请按照[官方文档](https://docs.vllm.ai/projects/ascend/en/v0.11.0)开始使用。v0.11.0 将是 vLLM Ascend 的下一个正式发布版本,我们将在未来几天内发布。欢迎任何反馈以帮助我们改进 v0.11.0。" +"这是 vLLM Ascend v0.11.0 " +"的第一个候选发布版本。请按照[官方文档](https://docs.vllm.ai/projects/ascend/en/v0.11.0)开始使用。v0.11.0" +" 将是 vLLM Ascend 的下一个正式发布版本,我们将在未来几天内发布。欢迎任何反馈以帮助我们改进 v0.11.0。" #: ../../source/user_guide/release_notes.md:659 msgid "" @@ -3431,9 +3821,9 @@ msgid "" "[#3945](https://github.com/vllm-project/vllm-ascend/pull/3945) " "[#3896](https://github.com/vllm-project/vllm-ascend/pull/3896)" msgstr "" -"CANN 已升级至 8.3.RC1。Torch-npu 已升级至 2.7.1。 " -"[#3945](https://github.com/vllm-project/vllm-ascend/pull/3945) " -"[#3896](https://github.com/vllm-project/vllm-ascend/pull/3896)" +"CANN 已升级至 8.3.RC1。Torch-npu 已升级至 2.7.1。 [#3945](https://github.com/vllm-" +"project/vllm-ascend/pull/3945) [#3896](https://github.com/vllm-project" +"/vllm-ascend/pull/3896)" #: ../../source/user_guide/release_notes.md:660 msgid "" @@ -3450,7 +3840,9 @@ msgid "" "[single_npu_qwen3_w4a4]." msgstr "" -"现已支持 W4A4 量化。 [#3427](https://github.com/vllm-project/vllm-ascend/pull/3427) 官方教程请参阅 [single_npu_qwen3_w4a4]。" +"现已支持 W4A4 量化。 [#3427](https://github.com/vllm-project/vllm-" +"ascend/pull/3427) 官方教程请参阅 [single_npu_qwen3_w4a4]。" #: ../../source/user_guide/release_notes.md:665 msgid "Performance of Qwen3 and Deepseek V3 series models are improved." @@ -3464,14 +3856,19 @@ msgid "" "/vllm-" "ascend/blob/v0.11.0rc1/docs/source/tutorials/multi_node_pd_disaggregation_mooncake.md)." 
msgstr "" -"现已支持 Mooncake 分层连接器 [#2602](https://github.com/vllm-project/vllm-ascend/pull/2602)。教程请参阅 [pd_disaggregation_mooncake_multi_node](https://github.com/vllm-project/vllm-ascend/blob/v0.11.0rc1/docs/source/tutorials/multi_node_pd_disaggregation_mooncake.md)。" +"现已支持 Mooncake 分层连接器 [#2602](https://github.com/vllm-project/vllm-" +"ascend/pull/2602)。教程请参阅 " +"[pd_disaggregation_mooncake_multi_node](https://github.com/vllm-project" +"/vllm-" +"ascend/blob/v0.11.0rc1/docs/source/tutorials/multi_node_pd_disaggregation_mooncake.md)。" #: ../../source/user_guide/release_notes.md:667 msgid "" "MTP > 1 is supported now. [#2708](https://github.com/vllm-project/vllm-" "ascend/pull/2708)" msgstr "" -"现已支持 MTP > 1。 [#2708](https://github.com/vllm-project/vllm-ascend/pull/2708)" +"现已支持 MTP > 1。 [#2708](https://github.com/vllm-project/vllm-" +"ascend/pull/2708)" #: ../../source/user_guide/release_notes.md:668 msgid "" @@ -3479,23 +3876,23 @@ msgid "" " will be landing in the next few weeks. [#2128](https://github.com/vllm-" "project/vllm-ascend/pull/2128)" msgstr "" -"[实验性] 现已支持图模式 `FULL_DECODE_ONLY`!`FULL` 模式将在未来几周内推出。 [#2128](https://github.com/vllm-project/vllm-ascend/pull/2128)" +"[实验性] 现已支持图模式 `FULL_DECODE_ONLY`!`FULL` 模式将在未来几周内推出。 " +"[#2128](https://github.com/vllm-project/vllm-ascend/pull/2128)" #: ../../source/user_guide/release_notes.md:669 msgid "" "Pooling models, such as bge-m3, are supported now. " "[#3171](https://github.com/vllm-project/vllm-ascend/pull/3171)" msgstr "" -"现已支持池化模型,例如 bge-m3。 " -"[#3171](https://github.com/vllm-project/vllm-ascend/pull/3171)" +"现已支持池化模型,例如 bge-m3。 [#3171](https://github.com/vllm-project/vllm-" +"ascend/pull/3171)" #: ../../source/user_guide/release_notes.md:673 msgid "" "Refactor the MOE module to make it clearer and easier to understand and " "the performance has improved in both quantitative and non-quantitative " "scenarios." -msgstr "" -"重构了 MOE 模块,使其更清晰易懂,并且在量化和非量化场景下的性能均有所提升。" +msgstr "重构了 MOE 模块,使其更清晰易懂,并且在量化和非量化场景下的性能均有所提升。" #: ../../source/user_guide/release_notes.md:674 msgid "" @@ -3503,14 +3900,14 @@ msgid "" "remove this module in Q4 2025. [#3004](https://github.com/vllm-project" "/vllm-ascend/pull/3004)" msgstr "" -"重构了模型注册模块以方便维护。我们将在 2025 年第四季度移除该模块。 [#3004](https://github.com/vllm-project/vllm-ascend/pull/3004)" +"重构了模型注册模块以方便维护。我们将在 2025 年第四季度移除该模块。 [#3004](https://github.com/vllm-" +"project/vllm-ascend/pull/3004)" #: ../../source/user_guide/release_notes.md:675 msgid "" "Torchair is deprecated. We'll remove it once the performance of ACL Graph" " is good enough. The deadline is Q1 2026." -msgstr "" -"Torchair 已弃用。一旦 ACL Graph 的性能足够好,我们将移除它。截止日期为 2026 年第一季度。" +msgstr "Torchair 已弃用。一旦 ACL Graph 的性能足够好,我们将移除它。截止日期为 2026 年第一季度。" #: ../../source/user_guide/release_notes.md:676 msgid "LLMDatadist KV Connector is deprecated. We'll remove it in Q1 2026." 
@@ -3523,9 +3920,10 @@ msgid "" "[#3004](https://github.com/vllm-project/vllm-ascend/pull/3004) " "[#3334](https://github.com/vllm-project/vllm-ascend/pull/3334)" msgstr "" -"重构了线性模块以支持论文 [flashcomm](https://arxiv.org/pdf/2412.04964) 中的 flashcomm1 和 flashcomm2 特性。 " -"[#3004](https://github.com/vllm-project/vllm-ascend/pull/3004) " -"[#3334](https://github.com/vllm-project/vllm-ascend/pull/3334)" +"重构了线性模块以支持论文 [flashcomm](https://arxiv.org/pdf/2412.04964) 中的 flashcomm1 " +"和 flashcomm2 特性。 [#3004](https://github.com/vllm-project/vllm-" +"ascend/pull/3004) [#3334](https://github.com/vllm-project/vllm-" +"ascend/pull/3334)" #: ../../source/user_guide/release_notes.md:679 msgid "Known issue" @@ -3535,45 +3933,43 @@ msgstr "已知问题" msgid "" "The memory may be leaked and the service may be stuck after long time " "serving. This is a bug from torch-npu, we'll upgrade and fix it soon." -msgstr "" -"长时间服务后可能出现内存泄漏和服务卡住的问题。这是 torch-npu 的一个错误,我们将尽快升级并修复。" +msgstr "长时间服务后可能出现内存泄漏和服务卡住的问题。这是 torch-npu 的一个错误,我们将尽快升级并修复。" #: ../../source/user_guide/release_notes.md:682 msgid "" "The accuracy of qwen2.5 VL is not very good. This is a bug lead by CANN, " "we fix it soon." -msgstr "" -"qwen2.5 VL 模型的准确性不佳。这是由 CANN 导致的一个错误,我们将尽快修复。" +msgstr "qwen2.5 VL 模型的准确性不佳。这是由 CANN 导致的一个错误,我们将尽快修复。" #: ../../source/user_guide/release_notes.md:683 msgid "" "For long sequence input case, there is no response sometimes and the kv " "cache usage is become higher. This is a bug for scheduler. We are working" " on it." -msgstr "" -"对于长序列输入场景,有时会没有响应,并且 KV 缓存使用率会变高。这是调度器的一个错误,我们正在处理中。" +msgstr "对于长序列输入场景,有时会没有响应,并且 KV 缓存使用率会变高。这是调度器的一个错误,我们正在处理中。" #: ../../source/user_guide/release_notes.md:684 msgid "" "Qwen2-audio doesn't work by default, we're fixing it. Temporary solution " "is to set `--gpu-memory-utilization` to a suitable value, such as 0.8." msgstr "" -"Qwen2-audio 默认无法工作,我们正在修复。临时解决方案是将 `--gpu-memory-utilization` 设置为合适的值,例如 0.8。" +"Qwen2-audio 默认无法工作,我们正在修复。临时解决方案是将 `--gpu-memory-utilization` 设置为合适的值,例如 " +"0.8。" #: ../../source/user_guide/release_notes.md:685 msgid "" "When running Qwen3-Next with expert parallel enabled, please set " "`HCCL_BUFFSIZE` environment variable to a suitable value, such as 1024." -msgstr "" -"当启用专家并行运行 Qwen3-Next 时,请将 `HCCL_BUFFSIZE` 环境变量设置为合适的值,例如 1024。" +msgstr "当启用专家并行运行 Qwen3-Next 时,请将 `HCCL_BUFFSIZE` 环境变量设置为合适的值,例如 1024。" #: ../../source/user_guide/release_notes.md:686 msgid "" "The accuracy of DeepSeek3.2 with aclgraph is not correct. Temporary " -"solution is to set `cudagraph_capture_sizes` to a suitable value " -"depending on the batch size for the input." +"solution is to set `agraph_capture_sizes` to a suitable value depending " +"on the batch size for the input." msgstr "" -"使用 aclgraph 时 DeepSeek3.2 的准确性不正确。临时解决方案是根据输入的批次大小将 `cudagraph_capture_sizes` 设置为合适的值。" +"使用 aclgraph 时 DeepSeek3.2 的准确性不正确。临时解决方案是根据输入的批次大小将 " +"`agraph_capture_sizes` 设置为合适的值。" #: ../../source/user_guide/release_notes.md:688 msgid "v0.11.0rc0 - 2025.09.30" @@ -3585,57 +3981,70 @@ msgid "" "follow the [official doc](https://github.com/vllm-project/vllm-" "ascend/tree/v0.11.0rc0) to get started." msgstr "" -"这是 vLLM Ascend v0.11.0 的特殊候选发布版本。请按照[官方文档](https://github.com/vllm-project/vllm-ascend/tree/v0.11.0rc0)开始使用。" +"这是 vLLM Ascend v0.11.0 的特殊候选发布版本。请按照[官方文档](https://github.com/vllm-" +"project/vllm-ascend/tree/v0.11.0rc0)开始使用。" #: ../../source/user_guide/release_notes.md:694 msgid "" "DeepSeek V3.2 is supported now. 
[#3270](https://github.com/vllm-project" "/vllm-ascend/pull/3270)" msgstr "" -"现已支持 DeepSeek V3.2。 [#3270](https://github.com/vllm-project/vllm-ascend/pull/3270)" +"现已支持 DeepSeek V3.2。 [#3270](https://github.com/vllm-project/vllm-" +"ascend/pull/3270)" #: ../../source/user_guide/release_notes.md:695 msgid "" "Qwen3-vl is supported now. [#3103](https://github.com/vllm-project/vllm-" "ascend/pull/3103)" msgstr "" -"现已支持 Qwen3-vl。 [#3103](https://github.com/vllm-project/vllm-ascend/pull/3103)" +"现已支持 Qwen3-vl。 [#3103](https://github.com/vllm-project/vllm-" +"ascend/pull/3103)" #: ../../source/user_guide/release_notes.md:699 msgid "" "DeepSeek works with aclgraph now. [#2707](https://github.com/vllm-project" "/vllm-ascend/pull/2707)" msgstr "" -"DeepSeek 现已支持与 aclgraph 协同工作。 [#2707](https://github.com/vllm-project/vllm-ascend/pull/2707)" +"DeepSeek 现已支持与 aclgraph 协同工作。 [#2707](https://github.com/vllm-project" +"/vllm-ascend/pull/2707)" #: ../../source/user_guide/release_notes.md:700 msgid "" "MTP works with aclgraph now. [#2932](https://github.com/vllm-project" "/vllm-ascend/pull/2932)" msgstr "" -"MTP 现已支持与 aclgraph 协同工作。 [#2932](https://github.com/vllm-project/vllm-ascend/pull/2932)" +"MTP 现已支持与 aclgraph 协同工作。 [#2932](https://github.com/vllm-project/vllm-" +"ascend/pull/2932)" #: ../../source/user_guide/release_notes.md:701 msgid "" "EPLB is supported now. [#2956](https://github.com/vllm-project/vllm-" "ascend/pull/2956)" -msgstr "" -"现已支持 EPLB。 [#2956](https://github.com/vllm-project/vllm-ascend/pull/2956)" +msgstr "现已支持 EPLB。 [#2956](https://github.com/vllm-project/vllm-ascend/pull/2956)" #: ../../source/user_guide/release_notes.md:702 msgid "" -"Mooncacke store kvcache connector is supported now. " +"Mooncake store kvcache connector is supported now. " "[#2913](https://github.com/vllm-project/vllm-ascend/pull/2913)" msgstr "" -"现已支持 Mooncake 存储 KV 缓存连接器。 " -"[#2913](https://github.com/vllm-project/vllm-ascend/pull/2913)" +"现已支持 Mooncake 存储 KV 缓存连接器。 [#2913](https://github.com/vllm-project/vllm-" +"ascend/pull/2913)" #: ../../source/user_guide/release_notes.md:703 msgid "" "CPU offload connector is supported now. [#1659](https://github.com/vllm-" "project/vllm-ascend/pull/1659)" msgstr "" -"现已支持 CPU 卸载连接器。 [#1659](https://github.com/vllm-project/vllm-ascend/pull/1659)" +"现已支持 CPU 卸载连接器。 [#1659](https://github.com/vllm-project/vllm-" +"ascend/pull/1659)" + +#: ../../source/user_guide/release_notes.md:707 +msgid "" +"Qwen3-next is stable now. 
[#3007](https://github.com/vllm-project/vllm-" +"ascend/pull/3007)" +msgstr "" +"Qwen3-next 现已稳定。 [#3007](https://github.com/vllm-project/vllm-" +"ascend/pull/3007)" #: ../../source/user_guide/release_notes.md:708 msgid "" @@ -3645,25 +4054,27 @@ msgid "" "[#3070](https://github.com/vllm-project/vllm-ascend/pull/3070) " "[#3113](https://github.com/vllm-project/vllm-ascend/pull/3113)" msgstr "" -"修复了 Qwen3-next 在 v0.10.2 版本中引入的大量错误。" -"[#2964](https://github.com/vllm-project/vllm-ascend/pull/2964) " -"[#2781](https://github.com/vllm-project/vllm-ascend/pull/2781) " -"[#3070](https://github.com/vllm-project/vllm-ascend/pull/3070) " -"[#3113](https://github.com/vllm-project/vllm-ascend/pull/3113)" +"修复了 Qwen3-next 在 v0.10.2 版本中引入的大量错误。[#2964](https://github.com/vllm-" +"project/vllm-ascend/pull/2964) [#2781](https://github.com/vllm-project" +"/vllm-ascend/pull/2781) [#3070](https://github.com/vllm-project/vllm-" +"ascend/pull/3070) [#3113](https://github.com/vllm-project/vllm-" +"ascend/pull/3113)" #: ../../source/user_guide/release_notes.md:709 msgid "" "The LoRA feature is back now. [#3044](https://github.com/vllm-project" "/vllm-ascend/pull/3044)" msgstr "" -"LoRA 功能现已恢复。 [#3044](https://github.com/vllm-project/vllm-ascend/pull/3044)" +"LoRA 功能现已恢复。 [#3044](https://github.com/vllm-project/vllm-" +"ascend/pull/3044)" #: ../../source/user_guide/release_notes.md:710 msgid "" "Eagle3 spec decode method is back now. [#2949](https://github.com/vllm-" "project/vllm-ascend/pull/2949)" msgstr "" -"Eagle3 推测解码方法现已恢复。 [#2949](https://github.com/vllm-project/vllm-ascend/pull/2949)" +"Eagle3 推测解码方法现已恢复。 [#2949](https://github.com/vllm-project/vllm-" +"ascend/pull/2949)" #: ../../source/user_guide/release_notes.md:712 msgid "v0.10.2rc1 - 2025.09.16" @@ -3675,7 +4086,8 @@ msgid "" "follow the [official doc](https://github.com/vllm-project/vllm-" "ascend/tree/v0.10.2rc1) to get started." msgstr "" -"这是 vLLM Ascend v0.10.2 的第一个候选发布版本。请按照[官方文档](https://github.com/vllm-project/vllm-ascend/tree/v0.10.2rc1)开始使用。" +"这是 vLLM Ascend v0.10.2 的第一个候选发布版本。请按照[官方文档](https://github.com/vllm-" +"project/vllm-ascend/tree/v0.10.2rc1)开始使用。" #: ../../source/user_guide/release_notes.md:718 msgid "" @@ -3687,56 +4099,66 @@ msgid "" "get started. [#2917](https://github.com/vllm-project/vllm-" "ascend/pull/2917)" msgstr "" -"新增对 Qwen3-Next 的支持。请注意,专家并行和 MTP 功能在此版本中不可用。我们将很快添加对它们的支持。请按照[官方指南](https://github.com/vllm-project/vllm-ascend/blob/v0.10.2rc1/docs/source/tutorials/multi_npu_qwen3_next.md)开始使用。 [#2917](https://github.com/vllm-project/vllm-ascend/pull/2917)" +"新增对 Qwen3-Next 的支持。请注意,专家并行和 MTP " +"功能在此版本中不可用。我们将很快添加对它们的支持。请按照[官方指南](https://github.com/vllm-project/vllm-" +"ascend/blob/v0.10.2rc1/docs/source/tutorials/multi_npu_qwen3_next.md)开始使用。" +" [#2917](https://github.com/vllm-project/vllm-ascend/pull/2917)" #: ../../source/user_guide/release_notes.md:719 msgid "" "Added quantization support for aclgraph [#2841](https://github.com/vllm-" "project/vllm-ascend/pull/2841)" msgstr "" -"为 aclgraph 添加量化支持 [#2841](https://github.com/vllm-project/vllm-ascend/pull/2841)" +"为 aclgraph 添加量化支持 [#2841](https://github.com/vllm-project/vllm-" +"ascend/pull/2841)" #: ../../source/user_guide/release_notes.md:723 msgid "" "Aclgraph now works with Ray backend. 
[#2589](https://github.com/vllm-" "project/vllm-ascend/pull/2589)" msgstr "" -"Aclgraph 现在可与 Ray 后端协同工作。 [#2589](https://github.com/vllm-project/vllm-ascend/pull/2589)" +"Aclgraph 现在可与 Ray 后端协同工作。 [#2589](https://github.com/vllm-project/vllm-" +"ascend/pull/2589)" #: ../../source/user_guide/release_notes.md:724 msgid "" "MTP now works with the token > 1. [#2708](https://github.com/vllm-project" "/vllm-ascend/pull/2708)" msgstr "" -"MTP 现在支持 token > 1 的情况。 [#2708](https://github.com/vllm-project/vllm-ascend/pull/2708)" +"MTP 现在支持 token > 1 的情况。 [#2708](https://github.com/vllm-project/vllm-" +"ascend/pull/2708)" #: ../../source/user_guide/release_notes.md:725 msgid "" "Qwen2.5 VL now works with quantization. [#2778](https://github.com/vllm-" "project/vllm-ascend/pull/2778)" msgstr "" -"Qwen2.5 VL 现在支持量化。 [#2778](https://github.com/vllm-project/vllm-ascend/pull/2778)" +"Qwen2.5 VL 现在支持量化。 [#2778](https://github.com/vllm-project/vllm-" +"ascend/pull/2778)" #: ../../source/user_guide/release_notes.md:726 msgid "" "Improved the performance with async scheduler enabled. " "[#2783](https://github.com/vllm-project/vllm-ascend/pull/2783)" msgstr "" -"启用了异步调度器后,性能得到提升。 [#2783](https://github.com/vllm-project/vllm-ascend/pull/2783)" +"启用了异步调度器后,性能得到提升。 [#2783](https://github.com/vllm-project/vllm-" +"ascend/pull/2783)" #: ../../source/user_guide/release_notes.md:727 msgid "" "Fixed the performance regression with non MLA model when using default " "scheduler. [#2894](https://github.com/vllm-project/vllm-ascend/pull/2894)" msgstr "" -"修复了使用默认调度器时非 MLA 模型的性能回归问题。 [#2894](https://github.com/vllm-project/vllm-ascend/pull/2894)" +"修复了使用默认调度器时非 MLA 模型的性能回归问题。 [#2894](https://github.com/vllm-project/vllm-" +"ascend/pull/2894)" #: ../../source/user_guide/release_notes.md:731 msgid "" "The performance of W8A8 quantization is improved. " "[#2275](https://github.com/vllm-project/vllm-ascend/pull/2275)" msgstr "" -"W8A8 量化的性能得到提升。 [#2275](https://github.com/vllm-project/vllm-ascend/pull/2275)" +"W8A8 量化的性能得到提升。 [#2275](https://github.com/vllm-project/vllm-" +"ascend/pull/2275)" #: ../../source/user_guide/release_notes.md:732 msgid "" @@ -3744,42 +4166,49 @@ msgid "" "/vllm-project/vllm-ascend/pull/2689) [#2842](https://github.com/vllm-" "project/vllm-ascend/pull/2842)" msgstr "" -"MoE 模型的性能得到提升。 [#2689](https://github.com/vllm-project/vllm-ascend/pull/2689) [#2842](https://github.com/vllm-project/vllm-ascend/pull/2842)" +"MoE 模型的性能得到提升。 [#2689](https://github.com/vllm-project/vllm-" +"ascend/pull/2689) [#2842](https://github.com/vllm-project/vllm-" +"ascend/pull/2842)" #: ../../source/user_guide/release_notes.md:733 msgid "" "Fixed resources limit error when apply speculative decoding and aclgraph." " [#2472](https://github.com/vllm-project/vllm-ascend/pull/2472)" msgstr "" -"修复了应用推测解码和 aclgraph 时的资源限制错误。 [#2472](https://github.com/vllm-project/vllm-ascend/pull/2472)" +"修复了应用推测解码和 aclgraph 时的资源限制错误。 [#2472](https://github.com/vllm-project" +"/vllm-ascend/pull/2472)" #: ../../source/user_guide/release_notes.md:734 msgid "" "Fixed the git config error in Docker images. [#2746](https://github.com" "/vllm-project/vllm-ascend/pull/2746)" msgstr "" -"修复了 Docker 镜像中的 git 配置错误。 [#2746](https://github.com/vllm-project/vllm-ascend/pull/2746)" +"修复了 Docker 镜像中的 git 配置错误。 [#2746](https://github.com/vllm-project/vllm-" +"ascend/pull/2746)" #: ../../source/user_guide/release_notes.md:735 msgid "" "Fixed the sliding windows attention bug with prefill. 
" "[#2758](https://github.com/vllm-project/vllm-ascend/pull/2758)" msgstr "" -"修复了预填充阶段的滑动窗口注意力错误。 [#2758](https://github.com/vllm-project/vllm-ascend/pull/2758)" +"修复了预填充阶段的滑动窗口注意力错误。 [#2758](https://github.com/vllm-project/vllm-" +"ascend/pull/2758)" #: ../../source/user_guide/release_notes.md:736 msgid "" "The official doc for Prefill-Decode Disaggregation with Qwen3 is added. " "[#2751](https://github.com/vllm-project/vllm-ascend/pull/2751)" msgstr "" -"新增了关于 Qwen3 预填充-解码解耦的官方文档。 [#2751](https://github.com/vllm-project/vllm-ascend/pull/2751)" +"新增了关于 Qwen3 预填充-解码解耦的官方文档。 [#2751](https://github.com/vllm-project/vllm-" +"ascend/pull/2751)" #: ../../source/user_guide/release_notes.md:737 msgid "" "`VLLM_ENABLE_FUSED_EXPERTS_ALLGATHER_EP` env works again. " "[#2740](https://github.com/vllm-project/vllm-ascend/pull/2740)" msgstr "" -"环境变量 `VLLM_ENABLE_FUSED_EXPERTS_ALLGATHER_EP` 再次生效。 [#2740](https://github.com/vllm-project/vllm-ascend/pull/2740)" +"环境变量 `VLLM_ENABLE_FUSED_EXPERTS_ALLGATHER_EP` 再次生效。 " +"[#2740](https://github.com/vllm-project/vllm-ascend/pull/2740)" #: ../../source/user_guide/release_notes.md:738 msgid "" @@ -3787,7 +4216,8 @@ msgid "" "`oproj_tensor_parallel_size` to enable this feature. " "[#2167](https://github.com/vllm-project/vllm-ascend/pull/2167)" msgstr "" -"为 DeepSeek 中的 oproj 添加了一项新改进。设置 `oproj_tensor_parallel_size` 以启用此功能。 [#2167](https://github.com/vllm-project/vllm-ascend/pull/2167)" +"为 DeepSeek 中的 oproj 添加了一项新改进。设置 `oproj_tensor_parallel_size` 以启用此功能。 " +"[#2167](https://github.com/vllm-project/vllm-ascend/pull/2167)" #: ../../source/user_guide/release_notes.md:739 msgid "" @@ -3795,14 +4225,16 @@ msgid "" "`graph_batch_sizes` is set. [#2760](https://github.com/vllm-project/vllm-" "ascend/pull/2760)" msgstr "" -"修复了一个 bug:当设置了 `graph_batch_sizes` 时,使用 torchair 的 deepseek 模型未按预期工作。 [#2760](https://github.com/vllm-project/vllm-ascend/pull/2760)" +"修复了一个 bug:当设置了 `graph_batch_sizes` 时,使用 torchair 的 deepseek 模型未按预期工作。 " +"[#2760](https://github.com/vllm-project/vllm-ascend/pull/2760)" #: ../../source/user_guide/release_notes.md:740 msgid "" "Avoid duplicate generation of sin_cos_cache in rope when kv_seqlen > 4k. " "[#2744](https://github.com/vllm-project/vllm-ascend/pull/2744)" msgstr "" -"当 kv_seqlen > 4k 时,避免在 rope 中重复生成 sin_cos_cache。 [#2744](https://github.com/vllm-project/vllm-ascend/pull/2744)" +"当 kv_seqlen > 4k 时,避免在 rope 中重复生成 sin_cos_cache。 " +"[#2744](https://github.com/vllm-project/vllm-ascend/pull/2744)" #: ../../source/user_guide/release_notes.md:741 msgid "" @@ -3811,7 +4243,9 @@ msgid "" "`VLLM_ASCEND_ENABLE_FLASHCOMM=1` to enable it. [#2779](https://github.com" "/vllm-project/vllm-ascend/pull/2779)" msgstr "" -"Qwen3 稠密模型的性能通过 flashcomm_v1 得到提升。设置 `VLLM_ASCEND_ENABLE_DENSE_OPTIMIZE=1` 和 `VLLM_ASCEND_ENABLE_FLASHCOMM=1` 以启用此功能。 [#2779](https://github.com/vllm-project/vllm-ascend/pull/2779)" +"Qwen3 稠密模型的性能通过 flashcomm_v1 得到提升。设置 " +"`VLLM_ASCEND_ENABLE_DENSE_OPTIMIZE=1` 和 `VLLM_ASCEND_ENABLE_FLASHCOMM=1` " +"以启用此功能。 [#2779](https://github.com/vllm-project/vllm-ascend/pull/2779)" #: ../../source/user_guide/release_notes.md:742 msgid "" @@ -3819,35 +4253,40 @@ msgid "" "Set `VLLM_ASCEND_ENABLE_PREFETCH_MLP=1` to enable it. 
" "[#2816](https://github.com/vllm-project/vllm-ascend/pull/2816)" msgstr "" -"Qwen3 稠密模型的性能通过预取功能得到提升。设置 `VLLM_ASCEND_ENABLE_PREFETCH_MLP=1` 以启用此功能。 [#2816](https://github.com/vllm-project/vllm-ascend/pull/2816)" +"Qwen3 稠密模型的性能通过预取功能得到提升。设置 `VLLM_ASCEND_ENABLE_PREFETCH_MLP=1` 以启用此功能。 " +"[#2816](https://github.com/vllm-project/vllm-ascend/pull/2816)" #: ../../source/user_guide/release_notes.md:743 msgid "" "The performance of Qwen3 MoE model is improved with rope ops update. " "[#2571](https://github.com/vllm-project/vllm-ascend/pull/2571)" msgstr "" -"Qwen3 MoE 模型的性能通过 rope 算子更新得到提升。 [#2571](https://github.com/vllm-project/vllm-ascend/pull/2571)" +"Qwen3 MoE 模型的性能通过 rope 算子更新得到提升。 [#2571](https://github.com/vllm-project" +"/vllm-ascend/pull/2571)" #: ../../source/user_guide/release_notes.md:744 msgid "" "Fix the weight load error for RLHF case. [#2756](https://github.com/vllm-" "project/vllm-ascend/pull/2756)" msgstr "" -"修复了 RLHF 场景下的权重加载错误。 [#2756](https://github.com/vllm-project/vllm-ascend/pull/2756)" +"修复了 RLHF 场景下的权重加载错误。 [#2756](https://github.com/vllm-project/vllm-" +"ascend/pull/2756)" #: ../../source/user_guide/release_notes.md:745 msgid "" "Add warm_up_atb step to speed up the inference. " "[#2823](https://github.com/vllm-project/vllm-ascend/pull/2823)" msgstr "" -"添加 warm_up_atb 步骤以加速推理。 [#2823](https://github.com/vllm-project/vllm-ascend/pull/2823)" +"添加 warm_up_atb 步骤以加速推理。 [#2823](https://github.com/vllm-project/vllm-" +"ascend/pull/2823)" #: ../../source/user_guide/release_notes.md:746 msgid "" "Fixed the aclgraph steam error for moe model. [#2827](https://github.com" "/vllm-project/vllm-ascend/pull/2827)" msgstr "" -"修复了 MoE 模型的 aclgraph 流错误。 [#2827](https://github.com/vllm-project/vllm-ascend/pull/2827)" +"修复了 MoE 模型的 aclgraph 流错误。 [#2827](https://github.com/vllm-project/vllm-" +"ascend/pull/2827)" #: ../../source/user_guide/release_notes.md:750 msgid "" @@ -3856,7 +4295,9 @@ msgid "" "commit](https://github.com/vllm-project/vllm/pull/23917) which is not " "included in v0.10.2. You can pick this commit to fix the issue." msgstr "" -"当预填充和解码阶段使用不同的 TP 大小时,运行预填充-解码解耦会导致服务器挂起。此问题已由 [vLLM commit](https://github.com/vllm-project/vllm/pull/23917) 修复,但该提交未包含在 v0.10.2 中。您可以选取此提交来修复该问题。" +"当预填充和解码阶段使用不同的 TP 大小时,运行预填充-解码解耦会导致服务器挂起。此问题已由 [vLLM " +"commit](https://github.com/vllm-project/vllm/pull/23917) 修复,但该提交未包含在 " +"v0.10.2 中。您可以选取此提交来修复该问题。" #: ../../source/user_guide/release_notes.md:751 msgid "" @@ -3866,7 +4307,9 @@ msgid "" "`gpu_memory_utilization` to suitable value based on your parallel " "configuration to avoid oom error." msgstr "" -"Qwen3-Next 的 HBM 使用率高于预期。这是一个[已知问题](https://github.com/vllm-project/vllm-ascend/issues/2884),我们正在处理中。您可以根据您的并行配置,将 `max_model_len` 和 `gpu_memory_utilization` 设置为合适的值以避免内存溢出错误。" +"Qwen3-Next 的 HBM 使用率高于预期。这是一个[已知问题](https://github.com/vllm-project/vllm-" +"ascend/issues/2884),我们正在处理中。您可以根据您的并行配置,将 `max_model_len` 和 " +"`gpu_memory_utilization` 设置为合适的值以避免内存溢出错误。" #: ../../source/user_guide/release_notes.md:752 msgid "" @@ -3874,7 +4317,8 @@ msgid "" "of KV cache. We will fix it soon. [2941](https://github.com/vllm-project" "/vllm-ascend/issues/2941)" msgstr "" -"我们注意到,由于 KV 缓存的重新设计,LoRA 在此版本中无法工作。我们将尽快修复此问题。 [2941](https://github.com/vllm-project/vllm-ascend/issues/2941)" +"我们注意到,由于 KV 缓存的重新设计,LoRA 在此版本中无法工作。我们将尽快修复此问题。 [2941](https://github.com" +"/vllm-project/vllm-ascend/issues/2941)" #: ../../source/user_guide/release_notes.md:753 msgid "" @@ -3882,7 +4326,7 @@ msgid "" "Ascend scheduler. 
The performance and accuracy is not good/correct. " "[#2943](https://github.com/vllm-project/vllm-ascend/issues/2943)" msgstr "" -"在使用 Ascend 调度器运行时,请不要启用带有前缀缓存的分块预填充。其性能和准确性不佳/不正确。 [#2943](https://github.com/vllm-project/vllm-ascend/issues/2943)" +"在使用昇腾调度器运行时,请勿启用带前缀缓存的分块预填充。其性能和准确性不佳/不正确。[#2943](https://github.com/vllm-project/vllm-ascend/issues/2943)" #: ../../source/user_guide/release_notes.md:755 msgid "v0.10.1rc1 - 2025.09.04" @@ -3902,7 +4346,7 @@ msgid "" "Merchants Bank. [#2325](https://github.com/vllm-project/vllm-" "ascend/pull/2325)" msgstr "" -"通过招商银行添加的自定义内核,LoRA 性能得到大幅提升。 [#2325](https://github.com/vllm-project/vllm-ascend/pull/2325)" +"通过招商银行添加的自定义内核,LoRA 性能得到大幅提升。[#2325](https://github.com/vllm-project/vllm-ascend/pull/2325)" #: ../../source/user_guide/release_notes.md:762 msgid "" @@ -3910,14 +4354,14 @@ msgid "" "style disaggregate prefill implementation. [#1568](https://github.com" "/vllm-project/vllm-ascend/pull/1568)" msgstr "" -"支持使用 Mooncake TransferEngine 进行 kv 缓存注册和 pull_blocks 风格的解耦预填充实现。 [#1568](https://github.com/vllm-project/vllm-ascend/pull/1568)" +"支持使用 Mooncake TransferEngine 进行 kv 缓存注册和 pull_blocks 风格的解耦预填充实现。[#1568](https://github.com/vllm-project/vllm-ascend/pull/1568)" #: ../../source/user_guide/release_notes.md:763 msgid "" "Support capture custom ops into aclgraph now. [#2113](https://github.com" "/vllm-project/vllm-ascend/pull/2113)" msgstr "" -"现在支持将自定义算子捕获到 aclgraph 中。 [#2113](https://github.com/vllm-project/vllm-ascend/pull/2113)" +"现在支持将自定义算子捕获到 aclgraph 中。[#2113](https://github.com/vllm-project/vllm-ascend/pull/2113)" #: ../../source/user_guide/release_notes.md:767 msgid "" @@ -3925,16 +4369,14 @@ msgid "" " increase memory usage. [#2120](https://github.com/vllm-project/vllm-" "ascend/pull/2120)" msgstr "" -"新增了 MLP 张量并行以提升性能,但请注意这会增加内存使用量。[#2120](https://github.com/vllm-project/vllm-" -"ascend/pull/2120)" +"新增了 MLP 张量并行以提升性能,但请注意这会增加内存使用量。[#2120](https://github.com/vllm-project/vllm-ascend/pull/2120)" #: ../../source/user_guide/release_notes.md:768 msgid "" "openEuler is upgraded to 24.03. [#2631](https://github.com/vllm-project" "/vllm-ascend/pull/2631)" msgstr "" -"openEuler 已升级至 24.03 版本。[#2631](https://github.com/vllm-project/vllm-" -"ascend/pull/2631)" +"openEuler 已升级至 24.03 版本。[#2631](https://github.com/vllm-project/vllm-ascend/pull/2631)" #: ../../source/user_guide/release_notes.md:769 msgid "" @@ -3942,16 +4384,14 @@ msgid "" " and improved TPOT performance. [#2309](https://github.com/vllm-project" "/vllm-ascend/pull/2309)" msgstr "" -"新增了自定义 lmhead 张量并行,以实现更低的内存消耗和更高的 TPOT 性能。[#2309](https://github.com/vllm-project/vllm-" -"ascend/pull/2309)" +"新增了自定义 lmhead 张量并行,以实现更低的内存消耗和更高的 TPOT 性能。[#2309](https://github.com/vllm-project/vllm-ascend/pull/2309)" #: ../../source/user_guide/release_notes.md:770 msgid "" "Qwen3 MoE/Qwen2.5 support torchair graph now. [#2403](https://github.com" "/vllm-project/vllm-ascend/pull/2403)" msgstr "" -"Qwen3 MoE/Qwen2.5 现已支持 torchair 图模式。[#2403](https://github.com/vllm-project/vllm-" -"ascend/pull/2403)" +"Qwen3 MoE/Qwen2.5 现已支持 torchair 图模式。[#2403](https://github.com/vllm-project/vllm-ascend/pull/2403)" #: ../../source/user_guide/release_notes.md:771 msgid "" @@ -3959,8 +4399,7 @@ msgid "" "accuracy issue. 
[#2528](https://github.com/vllm-project/vllm-" "ascend/pull/2528)" msgstr "" -"支持 AscendScheduler 的滑动窗口注意力机制,从而修复了 Gemma3 的精度问题。[#2528](https://github.com/vllm-project/vllm-" -"ascend/pull/2528)" +"支持 AscendScheduler 的滑动窗口注意力机制,从而修复了 Gemma3 的精度问题。[#2528](https://github.com/vllm-project/vllm-ascend/pull/2528)" #: ../../source/user_guide/release_notes.md:775 #: ../../source/user_guide/release_notes.md:906 @@ -3973,24 +4412,21 @@ msgid "" "problem that NPU stream not enough in some scenarios. " "[#2511](https://github.com/vllm-project/vllm-ascend/pull/2511)" msgstr "" -"更新了图捕获大小的计算方式,在一定程度上缓解了某些场景下 NPU 流不足的问题。[#2511](https://github.com/vllm-project/vllm-" -"ascend/pull/2511)" +"更新了图捕获大小的计算方式,在一定程度上缓解了某些场景下 NPU 流不足的问题。[#2511](https://github.com/vllm-project/vllm-ascend/pull/2511)" #: ../../source/user_guide/release_notes.md:777 msgid "" "Fixed bugs and refactor cached mask generation logic. " "[#2442](https://github.com/vllm-project/vllm-ascend/pull/2442)" msgstr "" -"修复了漏洞并重构了缓存掩码生成逻辑。[#2442](https://github.com/vllm-project/vllm-" -"ascend/pull/2442)" +"修复了漏洞并重构了缓存掩码生成逻辑。[#2442](https://github.com/vllm-project/vllm-ascend/pull/2442)" #: ../../source/user_guide/release_notes.md:778 msgid "" "Fixed the nz format does not work in quantization scenarios. " "[#2549](https://github.com/vllm-project/vllm-ascend/pull/2549)" msgstr "" -"修复了 nz 格式在量化场景下无效的问题。[#2549](https://github.com/vllm-project/vllm-" -"ascend/pull/2549)" +"修复了 nz 格式在量化场景下无效的问题。[#2549](https://github.com/vllm-project/vllm-ascend/pull/2549)" #: ../../source/user_guide/release_notes.md:779 msgid "" @@ -3998,8 +4434,7 @@ msgid "" "`enable_shared_pert_dp` by default. [#2457](https://github.com/vllm-" "project/vllm-ascend/pull/2457)" msgstr "" -"修复了因默认启用 `enable_shared_pert_dp` 导致的 Qwen 系列模型的精度问题。[#2457](https://github.com/vllm-project/vllm-" -"ascend/pull/2457)" +"修复了因默认启用 `enable_shared_pert_dp` 导致的 Qwen 系列模型的精度问题。[#2457](https://github.com/vllm-project/vllm-ascend/pull/2457)" #: ../../source/user_guide/release_notes.md:780 msgid "" @@ -4007,8 +4442,7 @@ msgid "" "dim, e.g., GLM4.5. [#2601](https://github.com/vllm-project/vllm-" "ascend/pull/2601)" msgstr "" -"修复了在 rope 维度不等于头维度的模型(例如 GLM4.5)上的精度问题。[#2601](https://github.com/vllm-project/vllm-" -"ascend/pull/2601)" +"修复了在 rope 维度不等于头维度的模型(例如 GLM4.5)上的精度问题。[#2601](https://github.com/vllm-project/vllm-ascend/pull/2601)" #: ../../source/user_guide/release_notes.md:781 #: ../../source/user_guide/release_notes.md:911 @@ -4020,32 +4454,28 @@ msgid "" "Removed torch.cat and replaced it with List[0]. " "[#2153](https://github.com/vllm-project/vllm-ascend/pull/2153)" msgstr "" -"移除了 torch.cat 并用 List[0] 替代。[#2153](https://github.com/vllm-project/vllm-" -"ascend/pull/2153)" +"移除了 torch.cat 并用 List[0] 替代。[#2153](https://github.com/vllm-project/vllm-ascend/pull/2153)" #: ../../source/user_guide/release_notes.md:783 msgid "" "Converted the format of gmm to nz. [#2474](https://github.com/vllm-" "project/vllm-ascend/pull/2474)" msgstr "" -"将 gmm 的格式转换为 nz。[#2474](https://github.com/vllm-project/vllm-" -"ascend/pull/2474)" +"将 gmm 的格式转换为 nz。[#2474](https://github.com/vllm-project/vllm-ascend/pull/2474)" #: ../../source/user_guide/release_notes.md:784 msgid "" "Optimized parallel strategies to reduce communication overhead. 
" "[#2198](https://github.com/vllm-project/vllm-ascend/pull/2198)" msgstr "" -"优化了并行策略以减少通信开销。[#2198](https://github.com/vllm-project/vllm-" -"ascend/pull/2198)" +"优化了并行策略以减少通信开销。[#2198](https://github.com/vllm-project/vllm-ascend/pull/2198)" #: ../../source/user_guide/release_notes.md:785 msgid "" "Optimized reject sampler in greedy situation. [#2137](https://github.com" "/vllm-project/vllm-ascend/pull/2137)" msgstr "" -"优化了贪婪模式下的拒绝采样器。[#2137](https://github.com/vllm-project/vllm-" -"ascend/pull/2137)" +"优化了贪婪模式下的拒绝采样器。[#2137](https://github.com/vllm-project/vllm-ascend/pull/2137)" #: ../../source/user_guide/release_notes.md:786 msgid "A batch of refactoring PRs to enhance the code architecture:" @@ -4055,41 +4485,34 @@ msgstr "一系列重构 PR 以增强代码架构:" msgid "" "Refactor on MLA. [#2465](https://github.com/vllm-project/vllm-" "ascend/pull/2465)" -msgstr "" -"重构了 MLA。[#2465](https://github.com/vllm-project/vllm-" -"ascend/pull/2465)" +msgstr "重构了 MLA。[#2465](https://github.com/vllm-project/vllm-ascend/pull/2465)" #: ../../source/user_guide/release_notes.md:788 msgid "" "Refactor on torchair fused_moe. [#2438](https://github.com/vllm-project" "/vllm-ascend/pull/2438)" msgstr "" -"重构了 torchair fused_moe。[#2438](https://github.com/vllm-project/vllm-" -"ascend/pull/2438)" +"重构了 torchair fused_moe。[#2438](https://github.com/vllm-project/vllm-ascend/pull/2438)" #: ../../source/user_guide/release_notes.md:789 msgid "" "Refactor on allgather/mc2-related fused_experts. " "[#2369](https://github.com/vllm-project/vllm-ascend/pull/2369)" msgstr "" -"重构了 allgather/mc2 相关的 fused_experts。[#2369](https://github.com/vllm-project/vllm-" -"ascend/pull/2369)" +"重构了 allgather/mc2 相关的 fused_experts。[#2369](https://github.com/vllm-project/vllm-ascend/pull/2369)" #: ../../source/user_guide/release_notes.md:790 msgid "" "Refactor on torchair model runner. [#2208](https://github.com/vllm-" "project/vllm-ascend/pull/2208)" msgstr "" -"重构了 torchair model runner。[#2208](https://github.com/vllm-project/vllm-" -"ascend/pull/2208)" +"重构了 torchair model runner。[#2208](https://github.com/vllm-project/vllm-ascend/pull/2208)" #: ../../source/user_guide/release_notes.md:791 msgid "" "Refactor on CI. [#2276](https://github.com/vllm-project/vllm-" "ascend/pull/2276)" -msgstr "" -"重构了 CI。[#2276](https://github.com/vllm-project/vllm-" -"ascend/pull/2276)" +msgstr "重构了 CI。[#2276](https://github.com/vllm-project/vllm-ascend/pull/2276)" #: ../../source/user_guide/release_notes.md:792 #: ../../source/user_guide/release_notes.md:926 @@ -4102,8 +4525,7 @@ msgid "" "enable lmhead tensor parallel. [#2309](https://github.com/vllm-project" "/vllm-ascend/pull/2309)" msgstr "" -"在 `additional_config` 中新增了 `lmhead_tensor_parallel_size` 参数,设置该参数以启用 lmhead 张量并行。[#2309](https://github.com/vllm-project/vllm-" -"ascend/pull/2309)" +"在 `additional_config` 中新增了 `lmhead_tensor_parallel_size` 参数,设置该参数以启用 lmhead 张量并行。[#2309](https://github.com/vllm-project/vllm-ascend/pull/2309)" #: ../../source/user_guide/release_notes.md:794 msgid "" @@ -4112,8 +4534,7 @@ msgid "" "`LLMDATADIST_SYNC_CACHE_WAIT_TIME` are removed. 
" "[#2448](https://github.com/vllm-project/vllm-ascend/pull/2448)" msgstr "" -"移除了未使用的环境变量 `HCCN_PATH`、`PROMPT_DEVICE_ID`、`DECODE_DEVICE_ID`、`LLMDATADIST_COMM_PORT` 和 `LLMDATADIST_SYNC_CACHE_WAIT_TIME`。[#2448](https://github.com/vllm-project/vllm-" -"ascend/pull/2448)" +"移除了未使用的环境变量 `HCCN_PATH`、`PROMPT_DEVICE_ID`、`DECODE_DEVICE_ID`、`LLMDATADIST_COMM_PORT` 和 `LLMDATADIST_SYNC_CACHE_WAIT_TIME`。[#2448](https://github.com/vllm-project/vllm-ascend/pull/2448)" #: ../../source/user_guide/release_notes.md:795 msgid "" @@ -4121,8 +4542,7 @@ msgid "" "`VLLM_ASCEND_LLMDD_RPC_PORT` now. [#2450](https://github.com/vllm-project" "/vllm-ascend/pull/2450)" msgstr "" -"环境变量 `VLLM_LLMDD_RPC_PORT` 现已重命名为 `VLLM_ASCEND_LLMDD_RPC_PORT`。[#2450](https://github.com/vllm-project/vllm-" -"ascend/pull/2450)" +"环境变量 `VLLM_LLMDD_RPC_PORT` 现已重命名为 `VLLM_ASCEND_LLMDD_RPC_PORT`。[#2450](https://github.com/vllm-project/vllm-ascend/pull/2450)" #: ../../source/user_guide/release_notes.md:796 msgid "" @@ -4131,8 +4551,7 @@ msgid "" "provides better performance in eager mode. [#2120](https://github.com" "/vllm-project/vllm-ascend/pull/2120)" msgstr "" -"在环境变量中新增了 `VLLM_ASCEND_ENABLE_MLP_OPTIMIZE`,用于控制在启用张量并行时是否启用 MLP 优化。此功能在 eager 模式下能提供更好的性能。[#2120](https://github.com/vllm-project/vllm-" -"ascend/pull/2120)" +"在环境变量中新增了 `VLLM_ASCEND_ENABLE_MLP_OPTIMIZE`,用于控制在启用张量并行时是否启用 MLP 优化。此功能在 eager 模式下能提供更好的性能。[#2120](https://github.com/vllm-project/vllm-ascend/pull/2120)" #: ../../source/user_guide/release_notes.md:797 msgid "" @@ -4140,16 +4559,14 @@ msgid "" "environment variables. [#2612](https://github.com/vllm-project/vllm-" "ascend/pull/2612)" msgstr "" -"移除了环境变量中的 `MOE_ALL2ALL_BUFFER` 和 `VLLM_ASCEND_ENABLE_MOE_ALL2ALL_SEQ`。[#2612](https://github.com/vllm-project/vllm-" -"ascend/pull/2612)" +"移除了环境变量中的 `MOE_ALL2ALL_BUFFER` 和 `VLLM_ASCEND_ENABLE_MOE_ALL2ALL_SEQ`。[#2612](https://github.com/vllm-project/vllm-ascend/pull/2612)" #: ../../source/user_guide/release_notes.md:798 msgid "" "Added `enable_prefetch` in `additional_config`, Whether to enable weight " "prefetch. [#2465](https://github.com/vllm-project/vllm-ascend/pull/2465)" msgstr "" -"在 `additional_config` 中新增了 `enable_prefetch` 参数,用于控制是否启用权重预取。[#2465](https://github.com/vllm-project/vllm-" -"ascend/pull/2465)" +"在 `additional_config` 中新增了 `enable_prefetch` 参数,用于控制是否启用权重预取。[#2465](https://github.com/vllm-project/vllm-ascend/pull/2465)" #: ../../source/user_guide/release_notes.md:799 msgid "" @@ -4157,8 +4574,7 @@ msgid "" "reduce-overhead mode for torchair, mode needs to be set. " "[#2461](https://github.com/vllm-project/vllm-ascend/pull/2461)" msgstr "" -"在 `additional_config.torchair_graph_config` 中新增了 `mode` 参数,当使用 torchair 的 reduce-overhead 模式时,需要设置此参数。[#2461](https://github.com/vllm-project/vllm-" -"ascend/pull/2461)" +"在 `additional_config.torchair_graph_config` 中新增了 `mode` 参数,当使用 torchair 的 reduce-overhead 模式时,需要设置此参数。[#2461](https://github.com/vllm-project/vllm-ascend/pull/2461)" #: ../../source/user_guide/release_notes.md:800 msgid "" @@ -4166,8 +4582,7 @@ msgid "" "now, and it is recommended to be enabled when inferencing with deepseek. 
" "[#2457](https://github.com/vllm-project/vllm-ascend/pull/2457)" msgstr "" -"`additional_config` 中的 `enable_shared_expert_dp` 参数现在默认禁用,建议在使用 deepseek 进行推理时启用它。[#2457](https://github.com/vllm-project/vllm-" -"ascend/pull/2457)" +"`additional_config` 中的 `enable_shared_expert_dp` 参数现在默认禁用,建议在使用 deepseek 进行推理时启用它。[#2457](https://github.com/vllm-project/vllm-ascend/pull/2457)" #: ../../source/user_guide/release_notes.md:804 msgid "" @@ -4175,8 +4590,7 @@ msgid "" "could only enable AscendScheduler to run with it. " "[#2729](https://github.com/vllm-project/vllm-ascend/issues/2729)" msgstr "" -"滑动窗口注意力机制目前不支持分块预填充,因此我们只能启用 AscendScheduler 来运行它。[#2729](https://github.com/vllm-project/vllm-" -"ascend/issues/2729)" +"滑动窗口注意力机制目前不支持分块预填充,因此我们只能启用 AscendScheduler 来运行它。[#2729](https://github.com/vllm-project/vllm-ascend/issues/2729)" #: ../../source/user_guide/release_notes.md:805 msgid "" @@ -4184,8 +4598,7 @@ msgid "" "fix it in next release. [#2681](https://github.com/vllm-project/vllm-" "ascend/pull/2681)" msgstr "" -"启用 MultiStream 时,创建 mc2_mask 存在一个漏洞,将在下一个版本中修复。[#2681](https://github.com/vllm-project/vllm-" -"ascend/pull/2681)" +"启用 MultiStream 时,创建 mc2_mask 存在一个漏洞,将在下一个版本中修复。[#2681](https://github.com/vllm-project/vllm-ascend/pull/2681)" #: ../../source/user_guide/release_notes.md:807 msgid "v0.9.1 - 2025.09.03" @@ -4207,15 +4620,14 @@ msgid "" "guide](https://github.com/vllm-project/vllm-" "ascend/blob/v0.9.1/docs/source/tutorials/large_scale_ep.md)." msgstr "" -"在此版本中,我们为大规模专家并行场景添加了许多增强功能。建议遵循[官方指南](https://github.com/vllm-project/vllm-" -"ascend/blob/v0.9.1/docs/source/tutorials/large_scale_ep.md)。" +"在此版本中,我们为大规模专家并行场景添加了许多增强功能。建议遵循[官方指南](https://github.com/vllm-project" +"/vllm-ascend/blob/v0.9.1/docs/source/tutorials/large_scale_ep.md)。" #: ../../source/user_guide/release_notes.md:813 msgid "" "Please note that this release note will list all the important changes " "from last official release(v0.7.3)" -msgstr "" -"请注意,本发布说明将列出自上一个正式版本(v0.7.3)以来的所有重要变更。" +msgstr "请注意,本发布说明将列出自上一个正式版本(v0.7.3)以来的所有重要变更。" #: ../../source/user_guide/release_notes.md:817 msgid "" @@ -4225,7 +4637,9 @@ msgid "" " and [Large Scale Expert Parallelism](https://github.com/vllm-project" "/vllm-ascend/blob/v0.9.1/docs/source/tutorials/large_scale_ep.md)." msgstr "" -"高质量、高性能地支持了 DeepSeek V3/R1。MTP 也能与 DeepSeek 协同工作。请参阅[多节点教程](https://docs.vllm.ai/projects/ascend/en/v0.9.1/tutorials/multi_node.html)和[大规模专家并行](https://github.com/vllm-project/vllm-" +"高质量、高性能地支持了 DeepSeek V3/R1。MTP 也能与 DeepSeek " +"协同工作。请参阅[多节点教程](https://docs.vllm.ai/projects/ascend/en/v0.9.1/tutorials/multi_node.html)和[大规模专家并行](https://github.com" +"/vllm-project/vllm-" "ascend/blob/v0.9.1/docs/source/tutorials/large_scale_ep.md)。" #: ../../source/user_guide/release_notes.md:818 @@ -4234,7 +4648,8 @@ msgid "" "Engine. Please refer to [Qwen " "tutorials](https://docs.vllm.ai/projects/ascend/en/v0.9.1/tutorials/index.html)." msgstr "" -"Qwen 系列模型现在支持图模式。默认情况下,它与 V1 引擎配合工作。请参阅[Qwen 教程](https://docs.vllm.ai/projects/ascend/en/v0.9.1/tutorials/index.html)。" +"Qwen 系列模型现在支持图模式。默认情况下,它与 V1 引擎配合工作。请参阅[Qwen " +"教程](https://docs.vllm.ai/projects/ascend/en/v0.9.1/tutorials/index.html)。" #: ../../source/user_guide/release_notes.md:819 msgid "" @@ -4259,7 +4674,8 @@ msgid "" "[quantization " "guide](https://docs.vllm.ai/projects/ascend/en/v0.9.1/user_guide/feature_guide/quantization.html)." 
msgstr "" -"现已支持 MOE 和稠密模型的 w4a8 量化。请参阅[量化指南](https://docs.vllm.ai/projects/ascend/en/v0.9.1/user_guide/feature_guide/quantization.html)。" +"现已支持 MOE 和稠密模型的 w4a8 " +"量化。请参阅[量化指南](https://docs.vllm.ai/projects/ascend/en/v0.9.1/user_guide/feature_guide/quantization.html)。" #: ../../source/user_guide/release_notes.md:823 msgid "" @@ -4267,14 +4683,14 @@ msgid "" "mode " "tutorials](https://docs.vllm.ai/projects/ascend/en/v0.9.1/user_guide/feature_guide/sleep_mode.html)." msgstr "" -"V1 引擎现已支持睡眠模式功能。请参阅[睡眠模式教程](https://docs.vllm.ai/projects/ascend/en/v0.9.1/user_guide/feature_guide/sleep_mode.html)。" +"V1 " +"引擎现已支持睡眠模式功能。请参阅[睡眠模式教程](https://docs.vllm.ai/projects/ascend/en/v0.9.1/user_guide/feature_guide/sleep_mode.html)。" #: ../../source/user_guide/release_notes.md:824 msgid "" "Dynamic and Static EPLB support is added. This feature is still " "experimental." -msgstr "" -"已添加动态和静态 EPLB 支持。此功能仍处于实验阶段。" +msgstr "已添加动态和静态 EPLB 支持。此功能仍处于实验阶段。" #: ../../source/user_guide/release_notes.md:826 msgid "Note" @@ -4284,38 +4700,35 @@ msgstr "注意" msgid "" "The following notes are especially for reference when upgrading from last" " final release (v0.7.3):" -msgstr "" -"以下说明特别适用于从上一个正式版本 (v0.7.3) 升级时参考:" +msgstr "以下说明特别适用于从上一个正式版本 (v0.7.3) 升级时参考:" #: ../../source/user_guide/release_notes.md:830 msgid "" "V0 Engine is not supported from this release. Please always set " "`VLLM_USE_V1=1` to use V1 engine with vLLM Ascend." -msgstr "" -"从本版本起不再支持 V0 引擎。请始终设置 `VLLM_USE_V1=1` 以在 vLLM Ascend 中使用 V1 引擎。" +msgstr "从本版本起不再支持 V0 引擎。请始终设置 `VLLM_USE_V1=1` 以在 vLLM Ascend 中使用 V1 引擎。" #: ../../source/user_guide/release_notes.md:831 msgid "" "Mindie Turbo is not needed with this release. And the old version of " "Mindie Turbo is not compatible. Please do not install it. Currently all " "the function and enhancement is included in vLLM Ascend already. We'll " -"consider to add it back in the future in needed." +"consider to add it back in the future if needed." msgstr "" -"本版本不再需要 Mindie Turbo。旧版本的 Mindie Turbo 不兼容,请勿安装。目前所有功能和增强已包含在 vLLM Ascend 中。未来如有需要,我们会考虑重新添加。" +"本版本不再需要 Mindie Turbo。旧版本的 Mindie Turbo 不兼容,请勿安装。目前所有功能和增强已包含在 vLLM Ascend" +" 中。未来如有需要,我们会考虑重新添加。" #: ../../source/user_guide/release_notes.md:832 msgid "" "Torch-npu is upgraded to 2.5.1.post1. CANN is upgraded to 8.2.RC1. Don't " "forget to upgrade them." -msgstr "" -"Torch-npu 已升级至 2.5.1.post1。CANN 已升级至 8.2.RC1。请勿忘记升级。" +msgstr "Torch-npu 已升级至 2.5.1.post1。CANN 已升级至 8.2.RC1。请勿忘记升级。" #: ../../source/user_guide/release_notes.md:836 msgid "" "The Ascend scheduler is added for V1 engine. This scheduler is more " "affine with Ascend hardware." -msgstr "" -"为 V1 引擎新增了 Ascend 调度器。该调度器与 Ascend 硬件更加适配。" +msgstr "为 V1 引擎新增了 Ascend 调度器。该调度器与 Ascend 硬件更加适配。" #: ../../source/user_guide/release_notes.md:837 msgid "Structured output feature works now on V1 Engine." @@ -4334,7 +4747,8 @@ msgid "" "EPLB support for Qwen3-moe model. 
[#2000](https://github.com/vllm-project" "/vllm-ascend/pull/2000)" msgstr "" -"为 Qwen3-moe 模型添加 EPLB 支持。[#2000](https://github.com/vllm-project/vllm-ascend/pull/2000)" +"为 Qwen3-moe 模型添加 EPLB 支持。[#2000](https://github.com/vllm-project/vllm-" +"ascend/pull/2000)" #: ../../source/user_guide/release_notes.md:843 msgid "" @@ -4344,7 +4758,10 @@ msgid "" "ascend/pull/2554) [#2531](https://github.com/vllm-project/vllm-" "ascend/pull/2531)" msgstr "" -"修复了 MTP 与 Prefill Decode Disaggregation 配合不佳的问题。[#2610](https://github.com/vllm-project/vllm-ascend/pull/2610) [#2554](https://github.com/vllm-project/vllm-ascend/pull/2554) [#2531](https://github.com/vllm-project/vllm-ascend/pull/2531)" +"修复了 MTP 与 Prefill Decode Disaggregation " +"配合不佳的问题。[#2610](https://github.com/vllm-project/vllm-ascend/pull/2610) " +"[#2554](https://github.com/vllm-project/vllm-ascend/pull/2554) " +"[#2531](https://github.com/vllm-project/vllm-ascend/pull/2531)" #: ../../source/user_guide/release_notes.md:844 msgid "" @@ -4353,21 +4770,24 @@ msgid "" "[#2509](https://github.com/vllm-project/vllm-ascend/pull/2509) " "[#2502](https://github.com/vllm-project/vllm-ascend/pull/2502)" msgstr "" -"修复了一些 bug 以确保 Prefill Decode Disaggregation 正常工作。[#2538](https://github.com/vllm-project/vllm-ascend/pull/2538) [#2509](https://github.com/vllm-project/vllm-ascend/pull/2509) [#2502](https://github.com/vllm-project/vllm-ascend/pull/2502)" +"修复了一些 bug 以确保 Prefill Decode Disaggregation " +"正常工作。[#2538](https://github.com/vllm-project/vllm-ascend/pull/2538) " +"[#2509](https://github.com/vllm-project/vllm-ascend/pull/2509) " +"[#2502](https://github.com/vllm-project/vllm-ascend/pull/2502)" #: ../../source/user_guide/release_notes.md:845 msgid "" "Fix file not found error with shutil.rmtree in torchair mode. " "[#2506](https://github.com/vllm-project/vllm-ascend/pull/2506)" msgstr "" -"修复了在 torchair 模式下使用 shutil.rmtree 时出现的文件未找到错误。[#2506](https://github.com/vllm-project/vllm-ascend/pull/2506)" +"修复了在 torchair 模式下使用 shutil.rmtree 时出现的文件未找到错误。[#2506](https://github.com" +"/vllm-project/vllm-ascend/pull/2506)" #: ../../source/user_guide/release_notes.md:849 msgid "" "When running MoE model, Aclgraph mode only work with tensor parallel. " "DP/EP doesn't work in this release." -msgstr "" -"运行 MoE 模型时,Aclgraph 模式仅支持张量并行。DP/EP 在本版本中不可用。" +msgstr "运行 MoE 模型时,Aclgraph 模式仅支持张量并行。DP/EP 在本版本中不可用。" #: ../../source/user_guide/release_notes.md:850 msgid "Pipeline parallelism is not supported in this release for V1 engine." @@ -4377,8 +4797,7 @@ msgstr "本版本中 V1 引擎不支持流水线并行。" msgid "" "If you use w4a8 quantization with eager mode, please set " "`VLLM_ASCEND_MLA_PARALLEL=1` to avoid oom error." -msgstr "" -"如果在 eager 模式下使用 w4a8 量化,请设置 `VLLM_ASCEND_MLA_PARALLEL=1` 以避免内存不足错误。" +msgstr "如果在 eager 模式下使用 w4a8 量化,请设置 `VLLM_ASCEND_MLA_PARALLEL=1` 以避免内存不足错误。" #: ../../source/user_guide/release_notes.md:852 msgid "" @@ -4386,7 +4805,8 @@ msgid "" "real user case. We'll fix it in the next post release. 
" "[#2654](https://github.com/vllm-project/vllm-ascend/pull/2654)" msgstr "" -"使用某些工具进行的精度测试可能不准确。这不影响实际用户场景。我们将在下一个发布版本中修复此问题。[#2654](https://github.com/vllm-project/vllm-ascend/pull/2654)" +"使用某些工具进行的精度测试可能不准确。这不影响实际用户场景。我们将在下一个发布版本中修复此问题。[#2654](https://github.com" +"/vllm-project/vllm-ascend/pull/2654)" #: ../../source/user_guide/release_notes.md:853 msgid "" @@ -4400,7 +4820,14 @@ msgid "" "[vLLM#23554](https://github.com/vllm-project/vllm/pull/23554) " "[vLLM#23981](https://github.com/vllm-project/vllm/pull/23981)" msgstr "" -"我们注意到,在使用 Prefill Decode Disaggregation 运行 vLLM Ascend 时仍存在一些问题。例如,可能出现内存泄漏或服务卡住。这是由 vLLM 和 vLLM Ascend 的已知问题引起的。我们将在下一个发布版本中修复此问题。[#2650](https://github.com/vllm-project/vllm-ascend/pull/2650) [#2604](https://github.com/vllm-project/vllm-ascend/pull/2604) [vLLM#22736](https://github.com/vllm-project/vllm/pull/22736) [vLLM#23554](https://github.com/vllm-project/vllm/pull/23554) [vLLM#23981](https://github.com/vllm-project/vllm/pull/23981)" +"我们注意到,在使用 Prefill Decode Disaggregation 运行 vLLM Ascend " +"时仍存在一些问题。例如,可能出现内存泄漏或服务卡住。这是由 vLLM 和 vLLM Ascend " +"的已知问题引起的。我们将在下一个发布版本中修复此问题。[#2650](https://github.com/vllm-project/vllm-" +"ascend/pull/2650) [#2604](https://github.com/vllm-project/vllm-" +"ascend/pull/2604) [vLLM#22736](https://github.com/vllm-" +"project/vllm/pull/22736) [vLLM#23554](https://github.com/vllm-" +"project/vllm/pull/23554) [vLLM#23981](https://github.com/vllm-" +"project/vllm/pull/23981)" #: ../../source/user_guide/release_notes.md:855 msgid "v0.9.1rc3 - 2025.08.22" @@ -4412,28 +4839,32 @@ msgid "" "follow the [official " "doc](https://docs.vllm.ai/projects/ascend/en/v0.9.1/) to get started." msgstr "" -"这是 vLLM Ascend v0.9.1 的第三个候选发布版本。请按照[官方文档](https://docs.vllm.ai/projects/ascend/en/v0.9.1/)开始使用。" +"这是 vLLM Ascend v0.9.1 " +"的第三个候选发布版本。请按照[官方文档](https://docs.vllm.ai/projects/ascend/en/v0.9.1/)开始使用。" #: ../../source/user_guide/release_notes.md:861 msgid "" "MTP supports V1 scheduler [#2371](https://github.com/vllm-project/vllm-" "ascend/pull/2371)" msgstr "" -"MTP 支持 V1 调度器 [#2371](https://github.com/vllm-project/vllm-ascend/pull/2371)" +"MTP 支持 V1 调度器 [#2371](https://github.com/vllm-project/vllm-" +"ascend/pull/2371)" #: ../../source/user_guide/release_notes.md:862 msgid "" "Add LMhead TP communication groups [#1956](https://github.com/vllm-" "project/vllm-ascend/pull/1956)" msgstr "" -"添加 LMhead 张量并行通信组 [#1956](https://github.com/vllm-project/vllm-ascend/pull/1956)" +"添加 LMhead 张量并行通信组 [#1956](https://github.com/vllm-project/vllm-" +"ascend/pull/1956)" #: ../../source/user_guide/release_notes.md:863 msgid "" "Fix the bug that qwen3 moe doesn't work with aclgraph " "[#2478](https://github.com/vllm-project/vllm-ascend/pull/2478)" msgstr "" -"修复了 qwen3 moe 模型在 aclgraph 模式下无法工作的 bug [#2478](https://github.com/vllm-project/vllm-ascend/pull/2478)" +"修复了 qwen3 moe 模型在 aclgraph 模式下无法工作的 bug [#2478](https://github.com/vllm-" +"project/vllm-ascend/pull/2478)" #: ../../source/user_guide/release_notes.md:864 msgid "" @@ -4441,28 +4872,32 @@ msgid "" "`apply_grammar_bitmask` method [#2314](https://github.com/vllm-project" "/vllm-ascend/pull/2314)" msgstr "" -"修复了因过时的 `apply_grammar_bitmask` 方法导致的 `grammar_bitmask` IndexError [#2314](https://github.com/vllm-project/vllm-ascend/pull/2314)" +"修复了因过时的 `apply_grammar_bitmask` 方法导致的 `grammar_bitmask` IndexError " +"[#2314](https://github.com/vllm-project/vllm-ascend/pull/2314)" #: ../../source/user_guide/release_notes.md:865 msgid "" "Remove `chunked_prefill_for_mla` [#2177](https://github.com/vllm-project" 
"/vllm-ascend/pull/2177)" msgstr "" -"移除 `chunked_prefill_for_mla` [#2177](https://github.com/vllm-project/vllm-ascend/pull/2177)" +"移除 `chunked_prefill_for_mla` [#2177](https://github.com/vllm-project" +"/vllm-ascend/pull/2177)" #: ../../source/user_guide/release_notes.md:866 msgid "" "Fix bugs and refactor cached mask generation logic " "[#2326](https://github.com/vllm-project/vllm-ascend/pull/2326)" msgstr "" -"修复 bug 并重构缓存掩码生成逻辑 [#2326](https://github.com/vllm-project/vllm-ascend/pull/2326)" +"修复 bug 并重构缓存掩码生成逻辑 [#2326](https://github.com/vllm-project/vllm-" +"ascend/pull/2326)" #: ../../source/user_guide/release_notes.md:867 msgid "" "Fix configuration check logic about ascend scheduler " "[#2327](https://github.com/vllm-project/vllm-ascend/pull/2327)" msgstr "" -"修复关于 Ascend 调度器的配置检查逻辑 [#2327](https://github.com/vllm-project/vllm-ascend/pull/2327)" +"修复关于 Ascend 调度器的配置检查逻辑 [#2327](https://github.com/vllm-project/vllm-" +"ascend/pull/2327)" #: ../../source/user_guide/release_notes.md:868 msgid "" @@ -4470,42 +4905,48 @@ msgid "" "disaggregated-prefill deployment [#2368](https://github.com/vllm-project" "/vllm-ascend/pull/2368)" msgstr "" -"在 disaggregated-prefill 部署中取消 deepseek-mtp 与非 Ascend 调度器之间的验证 [#2368](https://github.com/vllm-project/vllm-ascend/pull/2368)" +"在 disaggregated-prefill 部署中取消 deepseek-mtp 与非 Ascend 调度器之间的验证 " +"[#2368](https://github.com/vllm-project/vllm-ascend/pull/2368)" #: ../../source/user_guide/release_notes.md:869 msgid "" "Fix issue that failed with ray distributed backend " "[#2306](https://github.com/vllm-project/vllm-ascend/pull/2306)" msgstr "" -"修复了使用 ray 分布式后端时失败的问题 [#2306](https://github.com/vllm-project/vllm-ascend/pull/2306)" +"修复了使用 ray 分布式后端时失败的问题 [#2306](https://github.com/vllm-project/vllm-" +"ascend/pull/2306)" #: ../../source/user_guide/release_notes.md:870 msgid "" "Fix incorrect req block length in ascend scheduler " "[#2394](https://github.com/vllm-project/vllm-ascend/pull/2394)" msgstr "" -"修复 Ascend 调度器中请求块长度不正确的问题 [#2394](https://github.com/vllm-project/vllm-ascend/pull/2394)" +"修复 Ascend 调度器中请求块长度不正确的问题 [#2394](https://github.com/vllm-project/vllm-" +"ascend/pull/2394)" #: ../../source/user_guide/release_notes.md:871 msgid "" "Fix header include issue in rope [#2398](https://github.com/vllm-project" "/vllm-ascend/pull/2398)" msgstr "" -"修复 rope 中的头文件包含问题 [#2398](https://github.com/vllm-project/vllm-ascend/pull/2398)" +"修复 rope 中的头文件包含问题 [#2398](https://github.com/vllm-project/vllm-" +"ascend/pull/2398)" #: ../../source/user_guide/release_notes.md:872 msgid "" "Fix mtp config bug [#2412](https://github.com/vllm-project/vllm-" "ascend/pull/2412)" msgstr "" -"修复 mtp 配置 bug [#2412](https://github.com/vllm-project/vllm-ascend/pull/2412)" +"修复 mtp 配置 bug [#2412](https://github.com/vllm-project/vllm-" +"ascend/pull/2412)" #: ../../source/user_guide/release_notes.md:873 msgid "" "Fix error info and adapt `attn_metadata` refactor " "[#2402](https://github.com/vllm-project/vllm-ascend/pull/2402)" msgstr "" -"修复错误信息并适配 `attn_metadata` 重构 [#2402](https://github.com/vllm-project/vllm-ascend/pull/2402)" +"修复错误信息并适配 `attn_metadata` 重构 [#2402](https://github.com/vllm-project" +"/vllm-ascend/pull/2402)" #: ../../source/user_guide/release_notes.md:874 msgid "" @@ -4513,14 +4954,16 @@ msgid "" "`.kv_cache_bytes` file missing [#2312](https://github.com/vllm-project" "/vllm-ascend/pull/2312)" msgstr "" -"修复了因配置不匹配和缺少 `.kv_cache_bytes` 文件导致的 torchair 运行时错误 [#2312](https://github.com/vllm-project/vllm-ascend/pull/2312)" +"修复了因配置不匹配和缺少 `.kv_cache_bytes` 文件导致的 
torchair 运行时错误 " +"[#2312](https://github.com/vllm-project/vllm-ascend/pull/2312)" #: ../../source/user_guide/release_notes.md:875 msgid "" "Move `with_prefill` allreduce from cpu to npu [#2230](https://github.com" "/vllm-project/vllm-ascend/pull/2230)" msgstr "" -"将 `with_prefill` 的 allreduce 操作从 CPU 移至 NPU [#2230](https://github.com/vllm-project/vllm-ascend/pull/2230)" +"将 `with_prefill` 的 allreduce 操作从 CPU 移至 NPU [#2230](https://github.com" +"/vllm-project/vllm-ascend/pull/2230)" #: ../../source/user_guide/release_notes.md:877 #: ../../source/user_guide/release_notes.md:1041 @@ -4533,14 +4976,16 @@ msgid "" "Add document for deepseek large EP [#2339](https://github.com/vllm-" "project/vllm-ascend/pull/2339)" msgstr "" -"添加 deepseek 大模型 EP 相关文档 [#2339](https://github.com/vllm-project/vllm-ascend/pull/2339)" +"添加 deepseek 大模型 EP 相关文档 [#2339](https://github.com/vllm-project/vllm-" +"ascend/pull/2339)" #: ../../source/user_guide/release_notes.md:883 msgid "" "`test_aclgraph.py` failed with `\"full_cuda_graph\": True` on A2 (910B1) " "[#2182](https://github.com/vllm-project/vllm-ascend/issues/2182)" msgstr "" -"在 A2 (910B1) 上,当 `\"full_cuda_graph\": True` 时,`test_aclgraph.py` 测试失败 [#2182](https://github.com/vllm-project/vllm-ascend/issues/2182)" +"在 A2 (910B1) 上,当 `\"full_cuda_graph\": True` 时,`test_aclgraph.py` 测试失败 " +"[#2182](https://github.com/vllm-project/vllm-ascend/issues/2182)" #: ../../source/user_guide/release_notes.md:885 msgid "v0.10.0rc1 - 2025.08.07" @@ -4553,7 +4998,8 @@ msgid "" "ascend/tree/v0.10.0rc1) to get started. V0 is completely removed from " "this version." msgstr "" -"这是 vLLM Ascend v0.10.0 的第一个候选发布版本。请按照[官方文档](https://github.com/vllm-project/vllm-ascend/tree/v0.10.0rc1)开始使用。V0 引擎已在此版本中完全移除。" +"这是 vLLM Ascend v0.10.0 的第一个候选发布版本。请按照[官方文档](https://github.com/vllm-" +"project/vllm-ascend/tree/v0.10.0rc1)开始使用。V0 引擎已在此版本中完全移除。" #: ../../source/user_guide/release_notes.md:891 msgid "" @@ -4563,8 +5009,9 @@ msgid "" "project/vllm-" "ascend/blob/v0.10.0rc1/examples/disaggregated_prefill_v1/README.md)." msgstr "" -"解耦预填充现在可与 V1 引擎协同工作。您可以尝试使用 DeepSeek 模型 [#950](https://github.com/vllm-project/vllm-" -"ascend/pull/950),并按照此[教程](https://github.com/vllm-project/vllm-" +"解耦预填充现在可与 V1 引擎协同工作。您可以尝试使用 DeepSeek 模型 [#950](https://github.com/vllm-" +"project/vllm-ascend/pull/950),并按照此[教程](https://github.com/vllm-project" +"/vllm-" "ascend/blob/v0.10.0rc1/examples/disaggregated_prefill_v1/README.md)操作。" #: ../../source/user_guide/release_notes.md:892 @@ -4573,20 +5020,22 @@ msgid "" "[#2060](https://github.com/vllm-project/vllm-ascend/pull/2060) " "[#2172](https://github.com/vllm-project/vllm-ascend/pull/2172)" msgstr "" -"现在已支持对稠密模型和 MoE 模型使用 W4A8 量化方法。 [#2060](https://github.com/vllm-project/vllm-ascend/pull/2060) " -"[#2172](https://github.com/vllm-project/vllm-ascend/pull/2172)" +"现在已支持对稠密模型和 MoE 模型使用 W4A8 量化方法。 [#2060](https://github.com/vllm-project" +"/vllm-ascend/pull/2060) [#2172](https://github.com/vllm-project/vllm-" +"ascend/pull/2172)" #: ../../source/user_guide/release_notes.md:896 msgid "" "Ascend PyTorch adapter (torch_npu) has been upgraded to " "`2.7.1.dev20250724`. [#1562](https://github.com/vllm-project/vllm-" -"ascend/pull/1562) And CANN hase been upgraded to `8.2.RC1`. " +"ascend/pull/1562) And CANN has been upgraded to `8.2.RC1`. " "[#1653](https://github.com/vllm-project/vllm-ascend/pull/1653) Don’t " "forget to update them in your environment or using the latest images." 
msgstr "" -"Ascend PyTorch 适配器 (torch_npu) 已升级至 `2.7.1.dev20250724`。 [#1562](https://github.com/vllm-project/vllm-" -"ascend/pull/1562) 同时 CANN 已升级至 `8.2.RC1`。 [#1653](https://github.com/vllm-project/vllm-ascend/pull/1653) " -"请勿忘记在您的环境中更新它们或使用最新的镜像。" +"Ascend PyTorch 适配器 (torch_npu) 已升级至 `2.7.1.dev20250724`。 " +"[#1562](https://github.com/vllm-project/vllm-ascend/pull/1562) 同时 CANN " +"已升级至 `8.2.RC1`。 [#1653](https://github.com/vllm-project/vllm-" +"ascend/pull/1653) 请勿忘记在您的环境中更新它们或使用最新的镜像。" #: ../../source/user_guide/release_notes.md:897 msgid "" @@ -4594,8 +5043,8 @@ msgid "" "released from this version on. [#1582](https://github.com/vllm-project" "/vllm-ascend/pull/1582)" msgstr "" -"vLLM Ascend 现已在 Atlas 800I A3 上运行,A3 的镜像将从此版本开始发布。 [#1582](https://github.com/vllm-project" -"/vllm-ascend/pull/1582)" +"vLLM Ascend 现已在 Atlas 800I A3 上运行,A3 的镜像将从此版本开始发布。 " +"[#1582](https://github.com/vllm-project/vllm-ascend/pull/1582)" #: ../../source/user_guide/release_notes.md:898 msgid "" @@ -4605,79 +5054,90 @@ msgid "" "ascend/blob/v0.10.0rc1/docs/source/tutorials/multi_node_kimi.md) to have " "a try. [#2162](https://github.com/vllm-project/vllm-ascend/pull/2162)" msgstr "" -"vLLM Ascend 现已支持采用 w8a8 量化的 Kimi-K2、Qwen3-Coder 和 GLM-4.5 模型,请按照此[教程](https://github.com/vllm-" -"project/vllm-" -"ascend/blob/v0.10.0rc1/docs/source/tutorials/multi_node_kimi.md)进行尝试。 [#2162](https://github.com/vllm-project/vllm-ascend/pull/2162)" +"vLLM Ascend 现已支持采用 w8a8 量化的 Kimi-K2、Qwen3-Coder 和 GLM-4.5 " +"模型,请按照此[教程](https://github.com/vllm-project/vllm-" +"ascend/blob/v0.10.0rc1/docs/source/tutorials/multi_node_kimi.md)进行尝试。 " +"[#2162](https://github.com/vllm-project/vllm-ascend/pull/2162)" #: ../../source/user_guide/release_notes.md:899 msgid "" "Pipeline Parallelism is supported in V1 now. [#1800](https://github.com" "/vllm-project/vllm-ascend/pull/1800)" msgstr "" -"V1 引擎现已支持流水线并行。 [#1800](https://github.com/vllm-project/vllm-ascend/pull/1800)" +"V1 引擎现已支持流水线并行。 [#1800](https://github.com/vllm-project/vllm-" +"ascend/pull/1800)" #: ../../source/user_guide/release_notes.md:900 msgid "" "Prefix cache feature now work with the Ascend Scheduler. " "[#1446](https://github.com/vllm-project/vllm-ascend/pull/1446)" msgstr "" -"前缀缓存功能现在可与 Ascend 调度器协同工作。 [#1446](https://github.com/vllm-project/vllm-ascend/pull/1446)" +"前缀缓存功能现在可与 Ascend 调度器协同工作。 [#1446](https://github.com/vllm-project/vllm-" +"ascend/pull/1446)" #: ../../source/user_guide/release_notes.md:901 msgid "" "Torchair graph mode works with tp > 4 now. [#1508](https://github.com" "/vllm-project/vllm-ascend/issues/1508)" msgstr "" -"Torchair 图模式现在支持 tp > 4 的情况。 [#1508](https://github.com/vllm-project/vllm-ascend/issues/1508)" +"Torchair 图模式现在支持 tp > 4 的情况。 [#1508](https://github.com/vllm-project" +"/vllm-ascend/issues/1508)" #: ../../source/user_guide/release_notes.md:902 msgid "" "MTP support torchair graph mode now [#2145](https://github.com/vllm-" "project/vllm-ascend/pull/2145)" msgstr "" -"MTP 现在支持 torchair 图模式 [#2145](https://github.com/vllm-project/vllm-ascend/pull/2145)" +"MTP 现在支持 torchair 图模式 [#2145](https://github.com/vllm-project/vllm-" +"ascend/pull/2145)" #: ../../source/user_guide/release_notes.md:907 msgid "" "Fix functional problem of multimodality models like Qwen2-audio with " "Aclgraph. 
[#1803](https://github.com/vllm-project/vllm-ascend/pull/1803)" msgstr "" -"修复了 Qwen2-audio 等多模态模型与 Aclgraph 配合使用时的功能问题。 [#1803](https://github.com/vllm-project/vllm-ascend/pull/1803)" +"修复了 Qwen2-audio 等多模态模型与 Aclgraph 配合使用时的功能问题。 [#1803](https://github.com" +"/vllm-project/vllm-ascend/pull/1803)" #: ../../source/user_guide/release_notes.md:908 msgid "" "Fix the process group creating error with external launch scenario. " "[#1681](https://github.com/vllm-project/vllm-ascend/pull/1681)" msgstr "" -"修复了在外部启动场景下创建进程组的错误。 [#1681](https://github.com/vllm-project/vllm-ascend/pull/1681)" +"修复了在外部启动场景下创建进程组的错误。 [#1681](https://github.com/vllm-project/vllm-" +"ascend/pull/1681)" #: ../../source/user_guide/release_notes.md:909 msgid "" "Fix the functional problem with guided decoding. " "[#2022](https://github.com/vllm-project/vllm-ascend/pull/2022)" msgstr "" -"修复了引导式解码的功能问题。 [#2022](https://github.com/vllm-project/vllm-ascend/pull/2022)" +"修复了引导式解码的功能问题。 [#2022](https://github.com/vllm-project/vllm-" +"ascend/pull/2022)" #: ../../source/user_guide/release_notes.md:910 msgid "" "Fix the accuracy issue with common MoE models in DP scenario. " "[#1856](https://github.com/vllm-project/vllm-ascend/pull/1856)" msgstr "" -"修复了在 DP 场景下常见 MoE 模型的准确性问题。 [#1856](https://github.com/vllm-project/vllm-ascend/pull/1856)" +"修复了在 DP 场景下常见 MoE 模型的准确性问题。 [#1856](https://github.com/vllm-project/vllm-" +"ascend/pull/1856)" #: ../../source/user_guide/release_notes.md:912 msgid "" "Caching sin/cos instead of calculate it every layer. " "[#1890](https://github.com/vllm-project/vllm-ascend/pull/1890)" msgstr "" -"缓存 sin/cos 值,而不是在每一层都重新计算。 [#1890](https://github.com/vllm-project/vllm-ascend/pull/1890)" +"缓存 sin/cos 值,而不是在每一层都重新计算。 [#1890](https://github.com/vllm-project/vllm-" +"ascend/pull/1890)" #: ../../source/user_guide/release_notes.md:913 msgid "" "Improve shared expert multi-stream parallelism [#1891](https://github.com" "/vllm-project/vllm-ascend/pull/1891)" msgstr "" -"改进了共享专家的多流并行 [#1891](https://github.com/vllm-project/vllm-ascend/pull/1891)" +"改进了共享专家的多流并行 [#1891](https://github.com/vllm-project/vllm-" +"ascend/pull/1891)" #: ../../source/user_guide/release_notes.md:914 msgid "" @@ -4686,14 +5146,17 @@ msgid "" "`VLLM_ASCEND_ENABLE_MATMUL_ALLREDUCE` to `1`. [#1926](https://github.com" "/vllm-project/vllm-ascend/pull/1926)" msgstr "" -"在启用张量并行时,实现了预填充阶段 allreduce 与 matmul 的融合。通过设置 `VLLM_ASCEND_ENABLE_MATMUL_ALLREDUCE` 为 `1` 来启用此功能。 [#1926](https://github.com/vllm-project/vllm-ascend/pull/1926)" +"在启用张量并行时,实现了预填充阶段 allreduce 与 matmul 的融合。通过设置 " +"`VLLM_ASCEND_ENABLE_MATMUL_ALLREDUCE` 为 `1` 来启用此功能。 " +"[#1926](https://github.com/vllm-project/vllm-ascend/pull/1926)" #: ../../source/user_guide/release_notes.md:915 msgid "" "Optimize Quantized MoE Performance by Reducing All2All Communication. 
" "[#2195](https://github.com/vllm-project/vllm-ascend/pull/2195)" msgstr "" -"通过减少 All2All 通信来优化量化 MoE 的性能。 [#2195](https://github.com/vllm-project/vllm-ascend/pull/2195)" +"通过减少 All2All 通信来优化量化 MoE 的性能。 [#2195](https://github.com/vllm-project" +"/vllm-ascend/pull/2195)" #: ../../source/user_guide/release_notes.md:916 msgid "" @@ -4701,22 +5164,24 @@ msgid "" "performance [#1806](https://github.com/vllm-project/vllm-" "ascend/pull/1806)" msgstr "" -"在自定义模型中使用 AddRmsNormQuant 算子以优化 Qwen3 的性能 [#1806](https://github.com/vllm-project/vllm-" -"ascend/pull/1806)" +"在自定义模型中使用 AddRmsNormQuant 算子以优化 Qwen3 的性能 [#1806](https://github.com" +"/vllm-project/vllm-ascend/pull/1806)" #: ../../source/user_guide/release_notes.md:917 msgid "" "Use multicast to avoid padding decode request to prefill size " "[#1555](https://github.com/vllm-project/vllm-ascend/pull/1555)" msgstr "" -"使用组播来避免将解码请求填充至预填充大小 [#1555](https://github.com/vllm-project/vllm-ascend/pull/1555)" +"使用组播来避免将解码请求填充至预填充大小 [#1555](https://github.com/vllm-project/vllm-" +"ascend/pull/1555)" #: ../../source/user_guide/release_notes.md:918 msgid "" "The performance of LoRA has been improved. [#1884](https://github.com" "/vllm-project/vllm-ascend/pull/1884)" msgstr "" -"LoRA 的性能已得到提升。 [#1884](https://github.com/vllm-project/vllm-ascend/pull/1884)" +"LoRA 的性能已得到提升。 [#1884](https://github.com/vllm-project/vllm-" +"ascend/pull/1884)" #: ../../source/user_guide/release_notes.md:919 msgid "A batch of refactoring prs to enhance the code architecture:" @@ -4727,42 +5192,48 @@ msgid "" "Torchair model runner refactor [#2205](https://github.com/vllm-project" "/vllm-ascend/pull/2205)" msgstr "" -"重构 Torchair 模型运行器 [#2205](https://github.com/vllm-project/vllm-ascend/pull/2205)" +"重构 Torchair 模型运行器 [#2205](https://github.com/vllm-project/vllm-" +"ascend/pull/2205)" #: ../../source/user_guide/release_notes.md:921 msgid "" "Refactoring forward_context and model_runner_v1. " "[#1979](https://github.com/vllm-project/vllm-ascend/pull/1979)" msgstr "" -"重构 forward_context 和 model_runner_v1。 [#1979](https://github.com/vllm-project/vllm-ascend/pull/1979)" +"重构 forward_context 和 model_runner_v1。 [#1979](https://github.com/vllm-" +"project/vllm-ascend/pull/1979)" #: ../../source/user_guide/release_notes.md:922 msgid "" "Refactor AscendMetaData Comments. [#1967](https://github.com/vllm-project" "/vllm-ascend/pull/1967)" msgstr "" -"重构 AscendMetaData 注释。 [#1967](https://github.com/vllm-project/vllm-ascend/pull/1967)" +"重构 AscendMetaData 注释。 [#1967](https://github.com/vllm-project/vllm-" +"ascend/pull/1967)" #: ../../source/user_guide/release_notes.md:923 msgid "" "Refactor torchair utils. [#1892](https://github.com/vllm-project/vllm-" "ascend/pull/1892)" msgstr "" -"重构 torchair 工具集。 [#1892](https://github.com/vllm-project/vllm-ascend/pull/1892)" +"重构 torchair 工具集。 [#1892](https://github.com/vllm-project/vllm-" +"ascend/pull/1892)" #: ../../source/user_guide/release_notes.md:924 msgid "" "Refactor torchair worker. [#1885](https://github.com/vllm-project/vllm-" "ascend/pull/1885)" msgstr "" -"重构 torchair worker。 [#1885](https://github.com/vllm-project/vllm-ascend/pull/1885)" +"重构 torchair worker。 [#1885](https://github.com/vllm-project/vllm-" +"ascend/pull/1885)" #: ../../source/user_guide/release_notes.md:925 msgid "" "Register activation customop instead of overwrite forward_oot. 
" "[#1841](https://github.com/vllm-project/vllm-ascend/pull/1841)" msgstr "" -"注册激活自定义算子,而非覆盖 forward_oot。 [#1841](https://github.com/vllm-project/vllm-ascend/pull/1841)" +"注册激活自定义算子,而非覆盖 forward_oot。 [#1841](https://github.com/vllm-project/vllm-" +"ascend/pull/1841)" #: ../../source/user_guide/release_notes.md:927 msgid "" @@ -4770,14 +5241,14 @@ msgid "" "the EP and TP is aligned with vLLM now. [#1681](https://github.com/vllm-" "project/vllm-ascend/pull/1681)" msgstr "" -"`additional_config` 中的 `expert_tensor_parallel_size` 现已被移除,EP 和 TP 现已与 vLLM 对齐。 [#1681](https://github.com/vllm-project/vllm-ascend/pull/1681)" +"`additional_config` 中的 `expert_tensor_parallel_size` 现已被移除,EP 和 TP 现已与 " +"vLLM 对齐。 [#1681](https://github.com/vllm-project/vllm-ascend/pull/1681)" #: ../../source/user_guide/release_notes.md:928 msgid "" "Add `VLLM_ASCEND_MLA_PA` in environ variables, use this to enable mla " "paged attention operator for deepseek mla decode." -msgstr "" -"在环境变量中添加 `VLLM_ASCEND_MLA_PA`,用于启用 deepseek mla 解码的 mla 分页注意力算子。" +msgstr "在环境变量中添加 `VLLM_ASCEND_MLA_PA`,用于启用 deepseek mla 解码的 mla 分页注意力算子。" #: ../../source/user_guide/release_notes.md:929 msgid "" @@ -4785,7 +5256,8 @@ msgid "" "`MatmulAllReduce` fusion kernel when tensor parallel is enabled. This " "feature is supported in A2, and eager mode will get better performance." msgstr "" -"在环境变量中添加 `VLLM_ASCEND_ENABLE_MATMUL_ALLREDUCE`,用于在启用张量并行时启用 `MatmulAllReduce` 融合内核。此功能在 A2 上受支持,且 eager 模式将获得更好的性能。" +"在环境变量中添加 `VLLM_ASCEND_ENABLE_MATMUL_ALLREDUCE`,用于在启用张量并行时启用 " +"`MatmulAllReduce` 融合内核。此功能在 A2 上受支持,且 eager 模式将获得更好的性能。" #: ../../source/user_guide/release_notes.md:930 msgid "" @@ -4793,28 +5265,32 @@ msgid "" " enable moe all2all seq, this provides a basic framework on the basis of " "alltoall for easy expansion." msgstr "" -"在环境变量中添加 `VLLM_ASCEND_ENABLE_MOE_ALL2ALL_SEQ`,用于决定是否启用 moe all2all seq,这为在 alltoall 基础上进行扩展提供了一个基础框架。" +"在环境变量中添加 `VLLM_ASCEND_ENABLE_MOE_ALL2ALL_SEQ`,用于决定是否启用 moe all2all " +"seq,这为在 alltoall 基础上进行扩展提供了一个基础框架。" #: ../../source/user_guide/release_notes.md:932 msgid "" "UT coverage reached 76.34% after a batch of prs followed by this rfc: " "[#1298](https://github.com/vllm-project/vllm-ascend/issues/1298)" msgstr "" -"在遵循此 RFC [#1298](https://github.com/vllm-project/vllm-ascend/issues/1298) 提交一系列 PR 后,单元测试覆盖率已达到 76.34%。" +"在遵循此 RFC [#1298](https://github.com/vllm-project/vllm-ascend/issues/1298)" +" 提交一系列 PR 后,单元测试覆盖率已达到 76.34%。" #: ../../source/user_guide/release_notes.md:933 msgid "" "Sequence Parallelism works for Qwen3 MoE. [#2209](https://github.com" "/vllm-project/vllm-ascend/issues/2209)" msgstr "" -"序列并行现可用于 Qwen3 MoE。 [#2209](https://github.com/vllm-project/vllm-ascend/issues/2209)" +"序列并行现可用于 Qwen3 MoE。 [#2209](https://github.com/vllm-project/vllm-" +"ascend/issues/2209)" #: ../../source/user_guide/release_notes.md:934 msgid "" "Chinese online document is added now. [#1870](https://github.com/vllm-" "project/vllm-ascend/issues/1870)" msgstr "" -"现已添加中文在线文档。 [#1870](https://github.com/vllm-project/vllm-ascend/issues/1870)" +"现已添加中文在线文档。 [#1870](https://github.com/vllm-project/vllm-" +"ascend/issues/1870)" #: ../../source/user_guide/release_notes.md:938 msgid "" @@ -4822,7 +5298,8 @@ msgid "" "number of npu stream that Aclgraph needed to capture graph is not enough." 
" [#2229](https://github.com/vllm-project/vllm-ascend/issues/2229)" msgstr "" -"Aclgraph 目前无法与 DP + EP 协同工作,主要差距在于 Aclgraph 捕获图所需的 NPU 流数量不足。 [#2229](https://github.com/vllm-project/vllm-ascend/issues/2229)" +"Aclgraph 目前无法与 DP + EP 协同工作,主要差距在于 Aclgraph 捕获图所需的 NPU 流数量不足。 " +"[#2229](https://github.com/vllm-project/vllm-ascend/issues/2229)" #: ../../source/user_guide/release_notes.md:939 msgid "" @@ -4830,21 +5307,24 @@ msgid "" "multistream enabled. This will be fixed in the next release. " "[#2232](https://github.com/vllm-project/vllm-ascend/issues/2232)" msgstr "" -"启用多流时,W8A8 动态量化的 DeepSeek 模型存在准确性问题。此问题将在下一个版本中修复。 [#2232](https://github.com/vllm-project/vllm-ascend/issues/2232)" +"启用多流时,W8A8 动态量化的 DeepSeek 模型存在准确性问题。此问题将在下一个版本中修复。 " +"[#2232](https://github.com/vllm-project/vllm-ascend/issues/2232)" #: ../../source/user_guide/release_notes.md:940 msgid "" "In Qwen3 MoE, SP cannot be incorporated into the Aclgraph. " "[#2246](https://github.com/vllm-project/vllm-ascend/issues/2246)" msgstr "" -"在 Qwen3 MoE 中,序列并行无法集成到 Aclgraph 中。 [#2246](https://github.com/vllm-project/vllm-ascend/issues/2246)" +"在 Qwen3 MoE 中,序列并行无法集成到 Aclgraph 中。 [#2246](https://github.com/vllm-" +"project/vllm-ascend/issues/2246)" #: ../../source/user_guide/release_notes.md:941 msgid "" "MTP not support V1 scheduler currently, will fix it in Q3. " "[#2254](https://github.com/vllm-project/vllm-ascend/issues/2254)" msgstr "" -"MTP 目前不支持 V1 调度器,将在第三季度修复此问题。 [#2254](https://github.com/vllm-project/vllm-ascend/issues/2254)" +"MTP 目前不支持 V1 调度器,将在第三季度修复此问题。 [#2254](https://github.com/vllm-project" +"/vllm-ascend/issues/2254)" #: ../../source/user_guide/release_notes.md:942 msgid "" @@ -4852,8 +5332,8 @@ msgid "" "some issue on vLLM. [#2254](https://github.com/vllm-project/vllm-" "ascend/issues/2254)" msgstr "" -"当以 DP > 1 运行 MTP 时,由于 vLLM 的某些问题,需要禁用指标记录器。 [#2254](https://github.com/vllm-project/vllm-" -"ascend/issues/2254)" +"当以 DP > 1 运行 MTP 时,由于 vLLM 的某些问题,需要禁用指标记录器。 [#2254](https://github.com" +"/vllm-project/vllm-ascend/issues/2254)" #: ../../source/user_guide/release_notes.md:944 msgid "v0.9.1rc2 - 2025.08.04" @@ -4865,7 +5345,8 @@ msgid "" "follow the [official " "doc](https://docs.vllm.ai/projects/ascend/en/v0.9.1/) to get started." 
msgstr "" -"这是 vLLM Ascend v0.9.1 的第二个候选发布版本。请按照[官方文档](https://docs.vllm.ai/projects/ascend/en/v0.9.1/)开始使用。" +"这是 vLLM Ascend v0.9.1 " +"的第二个候选发布版本。请按照[官方文档](https://docs.vllm.ai/projects/ascend/en/v0.9.1/)开始使用。" #: ../../source/user_guide/release_notes.md:950 msgid "" @@ -4875,14 +5356,19 @@ msgid "" "/vllm-ascend/pull/1275) [#1480](https://github.com/vllm-project/vllm-" "ascend/pull/1480)" msgstr "" -"现已支持 MOE 和密集模型的 w4a8 量化:[#1320](https://github.com/vllm-project/vllm-ascend/pull/1320) [#1910](https://github.com/vllm-project/vllm-ascend/pull/1910) [#1275](https://github.com/vllm-project/vllm-ascend/pull/1275) [#1480](https://github.com/vllm-project/vllm-ascend/pull/1480)" +"现已支持 MOE 和密集模型的 w4a8 量化:[#1320](https://github.com/vllm-project/vllm-" +"ascend/pull/1320) [#1910](https://github.com/vllm-project/vllm-" +"ascend/pull/1910) [#1275](https://github.com/vllm-project/vllm-" +"ascend/pull/1275) [#1480](https://github.com/vllm-project/vllm-" +"ascend/pull/1480)" #: ../../source/user_guide/release_notes.md:951 msgid "" "Dynamic EPLB support in [#1943](https://github.com/vllm-project/vllm-" "ascend/pull/1943)" msgstr "" -"在 [#1943](https://github.com/vllm-project/vllm-ascend/pull/1943) 中支持了动态 EPLB。" +"在 [#1943](https://github.com/vllm-project/vllm-ascend/pull/1943) 中支持了动态 " +"EPLB。" #: ../../source/user_guide/release_notes.md:952 msgid "" @@ -4898,7 +5384,16 @@ msgid "" "[#2083](https://github.com/vllm-project/vllm-ascend/pull/2083) " "[#1989](https://github.com/vllm-project/vllm-ascend/pull/1989)" msgstr "" -"为 V1 引擎支持并改进了分离式预填充,持续开发和稳定该功能,包括针对单机设置的性能提升和错误修复:[#1953](https://github.com/vllm-project/vllm-ascend/pull/1953) [#1612](https://github.com/vllm-project/vllm-ascend/pull/1612) [#1361](https://github.com/vllm-project/vllm-ascend/pull/1361) [#1746](https://github.com/vllm-project/vllm-ascend/pull/1746) [#1552](https://github.com/vllm-project/vllm-ascend/pull/1552) [#1801](https://github.com/vllm-project/vllm-ascend/pull/1801) [#2083](https://github.com/vllm-project/vllm-ascend/pull/2083) [#1989](https://github.com/vllm-project/vllm-ascend/pull/1989)" +"为 V1 " +"引擎支持并改进了分离式预填充,持续开发和稳定该功能,包括针对单机设置的性能提升和错误修复:[#1953](https://github.com" +"/vllm-project/vllm-ascend/pull/1953) [#1612](https://github.com/vllm-" +"project/vllm-ascend/pull/1612) [#1361](https://github.com/vllm-project" +"/vllm-ascend/pull/1361) [#1746](https://github.com/vllm-project/vllm-" +"ascend/pull/1746) [#1552](https://github.com/vllm-project/vllm-" +"ascend/pull/1552) [#1801](https://github.com/vllm-project/vllm-" +"ascend/pull/1801) [#2083](https://github.com/vllm-project/vllm-" +"ascend/pull/2083) [#1989](https://github.com/vllm-project/vllm-" +"ascend/pull/1989)" #: ../../source/user_guide/release_notes.md:954 msgid "Model Improvement" @@ -4917,7 +5412,16 @@ msgid "" "ascend/pull/1827) [#2093](https://github.com/vllm-project/vllm-" "ascend/pull/2093)" msgstr "" -"DeepSeek DBO 支持与改进:[#1285](https://github.com/vllm-project/vllm-ascend/pull/1285) [#1291](https://github.com/vllm-project/vllm-ascend/pull/1291) [#1328](https://github.com/vllm-project/vllm-ascend/pull/1328) [#1420](https://github.com/vllm-project/vllm-ascend/pull/1420) [#1445](https://github.com/vllm-project/vllm-ascend/pull/1445) [#1589](https://github.com/vllm-project/vllm-ascend/pull/1589) [#1759](https://github.com/vllm-project/vllm-ascend/pull/1759) [#1827](https://github.com/vllm-project/vllm-ascend/pull/1827) [#2093](https://github.com/vllm-project/vllm-ascend/pull/2093)" +"DeepSeek DBO 支持与改进:[#1285](https://github.com/vllm-project/vllm-" 
+"ascend/pull/1285) [#1291](https://github.com/vllm-project/vllm-" +"ascend/pull/1291) [#1328](https://github.com/vllm-project/vllm-" +"ascend/pull/1328) [#1420](https://github.com/vllm-project/vllm-" +"ascend/pull/1420) [#1445](https://github.com/vllm-project/vllm-" +"ascend/pull/1445) [#1589](https://github.com/vllm-project/vllm-" +"ascend/pull/1589) [#1759](https://github.com/vllm-project/vllm-" +"ascend/pull/1759) [#1827](https://github.com/vllm-project/vllm-" +"ascend/pull/1827) [#2093](https://github.com/vllm-project/vllm-" +"ascend/pull/2093)" #: ../../source/user_guide/release_notes.md:957 msgid "" @@ -4934,7 +5438,18 @@ msgid "" "ascend/pull/1990) [#2019](https://github.com/vllm-project/vllm-" "ascend/pull/2019)" msgstr "" -"DeepSeek MTP 改进与错误修复:[#1214](https://github.com/vllm-project/vllm-ascend/pull/1214) [#943](https://github.com/vllm-project/vllm-ascend/pull/943) [#1584](https://github.com/vllm-project/vllm-ascend/pull/1584) [#1473](https://github.com/vllm-project/vllm-ascend/pull/1473) [#1294](https://github.com/vllm-project/vllm-ascend/pull/1294) [#1632](https://github.com/vllm-project/vllm-ascend/pull/1632) [#1694](https://github.com/vllm-project/vllm-ascend/pull/1694) [#1840](https://github.com/vllm-project/vllm-ascend/pull/1840) [#2076](https://github.com/vllm-project/vllm-ascend/pull/2076) [#1990](https://github.com/vllm-project/vllm-ascend/pull/1990) [#2019](https://github.com/vllm-project/vllm-ascend/pull/2019)" +"DeepSeek MTP 改进与错误修复:[#1214](https://github.com/vllm-project/vllm-" +"ascend/pull/1214) [#943](https://github.com/vllm-project/vllm-" +"ascend/pull/943) [#1584](https://github.com/vllm-project/vllm-" +"ascend/pull/1584) [#1473](https://github.com/vllm-project/vllm-" +"ascend/pull/1473) [#1294](https://github.com/vllm-project/vllm-" +"ascend/pull/1294) [#1632](https://github.com/vllm-project/vllm-" +"ascend/pull/1632) [#1694](https://github.com/vllm-project/vllm-" +"ascend/pull/1694) [#1840](https://github.com/vllm-project/vllm-" +"ascend/pull/1840) [#2076](https://github.com/vllm-project/vllm-" +"ascend/pull/2076) [#1990](https://github.com/vllm-project/vllm-" +"ascend/pull/1990) [#2019](https://github.com/vllm-project/vllm-" +"ascend/pull/2019)" #: ../../source/user_guide/release_notes.md:958 msgid "" @@ -4943,7 +5458,10 @@ msgid "" "[#2006](https://github.com/vllm-project/vllm-ascend/pull/2006) " "[#1832](https://github.com/vllm-project/vllm-ascend/pull/1832)" msgstr "" -"Qwen3 MoE 在图模式和 DP 方面的支持改进与错误修复:[#1940](https://github.com/vllm-project/vllm-ascend/pull/1940) [#2006](https://github.com/vllm-project/vllm-ascend/pull/2006) [#1832](https://github.com/vllm-project/vllm-ascend/pull/1832)" +"Qwen3 MoE 在图模式和 DP 方面的支持改进与错误修复:[#1940](https://github.com/vllm-project" +"/vllm-ascend/pull/1940) [#2006](https://github.com/vllm-project/vllm-" +"ascend/pull/2006) [#1832](https://github.com/vllm-project/vllm-" +"ascend/pull/1832)" #: ../../source/user_guide/release_notes.md:959 msgid "" @@ -4954,7 +5472,12 @@ msgid "" "[#1782](https://github.com/vllm-project/vllm-ascend/pull/1782) " "[#1745](https://github.com/vllm-project/vllm-ascend/pull/1745)" msgstr "" -"Qwen3 在 rmsnorm/repo/mlp 算子方面的性能改进:[#1545](https://github.com/vllm-project/vllm-ascend/pull/1545) [#1719](https://github.com/vllm-project/vllm-ascend/pull/1719) [#1726](https://github.com/vllm-project/vllm-ascend/pull/1726) [#1782](https://github.com/vllm-project/vllm-ascend/pull/1782) [#1745](https://github.com/vllm-project/vllm-ascend/pull/1745)" +"Qwen3 在 rmsnorm/repo/mlp 
算子方面的性能改进:[#1545](https://github.com/vllm-" +"project/vllm-ascend/pull/1545) [#1719](https://github.com/vllm-project" +"/vllm-ascend/pull/1719) [#1726](https://github.com/vllm-project/vllm-" +"ascend/pull/1726) [#1782](https://github.com/vllm-project/vllm-" +"ascend/pull/1782) [#1745](https://github.com/vllm-project/vllm-" +"ascend/pull/1745)" #: ../../source/user_guide/release_notes.md:960 msgid "" @@ -4968,7 +5491,15 @@ msgid "" "[#2170](https://github.com/vllm-project/vllm-ascend/pull/2170) " "[#1551](https://github.com/vllm-project/vllm-ascend/pull/1551)" msgstr "" -"DeepSeek MLA 分块预填充/图模式/多流改进与错误修复:[#1240](https://github.com/vllm-project/vllm-ascend/pull/1240) [#933](https://github.com/vllm-project/vllm-ascend/pull/933) [#1135](https://github.com/vllm-project/vllm-ascend/pull/1135) [#1311](https://github.com/vllm-project/vllm-ascend/pull/1311) [#1750](https://github.com/vllm-project/vllm-ascend/pull/1750) [#1872](https://github.com/vllm-project/vllm-ascend/pull/1872) [#2170](https://github.com/vllm-project/vllm-ascend/pull/2170) [#1551](https://github.com/vllm-project/vllm-ascend/pull/1551)" +"DeepSeek MLA 分块预填充/图模式/多流改进与错误修复:[#1240](https://github.com/vllm-project" +"/vllm-ascend/pull/1240) [#933](https://github.com/vllm-project/vllm-" +"ascend/pull/933) [#1135](https://github.com/vllm-project/vllm-" +"ascend/pull/1135) [#1311](https://github.com/vllm-project/vllm-" +"ascend/pull/1311) [#1750](https://github.com/vllm-project/vllm-" +"ascend/pull/1750) [#1872](https://github.com/vllm-project/vllm-" +"ascend/pull/1872) [#2170](https://github.com/vllm-project/vllm-" +"ascend/pull/2170) [#1551](https://github.com/vllm-project/vllm-" +"ascend/pull/1551)" #: ../../source/user_guide/release_notes.md:961 msgid "" @@ -4978,7 +5509,11 @@ msgid "" "[#1929](https://github.com/vllm-project/vllm-ascend/pull/1929) " "[#2007](https://github.com/vllm-project/vllm-ascend/pull/2007)" msgstr "" -"通过改进 mrope/填充机制提升 Qwen2.5 VL:[#1261](https://github.com/vllm-project/vllm-ascend/pull/1261) [#1705](https://github.com/vllm-project/vllm-ascend/pull/1705) [#1929](https://github.com/vllm-project/vllm-ascend/pull/1929) [#2007](https://github.com/vllm-project/vllm-ascend/pull/2007)" +"通过改进 mrope/填充机制提升 Qwen2.5 VL:[#1261](https://github.com/vllm-project" +"/vllm-ascend/pull/1261) [#1705](https://github.com/vllm-project/vllm-" +"ascend/pull/1705) [#1929](https://github.com/vllm-project/vllm-" +"ascend/pull/1929) [#2007](https://github.com/vllm-project/vllm-" +"ascend/pull/2007)" #: ../../source/user_guide/release_notes.md:962 msgid "" @@ -4987,7 +5522,9 @@ msgid "" "ascend/pull/1234) [#1501](https://github.com/vllm-project/vllm-" "ascend/pull/1501)" msgstr "" -"Ray:修复使用 ray 时的设备错误,添加 initialize_cache 并改进警告信息:[#1234](https://github.com/vllm-project/vllm-ascend/pull/1234) [#1501](https://github.com/vllm-project/vllm-ascend/pull/1501)" +"Ray:修复使用 ray 时的设备错误,添加 initialize_cache " +"并改进警告信息:[#1234](https://github.com/vllm-project/vllm-ascend/pull/1234) " +"[#1501](https://github.com/vllm-project/vllm-ascend/pull/1501)" #: ../../source/user_guide/release_notes.md:964 msgid "Graph Mode Improvement" @@ -4998,7 +5535,8 @@ msgid "" "Fix DeepSeek with deepseek with mc2 in [#1269](https://github.com/vllm-" "project/vllm-ascend/pull/1269)" msgstr "" -"在 [#1269](https://github.com/vllm-project/vllm-ascend/pull/1269) 中修复了 DeepSeek 与 mc2 的问题。" +"在 [#1269](https://github.com/vllm-project/vllm-ascend/pull/1269) 中修复了 " +"DeepSeek 与 mc2 的问题。" #: ../../source/user_guide/release_notes.md:967 msgid "" @@ -5006,42 +5544,48 @@ msgid "" 
"long sequence predictions in [#1332](https://github.com/vllm-project" "/vllm-ascend/pull/1332)" msgstr "" -"在 [#1332](https://github.com/vllm-project/vllm-ascend/pull/1332) 中修复了 deepseek V3/R1 模型在使用 torchair 图进行长序列预测时的精度问题。" +"在 [#1332](https://github.com/vllm-project/vllm-ascend/pull/1332) 中修复了 " +"deepseek V3/R1 模型在使用 torchair 图进行长序列预测时的精度问题。" #: ../../source/user_guide/release_notes.md:968 msgid "" "Fix torchair_graph_batch_sizes bug in [#1570](https://github.com/vllm-" "project/vllm-ascend/pull/1570)" msgstr "" -"在 [#1570](https://github.com/vllm-project/vllm-ascend/pull/1570) 中修复了 torchair_graph_batch_sizes 错误。" +"在 [#1570](https://github.com/vllm-project/vllm-ascend/pull/1570) 中修复了 " +"torchair_graph_batch_sizes 错误。" #: ../../source/user_guide/release_notes.md:969 msgid "" "Enable the limit of tp <= 4 for torchair graph mode in " "[#1404](https://github.com/vllm-project/vllm-ascend/pull/1404)" msgstr "" -"在 [#1404](https://github.com/vllm-project/vllm-ascend/pull/1404) 中为 torchair 图模式启用了 tp <= 4 的限制。" +"在 [#1404](https://github.com/vllm-project/vllm-ascend/pull/1404) 中为 " +"torchair 图模式启用了 tp <= 4 的限制。" #: ../../source/user_guide/release_notes.md:970 msgid "" "Fix rope accuracy bug [#1887](https://github.com/vllm-project/vllm-" "ascend/pull/1887)" msgstr "" -"修复 rope 精度错误 [#1887](https://github.com/vllm-project/vllm-ascend/pull/1887)" +"修复 rope 精度错误 [#1887](https://github.com/vllm-project/vllm-" +"ascend/pull/1887)" #: ../../source/user_guide/release_notes.md:971 msgid "" "Support multistream of shared experts in FusedMoE " "[#997](https://github.com/vllm-project/vllm-ascend/pull/997)" msgstr "" -"支持 FusedMoE 中共享专家的多流处理 [#997](https://github.com/vllm-project/vllm-ascend/pull/997)" +"支持 FusedMoE 中共享专家的多流处理 [#997](https://github.com/vllm-project/vllm-" +"ascend/pull/997)" #: ../../source/user_guide/release_notes.md:972 msgid "" "Enable kvcache_nz for the decode process in torchair graph " "mode[#1098](https://github.com/vllm-project/vllm-ascend/pull/1098)" msgstr "" -"在 torchair 图模式的解码过程中启用 kvcache_nz [#1098](https://github.com/vllm-project/vllm-ascend/pull/1098)" +"在 torchair 图模式的解码过程中启用 kvcache_nz [#1098](https://github.com/vllm-project" +"/vllm-ascend/pull/1098)" #: ../../source/user_guide/release_notes.md:973 msgid "" @@ -5049,77 +5593,89 @@ msgid "" "local variable 'decode_hs_or_q_c' issue in [#1378](https://github.com" "/vllm-project/vllm-ascend/pull/1378)" msgstr "" -"修复结合 torchair 的分块预填充场景,解决 UnboundLocalError: local variable 'decode_hs_or_q_c' 问题 [#1378](https://github.com/vllm-project/vllm-ascend/pull/1378)" +"修复结合 torchair 的分块预填充场景,解决 UnboundLocalError: local variable " +"'decode_hs_or_q_c' 问题 [#1378](https://github.com/vllm-project/vllm-" +"ascend/pull/1378)" #: ../../source/user_guide/release_notes.md:974 msgid "" "Improve shared experts multi-stream perf for w8a8 dynamic. in " "[#1561](https://github.com/vllm-project/vllm-ascend/pull/1561)" msgstr "" -"在 [#1561](https://github.com/vllm-project/vllm-ascend/pull/1561) 中改进了 w8a8 动态量化下共享专家的多流性能。" +"在 [#1561](https://github.com/vllm-project/vllm-ascend/pull/1561) 中改进了 " +"w8a8 动态量化下共享专家的多流性能。" #: ../../source/user_guide/release_notes.md:975 msgid "" "Repair moe error when set multistream. 
in [#1882](https://github.com" "/vllm-project/vllm-ascend/pull/1882)" msgstr "" -"在 [#1882](https://github.com/vllm-project/vllm-ascend/pull/1882) 中修复了设置多流时的 moe 错误。" +"在 [#1882](https://github.com/vllm-project/vllm-ascend/pull/1882) " +"中修复了设置多流时的 moe 错误。" #: ../../source/user_guide/release_notes.md:976 msgid "" "Round up graph batch size to tp size in EP case " "[#1610](https://github.com/vllm-project/vllm-ascend/pull/1610)" msgstr "" -"在 EP 场景下将图批次大小向上取整至 tp 大小 [#1610](https://github.com/vllm-project/vllm-ascend/pull/1610)" +"在 EP 场景下将图批次大小向上取整至 tp 大小 [#1610](https://github.com/vllm-project/vllm-" +"ascend/pull/1610)" #: ../../source/user_guide/release_notes.md:977 msgid "" "Fix torchair bug when DP is enabled in [#1727](https://github.com/vllm-" "project/vllm-ascend/pull/1727)" msgstr "" -"在 [#1727](https://github.com/vllm-project/vllm-ascend/pull/1727) 中修复了启用 DP 时的 torchair 错误。" +"在 [#1727](https://github.com/vllm-project/vllm-ascend/pull/1727) 中修复了启用 " +"DP 时的 torchair 错误。" #: ../../source/user_guide/release_notes.md:978 msgid "" "Add extra checking to torchair_graph_config. in " "[#1675](https://github.com/vllm-project/vllm-ascend/pull/1675)" msgstr "" -"在 [#1675](https://github.com/vllm-project/vllm-ascend/pull/1675) 中为 torchair_graph_config 添加了额外检查。" +"在 [#1675](https://github.com/vllm-project/vllm-ascend/pull/1675) 中为 " +"torchair_graph_config 添加了额外检查。" #: ../../source/user_guide/release_notes.md:979 msgid "" "Fix rope bug in torchair+chunk-prefill scenario in " "[#1693](https://github.com/vllm-project/vllm-ascend/pull/1693)" msgstr "" -"在 [#1693](https://github.com/vllm-project/vllm-ascend/pull/1693) 中修复了 torchair+分块预填充场景下的 rope 错误。" +"在 [#1693](https://github.com/vllm-project/vllm-ascend/pull/1693) 中修复了 " +"torchair+分块预填充场景下的 rope 错误。" #: ../../source/user_guide/release_notes.md:980 msgid "" "torchair_graph bugfix when chunked_prefill is true in " "[#1748](https://github.com/vllm-project/vllm-ascend/pull/1748)" msgstr "" -"在 [#1748](https://github.com/vllm-project/vllm-ascend/pull/1748) 中修复了 chunked_prefill 为 true 时的 torchair_graph 错误。" +"在 [#1748](https://github.com/vllm-project/vllm-ascend/pull/1748) 中修复了 " +"chunked_prefill 为 true 时的 torchair_graph 错误。" #: ../../source/user_guide/release_notes.md:981 msgid "" "Improve prefill optimization to support torchair graph mode in " "[#2090](https://github.com/vllm-project/vllm-ascend/pull/2090)" msgstr "" -"在 [#2090](https://github.com/vllm-project/vllm-ascend/pull/2090) 中改进了预填充优化以支持 torchair 图模式。" +"在 [#2090](https://github.com/vllm-project/vllm-ascend/pull/2090) " +"中改进了预填充优化以支持 torchair 图模式。" #: ../../source/user_guide/release_notes.md:982 msgid "" "Fix rank set in DP scenario [#1247](https://github.com/vllm-project/vllm-" "ascend/pull/1247)" msgstr "" -"修复 DP 场景下的 rank 设置 [#1247](https://github.com/vllm-project/vllm-ascend/pull/1247)" +"修复 DP 场景下的 rank 设置 [#1247](https://github.com/vllm-project/vllm-" +"ascend/pull/1247)" #: ../../source/user_guide/release_notes.md:983 msgid "" "Reset all unused positions to prevent out-of-bounds to resolve GatherV3 " "bug in [#1397](https://github.com/vllm-project/vllm-ascend/pull/1397)" msgstr "" -"重置所有未使用的位置以防止越界,以解决 GatherV3 错误 [#1397](https://github.com/vllm-project/vllm-ascend/pull/1397)" +"重置所有未使用的位置以防止越界,以解决 GatherV3 错误 [#1397](https://github.com/vllm-project" +"/vllm-ascend/pull/1397)" #: ../../source/user_guide/release_notes.md:984 msgid "" @@ -5202,8 +5758,8 @@ msgid "" "Used fused ops npu_top_k_top_p in sampler [#1920](https://github.com" "/vllm-project/vllm-ascend/pull/1920)" msgstr 
"" -"在采样器中使用融合算子 npu_top_k_top_p [#1920](https://github.com/vllm-project" -"/vllm-ascend/pull/1920)" +"在采样器中使用融合算子 npu_top_k_top_p [#1920](https://github.com/vllm-project/vllm-" +"ascend/pull/1920)" #: ../../source/user_guide/release_notes.md:1000 msgid "" @@ -5241,39 +5797,37 @@ msgstr "" msgid "" "Fixed v0 spec decode in [#1323](https://github.com/vllm-project/vllm-" "ascend/pull/1323)" -msgstr "" -"修复 v0 推测解码 [#1323](https://github.com/vllm-project/vllm-ascend/pull/1323)" +msgstr "修复 v0 推测解码 [#1323](https://github.com/vllm-project/vllm-ascend/pull/1323)" #: ../../source/user_guide/release_notes.md:1005 msgid "" "Enabled `ACL_OP_INIT_MODE=1` directly only when using V0 spec decode in " "[#1271](https://github.com/vllm-project/vllm-ascend/pull/1271)" msgstr "" -"仅在使用 V0 推测解码时直接启用 `ACL_OP_INIT_MODE=1` [#1271](https://github.com" -"/vllm-project/vllm-ascend/pull/1271)" +"仅在使用 V0 推测解码时直接启用 `ACL_OP_INIT_MODE=1` [#1271](https://github.com/vllm-" +"project/vllm-ascend/pull/1271)" #: ../../source/user_guide/release_notes.md:1006 msgid "" "Refactoring forward_context and model_runner_v1 in " "[#1422](https://github.com/vllm-project/vllm-ascend/pull/1422)" msgstr "" -"重构 forward_context 和 model_runner_v1 [#1422](https://github.com/vllm-project" -"/vllm-ascend/pull/1422)" +"重构 forward_context 和 model_runner_v1 [#1422](https://github.com/vllm-" +"project/vllm-ascend/pull/1422)" #: ../../source/user_guide/release_notes.md:1007 msgid "" "Fixed sampling params in [#1423](https://github.com/vllm-project/vllm-" "ascend/pull/1423)" -msgstr "" -"修复采样参数 [#1423](https://github.com/vllm-project/vllm-ascend/pull/1423)" +msgstr "修复采样参数 [#1423](https://github.com/vllm-project/vllm-ascend/pull/1423)" #: ../../source/user_guide/release_notes.md:1008 msgid "" "Added a switch for enabling NZ layout in weights and enable NZ for GMM. " "in [#1409](https://github.com/vllm-project/vllm-ascend/pull/1409)" msgstr "" -"添加权重启用 NZ 布局的开关并为 GMM 启用 NZ [#1409](https://github.com/vllm-" -"project/vllm-ascend/pull/1409)" +"添加权重启用 NZ 布局的开关并为 GMM 启用 NZ [#1409](https://github.com/vllm-project/vllm-" +"ascend/pull/1409)" #: ../../source/user_guide/release_notes.md:1009 msgid "" @@ -5284,38 +5838,38 @@ msgid "" msgstr "" "修复 ascend_forward_context 中的错误 [#1449](https://github.com/vllm-project" "/vllm-ascend/pull/1449) [#1554](https://github.com/vllm-project/vllm-" -"ascend/pull/1554) [#1598](https://github.com/vllm-project/vllm-ascend/pull/1598)" +"ascend/pull/1554) [#1598](https://github.com/vllm-project/vllm-" +"ascend/pull/1598)" #: ../../source/user_guide/release_notes.md:1010 msgid "" "Address PrefillCacheHit state to fix prefix cache accuracy bug in " "[#1492](https://github.com/vllm-project/vllm-ascend/pull/1492)" msgstr "" -"处理 PrefillCacheHit 状态以修复前缀缓存准确性问题 [#1492](https://github.com" -"/vllm-project/vllm-ascend/pull/1492)" +"处理 PrefillCacheHit 状态以修复前缀缓存准确性问题 [#1492](https://github.com/vllm-project" +"/vllm-ascend/pull/1492)" #: ../../source/user_guide/release_notes.md:1011 msgid "" "Fixed load weight error and add new e2e case in " "[#1651](https://github.com/vllm-project/vllm-ascend/pull/1651)" msgstr "" -"修复加载权重错误并添加新的端到端测试用例 [#1651](https://github.com/vllm-project" -"/vllm-ascend/pull/1651)" +"修复加载权重错误并添加新的端到端测试用例 [#1651](https://github.com/vllm-project/vllm-" +"ascend/pull/1651)" #: ../../source/user_guide/release_notes.md:1012 msgid "" "Optimized the number of rope-related index selections in deepseek. 
in " "[#1614](https://github.com/vllm-project/vllm-ascend/pull/1614)" msgstr "" -"优化 DeepSeek 中与 rope 相关的索引选择次数 [#1614](https://github.com/vllm-" -"project/vllm-ascend/pull/1614)" +"优化 DeepSeek 中与 rope 相关的索引选择次数 [#1614](https://github.com/vllm-project" +"/vllm-ascend/pull/1614)" #: ../../source/user_guide/release_notes.md:1013 msgid "" "Added mc2 mask in [#1642](https://github.com/vllm-project/vllm-" "ascend/pull/1642)" -msgstr "" -"添加 mc2 掩码 [#1642](https://github.com/vllm-project/vllm-ascend/pull/1642)" +msgstr "添加 mc2 掩码 [#1642](https://github.com/vllm-project/vllm-ascend/pull/1642)" #: ../../source/user_guide/release_notes.md:1014 msgid "" @@ -5324,9 +5878,10 @@ msgid "" "[#1896](https://github.com/vllm-project/vllm-ascend/pull/1896) " "[#2003](https://github.com/vllm-project/vllm-ascend/pull/2003)" msgstr "" -"修复静态 EPLB log2phy 条件并改进单元测试 [#1667](https://github.com/vllm-project" -"/vllm-ascend/pull/1667) [#1896](https://github.com/vllm-project/vllm-" -"ascend/pull/1896) [#2003](https://github.com/vllm-project/vllm-ascend/pull/2003)" +"修复静态 EPLB log2phy 条件并改进单元测试 [#1667](https://github.com/vllm-project/vllm-" +"ascend/pull/1667) [#1896](https://github.com/vllm-project/vllm-" +"ascend/pull/1896) [#2003](https://github.com/vllm-project/vllm-" +"ascend/pull/2003)" #: ../../source/user_guide/release_notes.md:1015 msgid "" @@ -5390,8 +5945,8 @@ msgid "" "Fixed disaggregate prefill hang issue in long output in " "[#1807](https://github.com/vllm-project/vllm-ascend/pull/1807)" msgstr "" -"修复长输出中 disaggregate prefill 挂起问题 [#1807](https://github.com/vllm-" -"project/vllm-ascend/pull/1807)" +"修复长输出中 disaggregate prefill 挂起问题 [#1807](https://github.com/vllm-project" +"/vllm-ascend/pull/1807)" #: ../../source/user_guide/release_notes.md:1023 msgid "" @@ -5406,32 +5961,32 @@ msgid "" "ep_group is not equal to word_size in some cases in " "[#1862](https://github.com/vllm-project/vllm-ascend/pull/1862)." msgstr "" -"在某些情况下 ep_group 不等于 word_size [#1862](https://github.com/vllm-project" -"/vllm-ascend/pull/1862)." +"在某些情况下,ep_group 不等于 word_size [#1862](https://github.com/vllm-project" +"/vllm-ascend/pull/1862)。" #: ../../source/user_guide/release_notes.md:1025 msgid "" "Fixed wheel glibc version incompatibility in [#1808](https://github.com" "/vllm-project/vllm-ascend/pull/1808)." msgstr "" -"修复 wheel glibc 版本不兼容问题 [#1808](https://github.com/vllm-project/vllm-" -"ascend/pull/1808)." +"修复了 wheel glibc 版本不兼容问题 [#1808](https://github.com/vllm-project/vllm-" +"ascend/pull/1808)。" #: ../../source/user_guide/release_notes.md:1026 msgid "" "Fixed mc2 process group to resolve self.cpu_group is None in " "[#1831](https://github.com/vllm-project/vllm-ascend/pull/1831)." msgstr "" -"修复 mc2 进程组以解决 self.cpu_group 为 None 的问题 [#1831](https://github.com" -"/vllm-project/vllm-ascend/pull/1831)." +"修复了 mc2 进程组以解决 self.cpu_group 为 None 的问题 [#1831](https://github.com/vllm-" +"project/vllm-ascend/pull/1831)。" #: ../../source/user_guide/release_notes.md:1027 msgid "" "Pin vllm version to v0.9.1 to make mypy check passed in " "[#1904](https://github.com/vllm-project/vllm-ascend/pull/1904)." 
msgstr "" -"将 vllm 版本固定为 v0.9.1 以使 mypy 检查通过 [#1904](https://github.com/vllm-" -"project/vllm-ascend/pull/1904)。" +"将 vllm 版本固定为 v0.9.1 以使 mypy 检查通过 [#1904](https://github.com/vllm-project" +"/vllm-ascend/pull/1904)。" #: ../../source/user_guide/release_notes.md:1028 msgid "" @@ -5454,8 +6009,8 @@ msgid "" "Avoid performing cpu all_reduce in disaggregated-prefill scenario in " "[#1644](https://github.com/vllm-project/vllm-ascend/pull/1644)." msgstr "" -"在解耦预填充场景中避免执行 CPU all_reduce [#1644](https://github.com/vllm-" -"project/vllm-ascend/pull/1644)。" +"在解耦预填充场景中避免执行 CPU all_reduce [#1644](https://github.com/vllm-project" +"/vllm-ascend/pull/1644)。" #: ../../source/user_guide/release_notes.md:1031 msgid "" @@ -5463,23 +6018,23 @@ msgid "" "project/vllm-ascend/pull/1916)" msgstr "" "在解码 MoE 中添加了超级内核 [#1916](https://github.com/vllm-project/vllm-" -"ascend/pull/1916)" +"ascend/pull/1916)。" #: ../../source/user_guide/release_notes.md:1032 msgid "" "[Prefill Perf] Parallel Strategy Optimizations (VRAM-for-Speed Tradeoff) " "in [#1802](https://github.com/vllm-project/vllm-ascend/pull/1802)." msgstr "" -"[预填充性能] 并行策略优化(显存换速度权衡) [#1802](https://github.com/vllm-project" -"/vllm-ascend/pull/1802)。" +"[预填充性能] 并行策略优化(显存换速度权衡) [#1802](https://github.com/vllm-project/vllm-" +"ascend/pull/1802)。" #: ../../source/user_guide/release_notes.md:1033 msgid "" "Removed unnecessary reduce_results access in shared_experts.down_proj in " "[#2016](https://github.com/vllm-project/vllm-ascend/pull/2016)." msgstr "" -"移除了 shared_experts.down_proj 中不必要的 reduce_results 访问 [#2016](https://github.com" -"/vllm-project/vllm-ascend/pull/2016)。" +"移除了 shared_experts.down_proj 中不必要的 reduce_results 访问 " +"[#2016](https://github.com/vllm-project/vllm-ascend/pull/2016)。" #: ../../source/user_guide/release_notes.md:1034 msgid "" @@ -5511,7 +6066,8 @@ msgid "" "Added CPU binding support [#2031](https://github.com/vllm-project/vllm-" "ascend/pull/2031)." 
msgstr "" -"添加了 CPU 绑定支持 [#2031](https://github.com/vllm-project/vllm-ascend/pull/2031)。" +"添加了 CPU 绑定支持 [#2031](https://github.com/vllm-project/vllm-" +"ascend/pull/2031)。" #: ../../source/user_guide/release_notes.md:1038 msgid "" @@ -5535,7 +6091,7 @@ msgid "" "[#1113](https://github.com/vllm-project/vllm-ascend/pull/1113)" msgstr "" "提供了执行时长性能分析的端到端指南 [#1113](https://github.com/vllm-project/vllm-" -"ascend/pull/1113)" +"ascend/pull/1113)。" #: ../../source/user_guide/release_notes.md:1044 msgid "" @@ -5543,21 +6099,21 @@ msgid "" "[#1192](https://github.com/vllm-project/vllm-ascend/pull/1192)" msgstr "" "为 CANN 包下载 URL 添加 Referer 请求头 [#1192](https://github.com/vllm-project" -"/vllm-ascend/pull/1192)" +"/vllm-ascend/pull/1192)。" #: ../../source/user_guide/release_notes.md:1045 msgid "" "Add reinstall instructions doc [#1370](https://github.com/vllm-project" "/vllm-ascend/pull/1370)" -msgstr "" -"添加了重新安装说明文档 [#1370](https://github.com/vllm-project/vllm-ascend/pull/1370)" +msgstr "添加了重新安装说明文档 [#1370](https://github.com/vllm-project/vllm-ascend/pull/1370)。" #: ../../source/user_guide/release_notes.md:1046 msgid "" "Update Disaggregate prefill README [#1379](https://github.com/vllm-" "project/vllm-ascend/pull/1379)" msgstr "" -"更新了解耦预填充 README [#1379](https://github.com/vllm-project/vllm-ascend/pull/1379)" +"更新了解耦预填充 README [#1379](https://github.com/vllm-project/vllm-" +"ascend/pull/1379)。" #: ../../source/user_guide/release_notes.md:1047 msgid "" @@ -5565,7 +6121,7 @@ msgid "" "[#1296](https://github.com/vllm-project/vllm-ascend/pull/1296)" msgstr "" "支持 KV 缓存寄存器风格解耦预填充 [#1296](https://github.com/vllm-project/vllm-" -"ascend/pull/1296)" +"ascend/pull/1296)。" #: ../../source/user_guide/release_notes.md:1048 msgid "" @@ -5574,7 +6130,7 @@ msgid "" "/vllm-project/vllm-ascend/pull/1965)" msgstr "" "修复了 examples/disaggregate_prefill_v1/README.md 中的错误和非标准部分 " -"[#1965](https://github.com/vllm-project/vllm-ascend/pull/1965)" +"[#1965](https://github.com/vllm-project/vllm-ascend/pull/1965)。" #: ../../source/user_guide/release_notes.md:1052 msgid "" @@ -5591,7 +6147,7 @@ msgid "" "error [#2226](https://github.com/vllm-project/vllm-ascend/issues/2226)" msgstr "" "启用 EP 时,带 TP 的 Qwen3 MoE aclgraph 模式因 bincount 错误而失败 " -"[#2226](https://github.com/vllm-project/vllm-ascend/issues/2226)" +"[#2226](https://github.com/vllm-project/vllm-ascend/issues/2226)。" #: ../../source/user_guide/release_notes.md:1054 msgid "" @@ -5610,7 +6166,7 @@ msgid "" "ascend/tree/v0.9.2rc1) to get started. From this release, V1 engine will " "be enabled by default, there is no need to set `VLLM_USE_V1=1` any more. " "And this release is the last version to support V0 engine, V0 code will " -"be clean up in the future." +"be cleaned up in the future." msgstr "" "这是 vLLM Ascend v0.9.2 的第一个候选发布版本。请遵循[官方文档](https://github.com/vllm-" "project/vllm-ascend/tree/v0.9.2rc1)开始使用。从本版本起,V1 引擎将默认启用,不再需要设置 " @@ -5648,16 +6204,17 @@ msgid "" "`2.5.1.post1.dev20250619`. Don’t forget to update it in your environment." " [#1347](https://github.com/vllm-project/vllm-ascend/pull/1347)" msgstr "" -"Ascend PyTorch 适配器 (torch_npu) 已升级至 `2.5.1.post1.dev20250619`。请勿忘记在您的环境中更新它 " -"[#1347](https://github.com/vllm-project/vllm-ascend/pull/1347)。" +"Ascend PyTorch 适配器 (torch_npu) 已升级至 " +"`2.5.1.post1.dev20250619`。请勿忘记在您的环境中更新它 [#1347](https://github.com/vllm-" +"project/vllm-ascend/pull/1347)。" #: ../../source/user_guide/release_notes.md:1069 msgid "" "The GatherV3 error has been fixed with aclgraph mode. 
" "[#1416](https://github.com/vllm-project/vllm-ascend/pull/1416)" msgstr "" -"GatherV3 错误已在 aclgraph 模式下修复 [#1416](https://github.com/vllm-project/vllm-" -"ascend/pull/1416)。" +"GatherV3 错误已在 aclgraph 模式下修复 [#1416](https://github.com/vllm-project" +"/vllm-ascend/pull/1416)。" #: ../../source/user_guide/release_notes.md:1070 msgid "" @@ -5689,16 +6246,15 @@ msgid "" "Official doc has been updated for better read experience. For example, " "more deployment tutorials are added, user/developer docs are updated. " "More guide will coming soon." -msgstr "" -"官方文档已更新,以提供更好的阅读体验。例如,添加了更多部署教程,更新了用户/开发者文档。更多指南即将推出。" +msgstr "官方文档已更新,以提供更好的阅读体验。例如,添加了更多部署教程,更新了用户/开发者文档。更多指南即将推出。" #: ../../source/user_guide/release_notes.md:1077 msgid "" -"Fix accuracy problem for deepseek V3/R1 models with torchair graph in " +"Fix accuracy problem for Deepseek V3/R1 models with torchair graph in " "long sequence predictions. [#1331](https://github.com/vllm-project/vllm-" "ascend/pull/1331)" msgstr "" -"修复了 deepseek V3/R1 模型在使用 torchair 图进行长序列预测时的精度问题 " +"修复了 Deepseek V3/R1 模型在使用 torchair 图进行长序列预测时的精度问题 " "[#1331](https://github.com/vllm-project/vllm-ascend/pull/1331)。" #: ../../source/user_guide/release_notes.md:1078 @@ -5708,9 +6264,9 @@ msgid "" "models. The default value is `0`. [#1335](https://github.com/vllm-project" "/vllm-ascend/pull/1335)" msgstr "" -"新增了一个环境变量 `VLLM_ENABLE_FUSED_EXPERTS_ALLGATHER_EP`。它为 Deepseek V3/R1 模型启用了融合的 " -"allgather-experts 内核。默认值为 `0` [#1335](https://github.com/vllm-project/vllm-" -"ascend/pull/1335)。" +"新增了一个环境变量 `VLLM_ENABLE_FUSED_EXPERTS_ALLGATHER_EP`。它为 Deepseek V3/R1 " +"模型启用了融合的 allgather-experts 内核。默认值为 `0` [#1335](https://github.com/vllm-" +"project/vllm-ascend/pull/1335)。" #: ../../source/user_guide/release_notes.md:1079 msgid "" @@ -5780,7 +6336,8 @@ msgid "" "" msgstr "" "流水线并行目前无法与 ray 和图模式协同工作: " +"ascend/issues/1751> " #: ../../source/user_guide/release_notes.md:1090 #: ../../source/user_guide/release_notes.md:1149 @@ -5792,84 +6349,96 @@ msgid "" "@xleoken made their first contribution in " msgstr "" -"@xleoken 在 中完成了首次贡献" +"@xleoken 在 " +"中完成了首次贡献" #: ../../source/user_guide/release_notes.md:1093 msgid "" "@lyj-jjj made their first contribution in " msgstr "" -"@lyj-jjj 在 中完成了首次贡献" +"@lyj-jjj 在 " +"中完成了首次贡献" #: ../../source/user_guide/release_notes.md:1094 msgid "" "@sharonyunyun made their first contribution in " msgstr "" -"@sharonyunyun 在 中完成了首次贡献" +"@sharonyunyun 在 " +"中完成了首次贡献" #: ../../source/user_guide/release_notes.md:1095 msgid "" "@Pr0Wh1teGivee made their first contribution in " msgstr "" -"@Pr0Wh1teGivee 在 中完成了首次贡献" +"@Pr0Wh1teGivee 在 " +"中完成了首次贡献" #: ../../source/user_guide/release_notes.md:1096 msgid "" "@leo-pony made their first contribution in " msgstr "" -"@leo-pony 在 中完成了首次贡献" +"@leo-pony 在 " +"中完成了首次贡献" #: ../../source/user_guide/release_notes.md:1097 msgid "" "@zeshengzong made their first contribution in " msgstr "" -"@zeshengzong 在 中完成了首次贡献" +"@zeshengzong 在 " +"中完成了首次贡献" #: ../../source/user_guide/release_notes.md:1098 msgid "" "@GDzhu01 made their first contribution in " msgstr "" -"@GDzhu01 在 中完成了首次贡献" +"@GDzhu01 在 " +"中完成了首次贡献" #: ../../source/user_guide/release_notes.md:1099 msgid "" "@Agonixiaoxiao made their first contribution in " msgstr "" -"@Agonixiaoxiao 在 中完成了首次贡献" +"@Agonixiaoxiao 在 " +"中完成了首次贡献" #: ../../source/user_guide/release_notes.md:1100 msgid "" "@zhanghw0354 made their first contribution in " msgstr "" -"@zhanghw0354 在 中完成了首次贡献" +"@zhanghw0354 在 " +"中完成了首次贡献" #: ../../source/user_guide/release_notes.md:1101 msgid "" 
"@farawayboat made their first contribution in " msgstr "" -"@farawayboat 在 中完成了首次贡献" +"@farawayboat 在 " +"中完成了首次贡献" #: ../../source/user_guide/release_notes.md:1102 msgid "" "@ZhengWG made their first contribution in " msgstr "" -"@ZhengWG 在 中完成了首次贡献" +"@ZhengWG 在 " +"中完成了首次贡献" #: ../../source/user_guide/release_notes.md:1103 msgid "" "@wm901115nwpu made their first contribution in " msgstr "" -"@wm901115nwpu 在 中完成了首次贡献" +"@wm901115nwpu 在 " +"中完成了首次贡献" #: ../../source/user_guide/release_notes.md:1105 msgid "" @@ -5903,7 +6472,9 @@ msgid "" "instruct/Qwen2.5-0.5b/Qwen3-0.6B/Qwen3-4B/Qwen3-8B). " "[#1333](https://github.com/vllm-project/vllm-ascend/pull/1333)" msgstr "" -"本版本实验性支持 Atlas 300I 系列(已通过 Qwen2.5-7b-instruct/Qwen2.5-0.5b/Qwen3-0.6B/Qwen3-4B/Qwen3-8B 的功能测试)。[#1333](https://github.com/vllm-project/vllm-ascend/pull/1333)" +"本版本实验性支持 Atlas 300I 系列(已通过 Qwen2.5-7b-" +"instruct/Qwen2.5-0.5b/Qwen3-0.6B/Qwen3-4B/Qwen3-8B " +"的功能测试)。[#1333](https://github.com/vllm-project/vllm-ascend/pull/1333)" #: ../../source/user_guide/release_notes.md:1114 msgid "" @@ -5920,7 +6491,8 @@ msgid "" " release quality and the feature rapid iteration. We will improve this " "from 0.9.2rc1 and later." msgstr "" -"经过慎重考虑,鉴于 v0.9.1 版本的发布质量要求以及功能的快速迭代,上述功能**将不会被包含在 v0.9.1-dev 分支(即 v0.9.1 最终版本)中**。我们将在 0.9.2rc1 及之后的版本中对其进行改进。" +"经过慎重考虑,鉴于 v0.9.1 版本的发布质量要求以及功能的快速迭代,上述功能**将不会被包含在 v0.9.1-dev 分支(即 v0.9.1 " +"最终版本)中**。我们将在 0.9.2rc1 及之后的版本中对其进行改进。" #: ../../source/user_guide/release_notes.md:1120 msgid "" @@ -5928,28 +6500,33 @@ msgid "" "`2.5.1.post1.dev20250528`. Don’t forget to update it in your environment." " [#1235](https://github.com/vllm-project/vllm-ascend/pull/1235)" msgstr "" -"Ascend PyTorch 适配器(torch_npu)已升级至 `2.5.1.post1.dev20250528`。请勿忘记在您的环境中更新它。[#1235](https://github.com/vllm-project/vllm-ascend/pull/1235)" +"Ascend PyTorch 适配器(torch_npu)已升级至 " +"`2.5.1.post1.dev20250528`。请勿忘记在您的环境中更新它。[#1235](https://github.com/vllm-" +"project/vllm-ascend/pull/1235)" #: ../../source/user_guide/release_notes.md:1121 msgid "" "Support Atlas 300I series container image. You can get it from " "[quay.io](https://quay.io/repository/vllm/vllm-ascend)" msgstr "" -"支持 Atlas 300I 系列容器镜像。您可以从 [quay.io](https://quay.io/repository/vllm/vllm-ascend) 获取。" +"支持 Atlas 300I 系列容器镜像。您可以从 [quay.io](https://quay.io/repository/vllm/vllm-" +"ascend) 获取。" #: ../../source/user_guide/release_notes.md:1122 msgid "" "Fix token-wise padding mechanism to make multi-card graph mode work. " "[#1300](https://github.com/vllm-project/vllm-ascend/pull/1300)" msgstr "" -"修复了按 token 填充的机制,使多卡图模式能够正常工作。[#1300](https://github.com/vllm-project/vllm-ascend/pull/1300)" +"修复了按 token 填充的机制,使多卡图模式能够正常工作。[#1300](https://github.com/vllm-project" +"/vllm-ascend/pull/1300)" #: ../../source/user_guide/release_notes.md:1123 msgid "" "Upgrade vLLM to 0.9.1 [#1165](https://github.com/vllm-project/vllm-" "ascend/pull/1165)" msgstr "" -"将 vLLM 升级至 0.9.1 [#1165](https://github.com/vllm-project/vllm-ascend/pull/1165)" +"将 vLLM 升级至 0.9.1 [#1165](https://github.com/vllm-project/vllm-" +"ascend/pull/1165)" #: ../../source/user_guide/release_notes.md:1125 msgid "Other Improvements" @@ -5960,14 +6537,16 @@ msgid "" "Initial support Chunked Prefill for MLA. 
[#1172](https://github.com/vllm-" "project/vllm-ascend/pull/1172)" msgstr "" -"为 MLA 初步支持分块预填充。[#1172](https://github.com/vllm-project/vllm-ascend/pull/1172)" +"为 MLA 初步支持分块预填充。[#1172](https://github.com/vllm-project/vllm-" +"ascend/pull/1172)" #: ../../source/user_guide/release_notes.md:1128 msgid "" "An example of best practices to run DeepSeek with ETP has been added. " "[#1101](https://github.com/vllm-project/vllm-ascend/pull/1101)" msgstr "" -"已添加一个使用 ETP 运行 DeepSeek 的最佳实践示例。[#1101](https://github.com/vllm-project/vllm-ascend/pull/1101)" +"已添加一个使用 ETP 运行 DeepSeek 的最佳实践示例。[#1101](https://github.com/vllm-project" +"/vllm-ascend/pull/1101)" #: ../../source/user_guide/release_notes.md:1129 msgid "" @@ -5975,14 +6554,17 @@ msgid "" "[#1098](https://github.com/vllm-project/vllm-ascend/pull/1098), " "[#1131](https://github.com/vllm-project/vllm-ascend/pull/1131)" msgstr "" -"通过使用 TorchAir 图,提升了 DeepSeek 的性能。[#1098](https://github.com/vllm-project/vllm-ascend/pull/1098), [#1131](https://github.com/vllm-project/vllm-ascend/pull/1131)" +"通过使用 TorchAir 图,提升了 DeepSeek 的性能。[#1098](https://github.com/vllm-project" +"/vllm-ascend/pull/1098), [#1131](https://github.com/vllm-project/vllm-" +"ascend/pull/1131)" #: ../../source/user_guide/release_notes.md:1130 msgid "" "Supports the speculative decoding feature with AscendScheduler. " "[#943](https://github.com/vllm-project/vllm-ascend/pull/943)" msgstr "" -"支持 AscendScheduler 的推测式解码功能。[#943](https://github.com/vllm-project/vllm-ascend/pull/943)" +"支持 AscendScheduler 的推测式解码功能。[#943](https://github.com/vllm-project/vllm-" +"ascend/pull/943)" #: ../../source/user_guide/release_notes.md:1131 msgid "" @@ -5990,14 +6572,17 @@ msgid "" "enabled in the next release. [#796](https://github.com/vllm-project/vllm-" "ascend/pull/796)" msgstr "" -"提升了 `VocabParallelEmbedding` 自定义算子的性能。该优化将在下一个版本中启用。[#796](https://github.com/vllm-project/vllm-ascend/pull/796)" +"提升了 `VocabParallelEmbedding` " +"自定义算子的性能。该优化将在下一个版本中启用。[#796](https://github.com/vllm-project/vllm-" +"ascend/pull/796)" #: ../../source/user_guide/release_notes.md:1132 msgid "" "Fixed a device discovery and setup bug when running vLLM Ascend on Ray " "[#884](https://github.com/vllm-project/vllm-ascend/pull/884)" msgstr "" -"修复了在 Ray 上运行 vLLM Ascend 时的设备发现和设置错误。[#884](https://github.com/vllm-project/vllm-ascend/pull/884)" +"修复了在 Ray 上运行 vLLM Ascend 时的设备发现和设置错误。[#884](https://github.com/vllm-" +"project/vllm-ascend/pull/884)" #: ../../source/user_guide/release_notes.md:1133 msgid "" @@ -6007,14 +6592,16 @@ msgid "" "[#1268](https://github.com/vllm-project/vllm-ascend/pull/1268)" msgstr "" "DeepSeek 现已可以与 " -"[MC2](https://www.hiascend.com/document/detail/zh/canncommercial/81RC1/developmentguide/opdevg/ascendcbestP/atlas_ascendc_best_practices_10_0043.html)(计算与通信融合)正常工作。[#1268](https://github.com/vllm-project/vllm-ascend/pull/1268)" +"[MC2](https://www.hiascend.com/document/detail/zh/canncommercial/81RC1/developmentguide/opdevg/ascendcbestP/atlas_ascendc_best_practices_10_0043.html)(计算与通信融合)正常工作。[#1268](https://github.com" +"/vllm-project/vllm-ascend/pull/1268)" #: ../../source/user_guide/release_notes.md:1134 msgid "" "Fixed log2phy NoneType bug with static EPLB feature. 
" "[#1186](https://github.com/vllm-project/vllm-ascend/pull/1186)" msgstr "" -"修复了启用静态 EPLB 功能时出现的 log2phy NoneType 错误。[#1186](https://github.com/vllm-project/vllm-ascend/pull/1186)" +"修复了启用静态 EPLB 功能时出现的 log2phy NoneType 错误。[#1186](https://github.com/vllm-" +"project/vllm-ascend/pull/1186)" #: ../../source/user_guide/release_notes.md:1135 msgid "" @@ -6022,14 +6609,17 @@ msgid "" "[#997](https://github.com/vllm-project/vllm-ascend/pull/997), " "[#1135](https://github.com/vllm-project/vllm-ascend/pull/1135)" msgstr "" -"启用 DBO 后,DeepSeek 的性能得到提升。[#997](https://github.com/vllm-project/vllm-ascend/pull/997), [#1135](https://github.com/vllm-project/vllm-ascend/pull/1135)" +"启用 DBO 后,DeepSeek 的性能得到提升。[#997](https://github.com/vllm-project/vllm-" +"ascend/pull/997), [#1135](https://github.com/vllm-project/vllm-" +"ascend/pull/1135)" #: ../../source/user_guide/release_notes.md:1136 msgid "" "Refactoring AscendFusedMoE [#1229](https://github.com/vllm-project/vllm-" "ascend/pull/1229)" msgstr "" -"重构 AscendFusedMoE [#1229](https://github.com/vllm-project/vllm-ascend/pull/1229)" +"重构 AscendFusedMoE [#1229](https://github.com/vllm-project/vllm-" +"ascend/pull/1229)" #: ../../source/user_guide/release_notes.md:1137 msgid "" @@ -6037,7 +6627,9 @@ msgid "" "Turbo/GPUStack) [#1224](https://github.com/vllm-project/vllm-" "ascend/pull/1224)" msgstr "" -"新增初始用户案例页面(包含 LLaMA-Factory/TRL/verl/MindIE Turbo/GPUStack)[#1224](https://github.com/vllm-project/vllm-ascend/pull/1224)" +"新增初始用户案例页面(包含 LLaMA-Factory/TRL/verl/MindIE " +"Turbo/GPUStack)[#1224](https://github.com/vllm-project/vllm-" +"ascend/pull/1224)" #: ../../source/user_guide/release_notes.md:1138 msgid "" @@ -6083,42 +6675,46 @@ msgid "" "@farawayboat made their first contribution in " msgstr "" -"@farawayboat 在 中完成了首次贡献" +"@farawayboat 在 " +"中完成了首次贡献" #: ../../source/user_guide/release_notes.md:1152 msgid "" "@yzim made their first contribution in " -msgstr "" -"@yzim 在 中完成了首次贡献" +msgstr "@yzim 在 中完成了首次贡献" #: ../../source/user_guide/release_notes.md:1153 msgid "" "@chenwaner made their first contribution in " msgstr "" -"@chenwaner 在 中完成了首次贡献" +"@chenwaner 在 " +"中完成了首次贡献" #: ../../source/user_guide/release_notes.md:1154 msgid "" "@wangyanhui-cmss made their first contribution in " msgstr "" -"@wangyanhui-cmss 在 中完成了首次贡献" +"@wangyanhui-cmss 在 中完成了首次贡献" #: ../../source/user_guide/release_notes.md:1155 msgid "" "@songshanhu07 made their first contribution in " msgstr "" -"@songshanhu07 在 中完成了首次贡献" +"@songshanhu07 在 " +"中完成了首次贡献" #: ../../source/user_guide/release_notes.md:1156 msgid "" "@yuancaoyaoHW made their first contribution in " msgstr "" -"@yuancaoyaoHW 在 中完成了首次贡献" +"@yuancaoyaoHW 在 " +"中完成了首次贡献" #: ../../source/user_guide/release_notes.md:1158 msgid "" @@ -6159,9 +6755,9 @@ msgid "" "maintained any more. Please set environment `VLLM_USE_V1=1` to enable V1 " "Engine." msgstr "" -"这是 vllm-ascend v0.9.0 的第一个候选发布版本。请按照[官方文档](https://github.com/vllm-project/vllm-" -"ascend/tree/v0.9.0rc1)开始使用。从此版本起,推荐使用 V1 引擎。V0 引擎的代码已被冻结,不再维护。如需启用 V1" -" 引擎,请设置环境变量 `VLLM_USE_V1=1`。" +"这是 vllm-ascend v0.9.0 的第一个候选发布版本。请按照[官方文档](https://github.com/vllm-" +"project/vllm-ascend/tree/v0.9.0rc1)开始使用。从此版本起,推荐使用 V1 引擎。V0 " +"引擎的代码已被冻结,不再维护。如需启用 V1 引擎,请设置环境变量 `VLLM_USE_V1=1`。" #: ../../source/user_guide/release_notes.md:1174 msgid "" @@ -6170,7 +6766,8 @@ msgid "" " to take a try. 
[#789](https://github.com/vllm-project/vllm-" "ascend/pull/789)" msgstr "" -"DeepSeek 现在已支持图模式。请按照[官方文档](https://docs.vllm.ai/projects/ascend/en/latest/user_guide/feature_guide/graph_mode.html)进行尝试。[#789](https://github.com" +"DeepSeek " +"现在已支持图模式。请按照[官方文档](https://docs.vllm.ai/projects/ascend/en/latest/user_guide/feature_guide/graph_mode.html)进行尝试。[#789](https://github.com" "/vllm-project/vllm-ascend/pull/789)" #: ../../source/user_guide/release_notes.md:1175 @@ -6353,8 +6950,7 @@ msgstr "" msgid "" "The contributor doc site is " "[added](https://docs.vllm.ai/projects/ascend/en/latest/community/contributors.html)" -msgstr "" -"贡献者文档站点已[添加](https://docs.vllm.ai/projects/ascend/en/latest/community/contributors.html)" +msgstr "贡献者文档站点已[添加](https://docs.vllm.ai/projects/ascend/en/latest/community/contributors.html)" #: ../../source/user_guide/release_notes.md:1203 msgid "" @@ -6381,7 +6977,8 @@ msgid "" "doc](https://docs.vllm.ai/projects/ascend/en/v0.7.3) to start the " "journey. It includes the following changes:" msgstr "" -"这是 0.7.3 版本的第一个补丁发布。请按照[官方文档](https://docs.vllm.ai/projects/ascend/en/v0.7.3)开始使用。本次更新包含以下更改:" +"这是 0.7.3 " +"版本的第一个补丁发布。请按照[官方文档](https://docs.vllm.ai/projects/ascend/en/v0.7.3)开始使用。本次更新包含以下更改:" #: ../../source/user_guide/release_notes.md:1212 msgid "" @@ -6391,7 +6988,9 @@ msgid "" "/vllm-ascend/pull/903) [#915](https://github.com/vllm-project/vllm-" "ascend/pull/915)" msgstr "" -"现已支持 Qwen3 和 Qwen3MOE。Qwen3 的性能和精度已通过充分测试,您可以立即试用。推荐使用 Mindie Turbo 以提升 Qwen3 的性能。[#903](https://github.com/vllm-project/vllm-ascend/pull/903) [#915](https://github.com/vllm-project/vllm-ascend/pull/915)" +"现已支持 Qwen3 和 Qwen3MOE。Qwen3 的性能和精度已通过充分测试,您可以立即试用。推荐使用 Mindie Turbo 以提升 " +"Qwen3 的性能。[#903](https://github.com/vllm-project/vllm-ascend/pull/903) " +"[#915](https://github.com/vllm-project/vllm-ascend/pull/915)" #: ../../source/user_guide/release_notes.md:1213 msgid "" @@ -6401,7 +7000,10 @@ msgid "" "/vllm-project/vllm-ascend/pull/878) [Doc " "Link](https://docs.vllm.ai/projects/ascend/en/v0.7.3/developer_guide/performance/optimization_and_tuning.html)" msgstr "" -"新增了一份性能指南。该指南旨在帮助用户在系统层面提升 vllm-ascend 的性能,内容包括操作系统配置、库优化、部署指南等。[#878](https://github.com/vllm-project/vllm-ascend/pull/878) [文档链接](https://docs.vllm.ai/projects/ascend/en/v0.7.3/developer_guide/performance/optimization_and_tuning.html)" +"新增了一份性能指南。该指南旨在帮助用户在系统层面提升 vllm-ascend " +"的性能,内容包括操作系统配置、库优化、部署指南等。[#878](https://github.com/vllm-project/vllm-" +"ascend/pull/878) " +"[文档链接](https://docs.vllm.ai/projects/ascend/en/v0.7.3/developer_guide/performance/optimization_and_tuning.html)" #: ../../source/user_guide/release_notes.md:1215 msgid "Bug Fixes" @@ -6412,7 +7014,8 @@ msgid "" "Qwen2.5-VL works for RLHF scenarios now. 
[#928](https://github.com/vllm-" "project/vllm-ascend/pull/928)" msgstr "" -"Qwen2.5-VL 现已支持 RLHF 场景。[#928](https://github.com/vllm-project/vllm-ascend/pull/928)" +"Qwen2.5-VL 现已支持 RLHF 场景。[#928](https://github.com/vllm-project/vllm-" +"ascend/pull/928)" #: ../../source/user_guide/release_notes.md:1218 msgid "" @@ -6421,14 +7024,17 @@ msgid "" "ascend/pull/858) [#918](https://github.com/vllm-project/vllm-" "ascend/pull/918)" msgstr "" -"用户现在可以直接从在线权重启动模型,例如直接从 huggingface 或 modelscope 获取。[#858](https://github.com/vllm-project/vllm-ascend/pull/858) [#918](https://github.com/vllm-project/vllm-ascend/pull/918)" +"用户现在可以直接从在线权重启动模型,例如直接从 huggingface 或 modelscope " +"获取。[#858](https://github.com/vllm-project/vllm-ascend/pull/858) " +"[#918](https://github.com/vllm-project/vllm-ascend/pull/918)" #: ../../source/user_guide/release_notes.md:1219 msgid "" "The meaningless log info `UserWorkspaceSize0` has been cleaned. " "[#911](https://github.com/vllm-project/vllm-ascend/pull/911)" msgstr "" -"无意义的日志信息 `UserWorkspaceSize0` 已被清理。[#911](https://github.com/vllm-project/vllm-ascend/pull/911)" +"无意义的日志信息 `UserWorkspaceSize0` 已被清理。[#911](https://github.com/vllm-project" +"/vllm-ascend/pull/911)" #: ../../source/user_guide/release_notes.md:1220 msgid "" @@ -6436,7 +7042,8 @@ msgid "" "`warning` instead of `error`. [#956](https://github.com/vllm-project" "/vllm-ascend/pull/956)" msgstr "" -"`Failed to import vllm_ascend_C` 的日志级别已从 `error` 更改为 `warning`。[#956](https://github.com/vllm-project/vllm-ascend/pull/956)" +"`Failed to import vllm_ascend_C` 的日志级别已从 `error` 更改为 " +"`warning`。[#956](https://github.com/vllm-project/vllm-ascend/pull/956)" #: ../../source/user_guide/release_notes.md:1221 msgid "" @@ -6445,21 +7052,26 @@ msgid "" "[#849](https://github.com/vllm-project/vllm-ascend/pull/849) " "[#936](https://github.com/vllm-project/vllm-ascend/pull/936)" msgstr "" -"DeepSeek MLA 现已在 V1 引擎中支持分块预填充。请注意,0.7.3 版本中的 V1 引擎仅为实验性功能,仅供测试使用。[#849](https://github.com/vllm-project/vllm-ascend/pull/849) [#936](https://github.com/vllm-project/vllm-ascend/pull/936)" +"DeepSeek MLA 现已在 V1 引擎中支持分块预填充。请注意,0.7.3 版本中的 V1 " +"引擎仅为实验性功能,仅供测试使用。[#849](https://github.com/vllm-project/vllm-" +"ascend/pull/849) [#936](https://github.com/vllm-project/vllm-" +"ascend/pull/936)" #: ../../source/user_guide/release_notes.md:1225 msgid "" "The benchmark doc is updated for Qwen2.5 and Qwen2.5-VL " "[#792](https://github.com/vllm-project/vllm-ascend/pull/792)" msgstr "" -"基准测试文档已针对 Qwen2.5 和 Qwen2.5-VL 进行更新 [#792](https://github.com/vllm-project/vllm-ascend/pull/792)" +"基准测试文档已针对 Qwen2.5 和 Qwen2.5-VL 进行更新 [#792](https://github.com/vllm-" +"project/vllm-ascend/pull/792)" #: ../../source/user_guide/release_notes.md:1226 msgid "" "Add the note to clear that only \"modelscope<1.23.0\" works with 0.7.3. " "[#954](https://github.com/vllm-project/vllm-ascend/pull/954)" msgstr "" -"添加说明,明确指出只有 \"modelscope<1.23.0\" 可与 0.7.3 版本兼容使用。[#954](https://github.com/vllm-project/vllm-ascend/pull/954)" +"添加说明,明确指出只有 \"modelscope<1.23.0\" 可与 0.7.3 " +"版本兼容使用。[#954](https://github.com/vllm-project/vllm-ascend/pull/954)" #: ../../source/user_guide/release_notes.md:1228 msgid "v0.7.3 - 2025.05.08" @@ -6480,7 +7092,8 @@ msgid "" "doc](https://docs.vllm.ai/projects/ascend/en/v0.7.3) to start the " "journey." 
msgstr "" -"我们很高兴地宣布 vllm-ascend 0.7.3 版本正式发布。这是首个官方正式版本。该版本的功能、性能和稳定性均已通过全面测试和验证。我们鼓励您试用并提供反馈。如有需要,我们将在未来发布错误修复版本。请按照[官方文档](https://docs.vllm.ai/projects/ascend/en/v0.7.3)开始使用。" +"我们很高兴地宣布 vllm-ascend 0.7.3 " +"版本正式发布。这是首个官方正式版本。该版本的功能、性能和稳定性均已通过全面测试和验证。我们鼓励您试用并提供反馈。如有需要,我们将在未来发布错误修复版本。请按照[官方文档](https://docs.vllm.ai/projects/ascend/en/v0.7.3)开始使用。" #: ../../source/user_guide/release_notes.md:1236 msgid "" @@ -6490,13 +7103,18 @@ msgid "" "project/vllm-ascend/releases/tag/v0.7.3rc1), " "[v0.7.3rc2](https://github.com/vllm-project/vllm-" "ascend/releases/tag/v0.7.3rc2)). And all the features are fully tested " -"and verified. Visit the official doc the get the detail " +"and verified. Visit the official doc to get the detail " "[feature](https://docs.vllm.ai/projects/ascend/en/v0.7.3/user_guide/suppoted_features.html)" " and " "[model](https://docs.vllm.ai/projects/ascend/en/v0.7.3/user_guide/supported_models.html)" " support matrix." msgstr "" -"本次发布包含了之前所有候选版本中已实现的功能([v0.7.1rc1](https://github.com/vllm-project/vllm-ascend/releases/tag/v0.7.1rc1)、[v0.7.3rc1](https://github.com/vllm-project/vllm-ascend/releases/tag/v0.7.3rc1)、[v0.7.3rc2](https://github.com/vllm-project/vllm-ascend/releases/tag/v0.7.3rc2))。所有功能均已通过全面测试和验证。请访问官方文档以获取详细的[功能](https://docs.vllm.ai/projects/ascend/en/v0.7.3/user_guide/suppoted_features.html)和[模型](https://docs.vllm.ai/projects/ascend/en/v0.7.3/user_guide/supported_models.html)支持矩阵。" +"本次发布包含了之前所有候选版本中已实现的功能([v0.7.1rc1](https://github.com/vllm-project/vllm-" +"ascend/releases/tag/v0.7.1rc1)、[v0.7.3rc1](https://github.com/vllm-" +"project/vllm-" +"ascend/releases/tag/v0.7.3rc1)、[v0.7.3rc2](https://github.com/vllm-" +"project/vllm-" +"ascend/releases/tag/v0.7.3rc2))。所有功能均已通过全面测试和验证。请访问官方文档以获取详细的[功能](https://docs.vllm.ai/projects/ascend/en/v0.7.3/user_guide/suppoted_features.html)和[模型](https://docs.vllm.ai/projects/ascend/en/v0.7.3/user_guide/supported_models.html)支持矩阵。" #: ../../source/user_guide/release_notes.md:1237 msgid "" @@ -6511,7 +7129,9 @@ msgid "" " The 2.5.1 version of torch-npu will be installed automatically. " "[#662](https://github.com/vllm-project/vllm-ascend/pull/662)" msgstr "" -"将 PyTorch 升级至 2.5.1。vLLM Ascend 现在不再依赖 torch-npu 的开发版本。用户无需再手动安装 torch-npu,2.5.1 版本的 torch-npu 将被自动安装。[#662](https://github.com/vllm-project/vllm-ascend/pull/662)" +"将 PyTorch 升级至 2.5.1。vLLM Ascend 现在不再依赖 torch-npu 的开发版本。用户无需再手动安装 torch-" +"npu,2.5.1 版本的 torch-npu 将被自动安装。[#662](https://github.com/vllm-project" +"/vllm-ascend/pull/662)" #: ../../source/user_guide/release_notes.md:1239 msgid "" @@ -6519,7 +7139,8 @@ msgid "" " series performance. [#708](https://github.com/vllm-project/vllm-" "ascend/pull/708)" msgstr "" -"将 MindIE Turbo 集成到 vLLM Ascend 中,以提升 DeepSeek V3/R1、Qwen 2 系列的性能。[#708](https://github.com/vllm-project/vllm-ascend/pull/708)" +"将 MindIE Turbo 集成到 vLLM Ascend 中,以提升 DeepSeek V3/R1、Qwen 2 " +"系列的性能。[#708](https://github.com/vllm-project/vllm-ascend/pull/708)" #: ../../source/user_guide/release_notes.md:1243 msgid "" @@ -6528,26 +7149,35 @@ msgid "" "more usage information. Thanks for the contribution from China Merchants " "Bank. 
[#700](https://github.com/vllm-project/vllm-ascend/pull/700)" msgstr "" -"现已支持 LoRA、多 LoRA 和动态服务。性能将在下一个版本中得到提升。请参阅官方文档以获取更多使用信息。感谢招商银行的贡献。[#700](https://github.com/vllm-project/vllm-ascend/pull/700)" +"现已支持 LoRA、多 LoRA " +"和动态服务。性能将在下一个版本中得到提升。请参阅官方文档以获取更多使用信息。感谢招商银行的贡献。[#700](https://github.com" +"/vllm-project/vllm-ascend/pull/700)" #: ../../source/user_guide/release_notes.md:1247 msgid "" "The performance of Qwen2 vl and Qwen2.5 vl is improved. " "[#702](https://github.com/vllm-project/vllm-ascend/pull/702)" -msgstr "Qwen2 vl 和 Qwen2.5 vl 的性能已得到提升。[#702](https://github.com/vllm-project/vllm-ascend/pull/702)" +msgstr "" +"Qwen2 vl 和 Qwen2.5 vl 的性能已得到提升。[#702](https://github.com/vllm-project" +"/vllm-ascend/pull/702)" #: ../../source/user_guide/release_notes.md:1248 msgid "" "The performance of `apply_penalties` and `topKtopP` ops are improved. " "[#525](https://github.com/vllm-project/vllm-ascend/pull/525)" -msgstr "`apply_penalties` 和 `topKtopP` 操作的性能已得到提升。[#525](https://github.com/vllm-project/vllm-ascend/pull/525)" +msgstr "" +"`apply_penalties` 和 `topKtopP` 操作的性能已得到提升。[#525](https://github.com/vllm-" +"project/vllm-ascend/pull/525)" #: ../../source/user_guide/release_notes.md:1252 msgid "" "Fixed a issue that may lead CPU memory leak. [#691](https://github.com" "/vllm-project/vllm-ascend/pull/691) [#712](https://github.com/vllm-" "project/vllm-ascend/pull/712)" -msgstr "修复了一个可能导致 CPU 内存泄漏的问题。[#691](https://github.com/vllm-project/vllm-ascend/pull/691) [#712](https://github.com/vllm-project/vllm-ascend/pull/712)" +msgstr "" +"修复了一个可能导致 CPU 内存泄漏的问题。[#691](https://github.com/vllm-project/vllm-" +"ascend/pull/691) [#712](https://github.com/vllm-project/vllm-" +"ascend/pull/712)" #: ../../source/user_guide/release_notes.md:1253 msgid "" @@ -6555,19 +7185,25 @@ msgid "" "error when building with custom ops enabled, please set `SOC_VERSION` to " "a suitable value. [#606](https://github.com/vllm-project/vllm-" "ascend/pull/606)" -msgstr "新增了一个环境变量 `SOC_VERSION`。如果在启用自定义算子构建时遇到任何 SoC 检测错误,请将 `SOC_VERSION` 设置为合适的值。[#606](https://github.com/vllm-project/vllm-ascend/pull/606)" +msgstr "" +"新增了一个环境变量 `SOC_VERSION`。如果在启用自定义算子构建时遇到任何 SoC 检测错误,请将 `SOC_VERSION` " +"设置为合适的值。[#606](https://github.com/vllm-project/vllm-ascend/pull/606)" #: ../../source/user_guide/release_notes.md:1254 msgid "" "openEuler container image supported with v0.7.3-openeuler tag. " "[#665](https://github.com/vllm-project/vllm-ascend/pull/665)" -msgstr "现已支持带有 v0.7.3-openeuler 标签的 openEuler 容器镜像。[#665](https://github.com/vllm-project/vllm-ascend/pull/665)" +msgstr "" +"现已支持带有 v0.7.3-openeuler 标签的 openEuler 容器镜像。[#665](https://github.com" +"/vllm-project/vllm-ascend/pull/665)" #: ../../source/user_guide/release_notes.md:1255 msgid "" "Prefix cache feature works on V1 engine now. [#559](https://github.com" "/vllm-project/vllm-ascend/pull/559)" -msgstr "前缀缓存功能现已在 V1 引擎上正常工作。[#559](https://github.com/vllm-project/vllm-ascend/pull/559)" +msgstr "" +"前缀缓存功能现已在 V1 引擎上正常工作。[#559](https://github.com/vllm-project/vllm-" +"ascend/pull/559)" #: ../../source/user_guide/release_notes.md:1257 msgid "v0.8.5rc1 - 2025.05.06" @@ -6577,26 +7213,35 @@ msgstr "v0.8.5rc1 - 2025.05.06" msgid "" "This is the 1st release candidate of v0.8.5 for vllm-ascend. Please " "follow the [official doc](https://github.com/vllm-project/vllm-" -"ascend/tree/v0.8.5rc1) to start the journey. Now you can enable V1 egnine" +"ascend/tree/v0.8.5rc1) to start the journey. 
Now you can enable V1 engine" " by setting the environment variable `VLLM_USE_V1=1`, see the feature " "support status of vLLM Ascend in [supported_features](https://github.com" "/vllm-project/vllm-" "ascend/blob/v0.8.5rc1/docs/source/user_guide/suppoted_features.md)." msgstr "" -"这是 vllm-ascend v0.8.5 的第一个候选发布版本。请按照[官方文档](https://github.com/vllm-project/vllm-ascend/tree/v0.8.5rc1)开始使用。现在,您可以通过设置环境变量 `VLLM_USE_V1=1` 来启用 V1 引擎。vLLM Ascend 的功能支持状态请参见 [supported_features](https://github.com/vllm-project/vllm-ascend/blob/v0.8.5rc1/docs/source/user_guide/suppoted_features.md)。" +"这是 vllm-ascend v0.8.5 的第一个候选发布版本。请按照[官方文档](https://github.com/vllm-" +"project/vllm-ascend/tree/v0.8.5rc1)开始使用。现在,您可以通过设置环境变量 `VLLM_USE_V1=1` " +"来启用 V1 引擎。vLLM Ascend 的功能支持状态请参见 [supported_features](https://github.com" +"/vllm-project/vllm-" +"ascend/blob/v0.8.5rc1/docs/source/user_guide/suppoted_features.md)。" #: ../../source/user_guide/release_notes.md:1263 msgid "" "Upgrade CANN version to 8.1.RC1 to support chunked prefill and automatic " "prefix caching (`--enable_prefix_caching`) when V1 is enabled " "[#747](https://github.com/vllm-project/vllm-ascend/pull/747)" -msgstr "将 CANN 版本升级到 8.1.RC1,以支持在启用 V1 时的分块预填充和自动前缀缓存(`--enable_prefix_caching`)功能[#747](https://github.com/vllm-project/vllm-ascend/pull/747)" +msgstr "" +"将 CANN 版本升级到 8.1.RC1,以支持在启用 V1 " +"时的分块预填充和自动前缀缓存(`--enable_prefix_caching`)功能[#747](https://github.com" +"/vllm-project/vllm-ascend/pull/747)" #: ../../source/user_guide/release_notes.md:1264 msgid "" "Optimize Qwen2 VL and Qwen 2.5 VL [#701](https://github.com/vllm-project" "/vllm-ascend/pull/701)" -msgstr "优化 Qwen2 VL 和 Qwen 2.5 VL [#701](https://github.com/vllm-project/vllm-ascend/pull/701)" +msgstr "" +"优化 Qwen2 VL 和 Qwen 2.5 VL [#701](https://github.com/vllm-project/vllm-" +"ascend/pull/701)" #: ../../source/user_guide/release_notes.md:1265 msgid "" @@ -6604,31 +7249,43 @@ msgid "" "use --additional_config={'enable_graph_mode': True} to enable graph mode." 
" [#598](https://github.com/vllm-project/vllm-ascend/pull/598) " "[#719](https://github.com/vllm-project/vllm-ascend/pull/719)" -msgstr "改进了 Deepseek V3 的 eager 模式和图模式性能,现在您可以使用 --additional_config={'enable_graph_mode': True} 来启用图模式。[#598](https://github.com/vllm-project/vllm-ascend/pull/598) [#719](https://github.com/vllm-project/vllm-ascend/pull/719)" +msgstr "" +"改进了 Deepseek V3 的 eager 模式和图模式性能,现在您可以使用 " +"--additional_config={'enable_graph_mode': True} " +"来启用图模式。[#598](https://github.com/vllm-project/vllm-ascend/pull/598) " +"[#719](https://github.com/vllm-project/vllm-ascend/pull/719)" #: ../../source/user_guide/release_notes.md:1269 msgid "" "Upgrade vLLM to 0.8.5.post1 [#715](https://github.com/vllm-project/vllm-" "ascend/pull/715)" -msgstr "将 vLLM 升级到 0.8.5.post1 [#715](https://github.com/vllm-project/vllm-ascend/pull/715)" +msgstr "" +"将 vLLM 升级到 0.8.5.post1 [#715](https://github.com/vllm-project/vllm-" +"ascend/pull/715)" #: ../../source/user_guide/release_notes.md:1270 msgid "" "Fix early return in CustomDeepseekV2MoE.forward during profile_run " "[#682](https://github.com/vllm-project/vllm-ascend/pull/682)" -msgstr "修复了在 profile_run 期间 CustomDeepseekV2MoE.forward 过早返回的问题 [#682](https://github.com/vllm-project/vllm-ascend/pull/682)" +msgstr "" +"修复了在 profile_run 期间 CustomDeepseekV2MoE.forward 过早返回的问题 " +"[#682](https://github.com/vllm-project/vllm-ascend/pull/682)" #: ../../source/user_guide/release_notes.md:1271 msgid "" "Adapts for new quant model generated by modelslim " "[#719](https://github.com/vllm-project/vllm-ascend/pull/719)" -msgstr "适配由 modelslim 生成的新量化模型 [#719](https://github.com/vllm-project/vllm-ascend/pull/719)" +msgstr "" +"适配由 modelslim 生成的新量化模型 [#719](https://github.com/vllm-project/vllm-" +"ascend/pull/719)" #: ../../source/user_guide/release_notes.md:1272 msgid "" "Initial support on P2P Disaggregated Prefill based on llm_datadist " "[#694](https://github.com/vllm-project/vllm-ascend/pull/694)" -msgstr "基于 llm_datadist 的 P2P 分布式预填充初步支持 [#694](https://github.com/vllm-project/vllm-ascend/pull/694)" +msgstr "" +"基于 llm_datadist 的 P2P 分布式预填充初步支持 [#694](https://github.com/vllm-project" +"/vllm-ascend/pull/694)" #: ../../source/user_guide/release_notes.md:1273 msgid "" @@ -6644,7 +7301,7 @@ msgid "" "Optimize NPU memory usage to make DeepSeek R1 W8A8 32K model len work. " "[#728](https://github.com/vllm-project/vllm-ascend/pull/728)" msgstr "" -"优化NPU内存使用,以使 DeepSeek R1 W8A8 32K 模型长度能够运行。[#728](https://github.com" +"优化 NPU 内存使用,使 DeepSeek R1 W8A8 32K 模型长度能够运行。[#728](https://github.com" "/vllm-project/vllm-ascend/pull/728)" #: ../../source/user_guide/release_notes.md:1275 @@ -6687,8 +7344,9 @@ msgid "" "are included in this version, such as W8A8 quantization and EP/DP " "support. We'll make them stable enough in the next release." 
msgstr "" -"这是 vllm-ascend v0.8.4 的第二个候选版本。请遵循[官方文档](https://github.com/vllm-project/vllm-" -"ascend/tree/v0.8.4rc2)开始使用。此版本包含一些实验性功能,例如 W8A8 量化和 EP/DP 支持。我们将在下一个版本中使其足够稳定。" +"这是 vllm-ascend v0.8.4 的第二个候选版本。请遵循[官方文档](https://github.com/vllm-project" +"/vllm-ascend/tree/v0.8.4rc2)开始使用。此版本包含一些实验性功能,例如 W8A8 量化和 EP/DP " +"支持。我们将在下一个版本中使其足够稳定。" #: ../../source/user_guide/release_notes.md:1289 msgid "" @@ -6712,8 +7370,9 @@ msgid "" msgstr "" "现已支持 Ascend W8A8 量化方法。请参考[官方文档](https://github.com/vllm-project/vllm-" "ascend/blob/v0.8.4rc2/docs/source/tutorials/multi_npu_quantization.md) " -"示例。欢迎提供任何[反馈](https://github.com/vllm-project/vllm-ascend/issues/619)。[#580](https://github.com" -"/vllm-project/vllm-ascend/pull/580)" +"示例。欢迎提供任何[反馈](https://github.com/vllm-project/vllm-" +"ascend/issues/619)。[#580](https://github.com/vllm-project/vllm-" +"ascend/pull/580)" #: ../../source/user_guide/release_notes.md:1291 msgid "" @@ -6749,15 +7408,17 @@ msgid "" " The 2.5.1 version of torch-npu will be installed automatically. " "[#661](https://github.com/vllm-project/vllm-ascend/pull/661)" msgstr "" -"升级 PyTorch 至 2.5.1。vLLM Ascend 现在不再依赖开发版本的 torch-npu。用户无需手动安装 torch-npu。" -" torch-npu 的 2.5.1 版本将会自动安装。[#661](https://github.com/vllm-project/vllm-ascend/pull/661)" +"升级 PyTorch 至 2.5.1。vLLM Ascend 现在不再依赖开发版本的 torch-npu。用户无需手动安装 torch-npu。 " +"torch-npu 的 2.5.1 版本将会自动安装。[#661](https://github.com/vllm-project/vllm-" +"ascend/pull/661)" #: ../../source/user_guide/release_notes.md:1300 msgid "" "MiniCPM model works now. [#645](https://github.com/vllm-project/vllm-" "ascend/pull/645)" msgstr "" -"MiniCPM 模型现在可以运行了。[#645](https://github.com/vllm-project/vllm-ascend/pull/645)" +"MiniCPM 模型现在可以运行了。[#645](https://github.com/vllm-project/vllm-" +"ascend/pull/645)" #: ../../source/user_guide/release_notes.md:1301 msgid "" @@ -6765,23 +7426,25 @@ msgid "" "customs Ops build is enabled by default for openEuler OS. " "[#689](https://github.com/vllm-project/vllm-ascend/pull/689)" msgstr "" -"openEuler 容器镜像已支持 `v0.8.4-openeuler` 标签,并且 openEuler 操作系统默认启用了自定义算子构建。[#689](https://github.com" -"/vllm-project/vllm-ascend/pull/689)" +"openEuler 容器镜像已支持 `v0.8.4-openeuler` 标签,并且 openEuler " +"操作系统默认启用了自定义算子构建。[#689](https://github.com/vllm-project/vllm-" +"ascend/pull/689)" #: ../../source/user_guide/release_notes.md:1302 msgid "" "Fix ModuleNotFoundError bug to make Lora work [#600](https://github.com" "/vllm-project/vllm-ascend/pull/600)" msgstr "" -"修复 ModuleNotFoundError 错误以使 LoRA 正常工作 [#600](https://github.com/vllm-project/vllm-" -"ascend/pull/600)" +"修复 ModuleNotFoundError 错误以使 LoRA 正常工作 [#600](https://github.com/vllm-" +"project/vllm-ascend/pull/600)" #: ../../source/user_guide/release_notes.md:1303 msgid "" "Add \"Using EvalScope evaluation\" doc [#611](https://github.com/vllm-" "project/vllm-ascend/pull/611)" msgstr "" -"添加了“使用 EvalScope 评估”文档 [#611](https://github.com/vllm-project/vllm-ascend/pull/611)" +"添加了“使用 EvalScope 评估”文档 [#611](https://github.com/vllm-project/vllm-" +"ascend/pull/611)" #: ../../source/user_guide/release_notes.md:1304 msgid "" @@ -6809,9 +7472,11 @@ msgid "" "documentation](https://docs.vllm.ai/projects/ascend/en/latest/community/versioning_policy.html" "#release-window)." 
msgstr "" -"这是 vllm-ascend v0.8.4 的第一个候选版本。请遵循[官方文档](https://github.com/vllm-project/vllm-" -"ascend/tree/v0.8.4rc1)开始使用。从本版本起,vllm-ascend 将跟随 vllm 的最新版本并每两周发布一次。例如,如果 " -"vllm 在接下来的两周内发布 v0.8.5,vllm-ascend 将发布 v0.8.5rc1,而不是 v0.8.4rc2。详细信息请参阅[官方文档](https://docs.vllm.ai/projects/ascend/en/latest/community/versioning_policy.html#release-window)。" +"这是 vllm-ascend v0.8.4 的第一个候选版本。请遵循[官方文档](https://github.com/vllm-project" +"/vllm-ascend/tree/v0.8.4rc1)开始使用。从本版本起,vllm-ascend 将跟随 vllm " +"的最新版本并每两周发布一次。例如,如果 vllm 在接下来的两周内发布 v0.8.5,vllm-ascend 将发布 v0.8.5rc1,而不是 " +"v0.8.4rc2。详细信息请参阅[官方文档](https://docs.vllm.ai/projects/ascend/en/latest/community/versioning_policy.html" +"#release-window)。" #: ../../source/user_guide/release_notes.md:1312 msgid "" @@ -6822,7 +7487,8 @@ msgid "" "work, please set `VLLM_USE_V1=1` environment if you want to use V1 " "forcibly." msgstr "" -"本版本包含了对 vLLM V1 引擎的实验性支持。你可以访问[官方指南](https://docs.vllm.ai/en/v0.8.4/getting_started/v1_user_guide.html)获取更多详细信息。默认情况下,如果" +"本版本包含了对 vLLM V1 " +"引擎的实验性支持。你可以访问[官方指南](https://docs.vllm.ai/en/v0.8.4/getting_started/v1_user_guide.html)获取更多详细信息。默认情况下,如果" " V1 不可用,vLLM 会自动回退到 V0。如果你想强制使用 V1,请设置 `VLLM_USE_V1=1` 环境变量。" #: ../../source/user_guide/release_notes.md:1313 @@ -6833,7 +7499,8 @@ msgid "" "information. Thanks for the contribution from China Merchants Bank. " "[#521](https://github.com/vllm-project/vllm-ascend/pull/521)." msgstr "" -"现已支持 LoRA、Multi-LoRA 和动态服务。性能将在下一个版本中得到提升。请遵循[官方文档](https://docs.vllm.ai/en/v0.8.4/features/lora.html)获取更多使用信息。感谢招商银行的贡献。[#521](https://github.com" +"现已支持 LoRA、Multi-LoRA " +"和动态服务。性能将在下一个版本中得到提升。请遵循[官方文档](https://docs.vllm.ai/en/v0.8.4/features/lora.html)获取更多使用信息。感谢招商银行的贡献。[#521](https://github.com" "/vllm-project/vllm-ascend/pull/521)。" #: ../../source/user_guide/release_notes.md:1314 @@ -6842,8 +7509,8 @@ msgid "" " engine support will come soon. [#513](https://github.com/vllm-project" "/vllm-ascend/pull/513)" msgstr "" -"已支持休眠模式功能。目前它只在 V0 引擎上工作,V1 引擎的支持即将到来。[#513](https://github.com/vllm-project" -"/vllm-ascend/pull/513)" +"已支持休眠模式功能。目前它只在 V0 引擎上工作,V1 引擎的支持即将到来。[#513](https://github.com/vllm-" +"project/vllm-ascend/pull/513)" #: ../../source/user_guide/release_notes.md:1318 msgid "" @@ -6872,8 +7539,8 @@ msgid "" "engine support will come soon. [#500](https://github.com/vllm-project" "/vllm-ascend/pull/500)" msgstr "" -"推测解码功能现已可用。目前它只在 V0 引擎上工作,V1 引擎的支持即将到来。[#500](https://github.com/vllm-project" -"/vllm-ascend/pull/500)" +"推测解码功能现已可用。目前它只在 V0 引擎上工作,V1 引擎的支持即将到来。[#500](https://github.com/vllm-" +"project/vllm-ascend/pull/500)" #: ../../source/user_guide/release_notes.md:1321 msgid "" @@ -6881,8 +7548,8 @@ msgid "" "supports xgrammar backend while using guidance backend may get some " "errors. 
[#555](https://github.com/vllm-project/vllm-ascend/pull/555)" msgstr "" -"结构化输出功能现在已在 V1 引擎上生效。目前仅支持 xgrammar 后端,使用 guidance 后端可能会出现一些错误。[#555](https://github.com" -"/vllm-project/vllm-ascend/pull/555)" +"结构化输出功能现在已在 V1 引擎上生效。目前仅支持 xgrammar 后端,使用 guidance " +"后端可能会出现一些错误。[#555](https://github.com/vllm-project/vllm-ascend/pull/555)" #: ../../source/user_guide/release_notes.md:1325 msgid "" @@ -6891,8 +7558,9 @@ msgid "" "will be added in the next release [#503](https://github.com/vllm-project" "/vllm-ascend/pull/503)" msgstr "" -"新增了一个通信器 `pyhccl`。它用于直接调用 CANN HCCL 库,而不是使用 `torch.distribute`。将在下一个版本中添加更多用法 " -"[#503](https://github.com/vllm-project/vllm-ascend/pull/503)。" +"新增了一个通信器 `pyhccl`。它用于直接调用 CANN HCCL 库,而不是使用 " +"`torch.distribute`。将在下一个版本中添加更多用法 [#503](https://github.com/vllm-project" +"/vllm-ascend/pull/503)。" #: ../../source/user_guide/release_notes.md:1326 msgid "" @@ -6902,9 +7570,9 @@ msgid "" "you don't need it. [#466](https://github.com/vllm-project/vllm-" "ascend/pull/466)" msgstr "" -"自定义算子的构建默认是启用的。你应该先安装如 `gcc`、`cmake` 等包以便从源码编译 `vllm-ascend`。如果不需要自定义算子的编译,可以设置环境变量 " -"`COMPILE_CUSTOM_KERNELS=0` 来禁用编译。 [#466](https://github.com/vllm-project/vllm-" -"ascend/pull/466)" +"自定义算子的构建默认是启用的。你应该先安装如 `gcc`、`cmake` 等包以便从源码编译 `vllm-" +"ascend`。如果不需要自定义算子的编译,可以设置环境变量 `COMPILE_CUSTOM_KERNELS=0` 来禁用编译。 " +"[#466](https://github.com/vllm-project/vllm-ascend/pull/466)" #: ../../source/user_guide/release_notes.md:1327 msgid "" @@ -6912,8 +7580,8 @@ msgid "" " performance. [#555](https://github.com/vllm-project/vllm-" "ascend/pull/555)" msgstr "" -"自定义算子 `rotary embedding` 现已默认启用,以提升性能。[#555](https://github.com/vllm-project/vllm-" -"ascend/pull/555)" +"自定义算子 `rotary embedding` 现已默认启用,以提升性能。[#555](https://github.com/vllm-" +"project/vllm-ascend/pull/555)" #: ../../source/user_guide/release_notes.md:1329 msgid "v0.7.3rc2 - 2025.03.29" @@ -6925,17 +7593,16 @@ msgid "" "the [official doc](https://docs.vllm.ai/projects/ascend/en/v0.7.3) to " "start the journey." msgstr "" -"这是 vllm-ascend v0.7.3 的第二个候选版本。请遵循[官方文档](https://docs.vllm.ai/projects/ascend/en/v0.7.3)开始使用。" +"这是 vllm-ascend v0.7.3 " +"的第二个候选版本。请遵循[官方文档](https://docs.vllm.ai/projects/ascend/en/v0.7.3)开始使用。" #: ../../source/user_guide/release_notes.md:1333 -#: ../../source/user_guide/release_notes.md:1359 msgid "" "Quickstart with container: " "" msgstr "容器快速入门:" #: ../../source/user_guide/release_notes.md:1334 -#: ../../source/user_guide/release_notes.md:1360 msgid "" "Installation: " "" @@ -6949,7 +7616,10 @@ msgid "" "installing vllm-ascend. Set `COMPILE_CUSTOM_KERNELS=1` to enable it. " "[#371](https://github.com/vllm-project/vllm-ascend/pull/371)" msgstr "" -"新增 Ascend 自定义算子框架。开发者现在可以使用 AscendC 编写自定义算子。已添加示例算子 `rotary_embedding`。更多教程即将发布。安装 vllm-ascend 时,自定义算子编译默认禁用。设置 `COMPILE_CUSTOM_KERNELS=1` 以启用此功能。 [#371](https://github.com/vllm-project/vllm-ascend/pull/371)" +"新增 Ascend 自定义算子框架。开发者现在可以使用 AscendC 编写自定义算子。已添加示例算子 " +"`rotary_embedding`。更多教程即将发布。安装 vllm-ascend 时,自定义算子编译默认禁用。设置 " +"`COMPILE_CUSTOM_KERNELS=1` 以启用此功能。 [#371](https://github.com/vllm-" +"project/vllm-ascend/pull/371)" #: ../../source/user_guide/release_notes.md:1339 msgid "" @@ -6959,7 +7629,10 @@ msgid "" "/vllm-ascend/issues/414). 
[#376](https://github.com/vllm-project/vllm-" "ascend/pull/376)" msgstr "" -"本版本已基本支持 V1 引擎。完整支持将在 0.8.X 版本中实现。如果您遇到任何问题或有 V1 引擎的相关需求,请通过[此问题](https://github.com/vllm-project/vllm-ascend/issues/414)告知我们。 [#376](https://github.com/vllm-project/vllm-ascend/pull/376)" +"本版本已基本支持 V1 引擎。完整支持将在 0.8.X 版本中实现。如果您遇到任何问题或有 V1 " +"引擎的相关需求,请通过[此问题](https://github.com/vllm-project/vllm-" +"ascend/issues/414)告知我们。 [#376](https://github.com/vllm-project/vllm-" +"ascend/pull/376)" #: ../../source/user_guide/release_notes.md:1340 msgid "" @@ -6967,7 +7640,8 @@ msgid "" "to enable it. [#282](https://github.com/vllm-project/vllm-" "ascend/pull/282)" msgstr "" -"前缀缓存功能现已可用。您可以通过设置 `enable_prefix_caching=True` 来启用它。 [#282](https://github.com/vllm-project/vllm-ascend/pull/282)" +"前缀缓存功能现已可用。您可以通过设置 `enable_prefix_caching=True` 来启用它。 " +"[#282](https://github.com/vllm-project/vllm-ascend/pull/282)" #: ../../source/user_guide/release_notes.md:1344 msgid "" @@ -6975,28 +7649,32 @@ msgid "" "output problem. [#406](https://github.com/vllm-project/vllm-" "ascend/pull/406)" msgstr "" -"将 torch_npu 版本升级至 dev20250320.3 以提高精度,修复 `!!!` 输出问题。 [#406](https://github.com/vllm-project/vllm-ascend/pull/406)" +"将 torch_npu 版本升级至 dev20250320.3 以提高精度,修复 `!!!` 输出问题。 " +"[#406](https://github.com/vllm-project/vllm-ascend/pull/406)" #: ../../source/user_guide/release_notes.md:1348 msgid "" "The performance of Qwen2-vl is improved by optimizing patch embedding " "(Conv3D). [#398](https://github.com/vllm-project/vllm-ascend/pull/398)" msgstr "" -"通过优化补丁嵌入(Conv3D),Qwen2-vl 的性能得到了提升。 [#398](https://github.com/vllm-project/vllm-ascend/pull/398)" +"通过优化补丁嵌入(Conv3D),Qwen2-vl 的性能得到了提升。 [#398](https://github.com/vllm-" +"project/vllm-ascend/pull/398)" #: ../../source/user_guide/release_notes.md:1352 msgid "" "Fixed a bug to make sure multi step scheduler feature work. " "[#349](https://github.com/vllm-project/vllm-ascend/pull/349)" msgstr "" -"修复了一个错误,确保多步调度器功能正常工作。 [#349](https://github.com/vllm-project/vllm-ascend/pull/349)" +"修复了一个错误,确保多步调度器功能正常工作。 [#349](https://github.com/vllm-project/vllm-" +"ascend/pull/349)" #: ../../source/user_guide/release_notes.md:1353 msgid "" "Fixed a bug to make prefix cache feature works with correct accuracy. " "[#424](https://github.com/vllm-project/vllm-ascend/pull/424)" msgstr "" -"修复了一个错误,使前缀缓存功能能够以正确的精度运行。 [#424](https://github.com/vllm-project/vllm-ascend/pull/424)" +"修复了一个错误,使前缀缓存功能能够以正确的精度运行。 [#424](https://github.com/vllm-project/vllm-" +"ascend/pull/424)" #: ../../source/user_guide/release_notes.md:1355 msgid "v0.7.3rc1 - 2025.03.14" @@ -7009,7 +7687,8 @@ msgid "" "doc](https://docs.vllm.ai/projects/ascend/en/v0.7.3) to start the " "journey." msgstr "" -"🎉 你好,世界!这是 vllm-ascend v0.7.3 的第一个候选发布版本。请按照[官方文档](https://docs.vllm.ai/projects/ascend/en/v0.7.3)开始您的旅程。" +"🎉 你好,世界!这是 vllm-ascend v0.7.3 " +"的第一个候选发布版本。请按照[官方文档](https://docs.vllm.ai/projects/ascend/en/v0.7.3)开始您的旅程。" #: ../../source/user_guide/release_notes.md:1364 msgid "" @@ -7017,7 +7696,9 @@ msgid "" "guide](https://docs.vllm.ai/projects/ascend/en/v0.7.3/tutorials/multi_node.html)" " to start! [#242](https://github.com/vllm-project/vllm-ascend/pull/242)" msgstr "" -"DeepSeek V3/R1 现在运行良好。请阅读[官方指南](https://docs.vllm.ai/projects/ascend/en/v0.7.3/tutorials/multi_node.html)开始使用! [#242](https://github.com/vllm-project/vllm-ascend/pull/242)" +"DeepSeek V3/R1 " +"现在运行良好。请阅读[官方指南](https://docs.vllm.ai/projects/ascend/en/v0.7.3/tutorials/multi_node.html)开始使用!" 
+" [#242](https://github.com/vllm-project/vllm-ascend/pull/242)" #: ../../source/user_guide/release_notes.md:1365 msgid "" @@ -7041,13 +7722,17 @@ msgid "" "/bge-base-en-v1.5` and `BAAI/bge-reranker-v2-m3` works now. " "[#229](https://github.com/vllm-project/vllm-ascend/pull/229)" msgstr "" -"新增了对池化模型的初步支持。基于 Bert 的模型,例如 `BAAI/bge-base-en-v1.5` 和 `BAAI/bge-reranker-v2-m3` 现已可用。 [#229](https://github.com/vllm-project/vllm-ascend/pull/229)" +"新增了对池化模型的初步支持。基于 Bert 的模型,例如 `BAAI/bge-base-en-v1.5` 和 `BAAI/bge-" +"reranker-v2-m3` 现已可用。 [#229](https://github.com/vllm-project/vllm-" +"ascend/pull/229)" #: ../../source/user_guide/release_notes.md:1375 msgid "" "The performance of Qwen2-VL is improved. [#241](https://github.com/vllm-" "project/vllm-ascend/pull/241)" -msgstr "Qwen2-VL 的性能得到了提升。 [#241](https://github.com/vllm-project/vllm-ascend/pull/241)" +msgstr "" +"Qwen2-VL 的性能得到了提升。 [#241](https://github.com/vllm-project/vllm-" +"ascend/pull/241)" #: ../../source/user_guide/release_notes.md:1376 msgid "" @@ -7059,7 +7744,9 @@ msgstr "现已支持 MiniCPM [#164](https://github.com/vllm-project/vllm-ascend/ msgid "" "Support MTP(Multi-Token Prediction) for DeepSeek V3/R1 " "[#236](https://github.com/vllm-project/vllm-ascend/pull/236)" -msgstr "为 DeepSeek V3/R1 支持 MTP(多标记预测) [#236](https://github.com/vllm-project/vllm-ascend/pull/236)" +msgstr "" +"为 DeepSeek V3/R1 支持 MTP(多标记预测) [#236](https://github.com/vllm-project" +"/vllm-ascend/pull/236)" #: ../../source/user_guide/release_notes.md:1381 msgid "" @@ -7068,13 +7755,16 @@ msgid "" "doc](https://docs.vllm.ai/projects/ascend/en/v0.7.3/tutorials/index.html)" " for detail" msgstr "" -"[文档] 新增了更多模型教程,包括 DeepSeek、QwQ、Qwen 和 Qwen 2.5VL。详情请参阅[官方文档](https://docs.vllm.ai/projects/ascend/en/v0.7.3/tutorials/index.html)" +"[文档] 新增了更多模型教程,包括 DeepSeek、QwQ、Qwen 和 Qwen " +"2.5VL。详情请参阅[官方文档](https://docs.vllm.ai/projects/ascend/en/v0.7.3/tutorials/index.html)" #: ../../source/user_guide/release_notes.md:1382 msgid "" "Pin modelscope<1.23.0 on vLLM v0.7.3 to resolve: " -msgstr "在 vLLM v0.7.3 上将 modelscope 版本锁定为 <1.23.0,以解决:" +msgstr "" +"在 vLLM v0.7.3 上将 modelscope 版本锁定为 <1.23.0,以解决:" #: ../../source/user_guide/release_notes.md:1386 msgid "" @@ -7082,7 +7772,8 @@ msgid "" "especially when the input/output is very long, the accuracy of output may" " be incorrect. We are working on it. It'll be fixed in the next release." msgstr "" -"在[某些情况下](https://github.com/vllm-project/vllm-ascend/issues/324),特别是当输入/输出非常长时,输出的准确性可能不正确。我们正在解决此问题,将在下一个版本中修复。" +"在[某些情况下](https://github.com/vllm-project/vllm-" +"ascend/issues/324),特别是当输入/输出非常长时,输出的准确性可能不正确。我们正在解决此问题,将在下一个版本中修复。" #: ../../source/user_guide/release_notes.md:1387 msgid "" @@ -7092,7 +7783,10 @@ msgid "" "Any [feedback](https://github.com/vllm-project/vllm-ascend/issues/267) is" " welcome. [#277](https://github.com/vllm-project/vllm-ascend/pull/277)" msgstr "" -"已改进并减少了模型输出中的乱码问题。但如果您仍然遇到此问题,请尝试更改生成配置参数,例如 `temperature`,然后重试。下方还列出了一个已知问题。欢迎提供任何[反馈](https://github.com/vllm-project/vllm-ascend/issues/267)。 [#277](https://github.com/vllm-project/vllm-ascend/pull/277)" +"已改进并减少了模型输出中的乱码问题。但如果您仍然遇到此问题,请尝试更改生成配置参数,例如 " +"`temperature`,然后重试。下方还列出了一个已知问题。欢迎提供任何[反馈](https://github.com/vllm-" +"project/vllm-ascend/issues/267)。 [#277](https://github.com/vllm-project" +"/vllm-ascend/pull/277)" #: ../../source/user_guide/release_notes.md:1389 msgid "v0.7.1rc1 - 2025.02.19" @@ -7110,7 +7804,8 @@ msgid "" "plugin for running vLLM on the Ascend NPU. 
With this release, users can " "now enjoy the latest features and improvements of vLLM on the Ascend NPU." msgstr "" -"vLLM Ascend 插件(vllm-ascend)是一个由社区维护的硬件插件,用于在 Ascend NPU 上运行 vLLM。通过此版本,用户现在可以在 Ascend NPU 上享受 vLLM 的最新功能和改进。" +"vLLM Ascend 插件(vllm-ascend)是一个由社区维护的硬件插件,用于在 Ascend NPU 上运行 " +"vLLM。通过此版本,用户现在可以在 Ascend NPU 上享受 vLLM 的最新功能和改进。" #: ../../source/user_guide/release_notes.md:1397 msgid "" @@ -7120,58 +7815,76 @@ msgid "" "bugs or issues. We appreciate your feedback and suggestions [this " "issue](https://github.com/vllm-project/vllm-ascend/issues/19)" msgstr "" -"请按照[官方文档](https://docs.vllm.ai/projects/ascend/en/v0.7.1)开始您的旅程。请注意,这是一个候选发布版本,可能存在一些错误或问题。我们非常感谢您的反馈和建议,请通过[此问题](https://github.com/vllm-project/vllm-ascend/issues/19)提交。" +"请按照[官方文档](https://docs.vllm.ai/projects/ascend/en/v0.7.1)开始您的旅程。请注意,这是一个候选发布版本,可能存在一些错误或问题。我们非常感谢您的反馈和建议,请通过[此问题](https://github.com" +"/vllm-project/vllm-ascend/issues/19)提交。" #: ../../source/user_guide/release_notes.md:1401 msgid "" "Initial supports for Ascend NPU on vLLM. [#3](https://github.com/vllm-" "project/vllm-ascend/pull/3)" -msgstr "在 vLLM 上初步支持 Ascend NPU。 [#3](https://github.com/vllm-project/vllm-ascend/pull/3)" +msgstr "" +"在 vLLM 上初步支持 Ascend NPU。 [#3](https://github.com/vllm-project/vllm-" +"ascend/pull/3)" #: ../../source/user_guide/release_notes.md:1402 msgid "" "DeepSeek is now supported. [#88](https://github.com/vllm-project/vllm-" "ascend/pull/88) [#68](https://github.com/vllm-project/vllm-" "ascend/pull/68)" -msgstr "现已支持 DeepSeek。 [#88](https://github.com/vllm-project/vllm-ascend/pull/88) [#68](https://github.com/vllm-project/vllm-ascend/pull/68)" +msgstr "" +"现已支持 DeepSeek。 [#88](https://github.com/vllm-project/vllm-ascend/pull/88)" +" [#68](https://github.com/vllm-project/vllm-ascend/pull/68)" #: ../../source/user_guide/release_notes.md:1403 msgid "" "Qwen, Llama series and other popular models are also supported, you can " "see more details in [supported_models](https://github.com/vllm-project" "/vllm-ascend/blob/v0.7.1rc1/docs/source/user_guide/supported_models.md)." -msgstr "Qwen、Llama 系列及其他流行模型也已支持,更多详情请参阅 [supported_models](https://github.com/vllm-project/vllm-ascend/blob/v0.7.1rc1/docs/source/user_guide/supported_models.md)。" +msgstr "" +"Qwen、Llama 系列及其他流行模型也已支持,更多详情请参阅 [supported_models](https://github.com" +"/vllm-project/vllm-" +"ascend/blob/v0.7.1rc1/docs/source/user_guide/supported_models.md)。" #: ../../source/user_guide/release_notes.md:1407 msgid "" "Added the Ascend quantization config option, the implementation will " "coming soon. [#7](https://github.com/vllm-project/vllm-ascend/pull/7) " "[#73](https://github.com/vllm-project/vllm-ascend/pull/73)" -msgstr "新增了 Ascend 量化配置选项,具体实现即将推出。 [#7](https://github.com/vllm-project/vllm-ascend/pull/7) [#73](https://github.com/vllm-project/vllm-ascend/pull/73)" +msgstr "" +"新增了 Ascend 量化配置选项,具体实现即将推出。 [#7](https://github.com/vllm-project/vllm-" +"ascend/pull/7) [#73](https://github.com/vllm-project/vllm-ascend/pull/73)" #: ../../source/user_guide/release_notes.md:1408 msgid "" "Add silu_and_mul and rope ops and add mix ops into attention layer. 
" "[#18](https://github.com/vllm-project/vllm-ascend/pull/18)" -msgstr "添加 silu_and_mul 和 rope 算子,并将混合算子加入注意力层。 [#18](https://github.com/vllm-project/vllm-ascend/pull/18)" +msgstr "" +"添加 silu_and_mul 和 rope 算子,并将混合算子加入注意力层。 [#18](https://github.com/vllm-" +"project/vllm-ascend/pull/18)" #: ../../source/user_guide/release_notes.md:1412 msgid "" "[CI] Enable Ascend CI to actively monitor and improve quality for vLLM on" " Ascend. [#3](https://github.com/vllm-project/vllm-ascend/pull/3)" -msgstr "[CI] 启用 Ascend CI,以主动监控并提升 vLLM 在 Ascend 上的质量。 [#3](https://github.com/vllm-project/vllm-ascend/pull/3)" +msgstr "" +"[CI] 启用 Ascend CI,以主动监控并提升 vLLM 在 Ascend 上的质量。 [#3](https://github.com" +"/vllm-project/vllm-ascend/pull/3)" #: ../../source/user_guide/release_notes.md:1413 msgid "" "[Docker] Add vllm-ascend container image [#64](https://github.com/vllm-" "project/vllm-ascend/pull/64)" -msgstr "[Docker] 添加 vllm-ascend 容器镜像 [#64](https://github.com/vllm-project/vllm-ascend/pull/64)" +msgstr "" +"[Docker] 添加 vllm-ascend 容器镜像 [#64](https://github.com/vllm-project/vllm-" +"ascend/pull/64)" #: ../../source/user_guide/release_notes.md:1414 msgid "" "[Docs] Add a [live doc](https://vllm-ascend.readthedocs.org) " "[#55](https://github.com/vllm-project/vllm-ascend/pull/55)" -msgstr "[文档] 添加 [在线文档](https://vllm-ascend.readthedocs.org) [#55](https://github.com/vllm-project/vllm-ascend/pull/55)" +msgstr "" +"[文档] 添加 [在线文档](https://vllm-ascend.readthedocs.org) " +"[#55](https://github.com/vllm-project/vllm-ascend/pull/55)" #: ../../source/user_guide/release_notes.md:1418 msgid "" @@ -7180,7 +7893,10 @@ msgid "" "[install](https://github.com/vllm-project/vllm-" "ascend/blob/v0.7.1rc1/docs/source/installation.md) it manually if you are" " using non-container environment." -msgstr "此版本依赖于一个尚未发布的 torch_npu 版本。该版本已预装在官方容器镜像中。如果您使用的是非容器环境,请[手动安装](https://github.com/vllm-project/vllm-ascend/blob/v0.7.1rc1/docs/source/installation.md)。" +msgstr "" +"此版本依赖于一个尚未发布的 torch_npu " +"版本。该版本已预装在官方容器镜像中。如果您使用的是非容器环境,请[手动安装](https://github.com/vllm-project" +"/vllm-ascend/blob/v0.7.1rc1/docs/source/installation.md)。" #: ../../source/user_guide/release_notes.md:1419 msgid "" @@ -7191,7 +7907,12 @@ msgid "" "performance. You can just ignore it. And it has been fixed in this " "[PR](https://github.com/vllm-project/vllm/pull/12432) which will be " "included in v0.7.3 soon." -msgstr "运行 vllm-ascend 时,可能会显示类似 `No platform detected, vLLM is running on UnspecifiedPlatform` 或 `Failed to import from vllm._C with ModuleNotFoundError(\"No module named 'vllm._C'\")` 的日志。这实际上不影响任何功能或性能,您可以忽略它。此问题已在此 [PR](https://github.com/vllm-project/vllm/pull/12432) 中修复,并将很快包含在 v0.7.3 版本中。" +msgstr "" +"运行 vllm-ascend 时,可能会显示类似 `No platform detected, vLLM is running on " +"UnspecifiedPlatform` 或 `Failed to import from vllm._C with " +"ModuleNotFoundError(\"No module named 'vllm._C'\")` " +"的日志。这实际上不影响任何功能或性能,您可以忽略它。此问题已在此 [PR](https://github.com/vllm-" +"project/vllm/pull/12432) 中修复,并将很快包含在 v0.7.3 版本中。" #: ../../source/user_guide/release_notes.md:1420 msgid "" @@ -7200,7 +7921,8 @@ msgid "" " affect any functionality and performance. You can just ignore it. And it" " has been fixed in this [PR](https://github.com/vllm-" "project/vllm/pull/13378) which will be included in v0.7.3 soon." 
-msgstr "运行 vllm-ascend 时,可能会显示类似 `# CPU blocks: 35064, # CPU blocks: 2730` 的日志,实际应为 `# NPU blocks:`。这实际上不影响任何功能或性能,您可以忽略它。此问题已在此 [PR](https://github.com/vllm-project/vllm/pull/13378) 中修复,并将很快包含在 v0.7.3 版本中。"
-
-#~ msgid "Known issues"
-#~ msgstr "已知问题"
+msgstr ""
+"运行 vllm-ascend 时,可能会显示类似 `# CPU blocks: 35064, # CPU blocks: 2730` "
+"的日志,实际应为 `# NPU blocks:`。这实际上不影响任何功能或性能,您可以忽略它。此问题已在此 "
+"[PR](https://github.com/vllm-project/vllm/pull/13378) 中修复,并将很快包含在 v0.7.3 "
+"版本中。"