From ff76c6780e6b4d5742ab6686654527cb4657d290 Mon Sep 17 00:00:00 2001 From: herizhen <59841270+herizhen@users.noreply.github.com> Date: Thu, 23 Apr 2026 16:23:31 +0800 Subject: [PATCH] [releases/v0.18.0][Doc][Misc] Modifying Configuration Parameters (#8618) ### What this PR does / why we need it? This PR renames the environment variable VLLM_NIXL_ABORT_REQUEST_TIMEOUT to VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT to align with the Mooncake connector naming convention. It also updates the documentation and test configurations to reflect this change and sets the suggested timeout value in both the documentation and the nightly test configurations to 480 seconds (previously 300000 or 30000) for consistency. In addition, it adds the timeout setting to the GLM4.x tutorial, removes the space after the item number in numbered headings of the zh_CN translation files (for example, "1. " becomes "1."), and adds a zh_CN translation entry for "Expert Map". ### Does this PR introduce _any_ user-facing change? Yes. The environment variable for configuring the abort request timeout has been renamed. Users should update their environment settings from VLLM_NIXL_ABORT_REQUEST_TIMEOUT to VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT. ### How was this patch tested? The changes were verified by updating the corresponding test configuration files and ensuring consistency across the documentation. 
--------- Signed-off-by: herizhen <1270637059@qq.com> Signed-off-by: herizhen <59841270+herizhen@users.noreply.github.com> --- .../Design_Documents/KV_Cache_Pool_Guide.po | 4 +- .../Design_Documents/disaggregated_prefill.po | 12 ++--- .../Design_Documents/eplb_swift_balancer.po | 4 ++ .../evaluation/using_ais_bench.po | 6 +-- .../evaluation/using_evalscope.po | 8 ++-- .../evaluation/using_lm_eval.po | 8 ++-- .../evaluation/using_opencompass.po | 4 +- docs/source/locale/zh_CN/LC_MESSAGES/faqs.po | 48 +++++++++---------- .../locale/zh_CN/LC_MESSAGES/installation.po | 4 +- .../user_guide/feature_guide/kv_pool.po | 12 ++--- .../feature_guide/large_scale_ep.po | 2 +- .../user_guide/feature_guide/rfork.po | 6 +-- docs/source/tutorials/models/DeepSeek-V3.2.md | 12 +++-- docs/source/tutorials/models/GLM4.x.md | 4 ++ docs/source/tutorials/models/GLM5.md | 18 ++++--- .../tutorials/models/Qwen3.5-397B-A17B.md | 9 ++-- .../config/DeepSeek-R1-W8A8-longseq.yaml | 2 +- .../DeepSeek-V3_2-W8A8-EP-aime2025.yaml | 2 +- .../config/DeepSeek-V3_2-W8A8-EP.yaml | 2 +- .../kv_transfer/kv_p2p/mooncake_connector.py | 2 +- 20 files changed, 95 insertions(+), 74 deletions(-) diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/KV_Cache_Pool_Guide.po b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/KV_Cache_Pool_Guide.po index 78d0fde1..9c2fa065 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/KV_Cache_Pool_Guide.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/KV_Cache_Pool_Guide.po @@ -134,7 +134,7 @@ msgstr "" #: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:31 msgid "1. Combining KV Cache Pool with on-chip memory Prefix Caching" -msgstr "1. 
将 KV 缓存池与片上内存前缀缓存结合" +msgstr "1.将 KV 缓存池与片上内存前缀缓存结合" #: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:33 msgid "" @@ -182,7 +182,7 @@ msgstr "将 KV 池中的 KV 缓存加载到片上内存后,剩余过程与片 #: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:46 msgid "2. Combining KV Cache Pool with Mooncake PD Disaggregation" -msgstr "2. 将 KV 缓存池与 Mooncake PD 解耦结合" +msgstr "2.将 KV 缓存池与 Mooncake PD 解耦结合" #: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:48 msgid "" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/disaggregated_prefill.po b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/disaggregated_prefill.po index b31960da..4f91e790 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/disaggregated_prefill.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/disaggregated_prefill.po @@ -96,7 +96,7 @@ msgstr "工作原理" #: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:29 msgid "1. Design Approach" -msgstr "1. 设计思路" +msgstr "1.设计思路" #: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:31 msgid "" @@ -110,7 +110,7 @@ msgstr "" #: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:33 msgid "2. Implementation Design" -msgstr "2. 实现设计" +msgstr "2.实现设计" #: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:35 msgid "" @@ -246,7 +246,7 @@ msgstr "**MooncakeConnectorWorker**:用于在工作进程中管理 KV 缓存 #: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:64 msgid "4. Specifications Design" -msgstr "4. 规格设计" +msgstr "4.规格设计" #: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:66 msgid "" @@ -322,7 +322,7 @@ msgstr "DFX 分析" #: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:87 msgid "1. Config Parameter Validation" -msgstr "1. 
配置参数验证" +msgstr "1.配置参数验证" #: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:89 msgid "" @@ -335,7 +335,7 @@ msgstr "" #: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:91 msgid "2. Port Conflict Detection" -msgstr "2. 端口冲突检测" +msgstr "2.端口冲突检测" #: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:93 msgid "" @@ -348,7 +348,7 @@ msgstr "" #: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:95 msgid "3. PD Ratio Validation" -msgstr "3. PD 比例验证" +msgstr "3.PD 比例验证" #: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:97 msgid "" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/eplb_swift_balancer.po b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/eplb_swift_balancer.po index 034ab42e..87e9d84f 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/eplb_swift_balancer.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/eplb_swift_balancer.po @@ -434,6 +434,10 @@ msgstr "" msgid "Consistency" msgstr "一致性" +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:236 +msgid "Expert Map" +msgstr "专家映射" + #: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:237 msgid "" "The expert map must be globally unique during initialization and update. " diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_ais_bench.po b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_ais_bench.po index e58e6652..e421ef02 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_ais_bench.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_ais_bench.po @@ -38,7 +38,7 @@ msgstr "在线服务器" #: ../../source/developer_guide/evaluation/using_ais_bench.md:7 msgid "1. Start the vLLM server" -msgstr "1. 
启动 vLLM 服务器" +msgstr "1.启动 vLLM 服务器" #: ../../source/developer_guide/evaluation/using_ais_bench.md:9 msgid "You can run docker container to start the vLLM server on a single NPU:" @@ -60,7 +60,7 @@ msgstr "如果看到如下日志,则 vLLM 服务器启动成功:" #: ../../source/developer_guide/evaluation/using_ais_bench.md:56 msgid "2. Run different datasets using AISBench" -msgstr "2. 使用 AISBench 运行不同数据集" +msgstr "2.使用 AISBench 运行不同数据集" #: ../../source/developer_guide/evaluation/using_ais_bench.md:58 msgid "Install AISBench" @@ -227,7 +227,7 @@ msgstr "执行后,您可以从保存的文件中获取结果,示例如下: #: ../../source/developer_guide/evaluation/using_ais_bench.md:300 msgid "3. Troubleshooting" -msgstr "3. 故障排除" +msgstr "3.故障排除" #: ../../source/developer_guide/evaluation/using_ais_bench.md:302 msgid "Invalid Image Path Error" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_evalscope.po b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_evalscope.po index cc4b49d7..ccbdf895 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_evalscope.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_evalscope.po @@ -28,7 +28,7 @@ msgstr "" #: ../../source/developer_guide/evaluation/using_evalscope.md:5 msgid "1. Online server" -msgstr "1. 在线服务器" +msgstr "1.在线服务器" #: ../../source/developer_guide/evaluation/using_evalscope.md:7 msgid "You can run docker container to start the vLLM server on a single NPU:" @@ -48,7 +48,7 @@ msgstr "服务器启动后,你可以在新的终端中使用输入提示词查 #: ../../source/developer_guide/evaluation/using_evalscope.md:56 msgid "2. Install EvalScope using pip" -msgstr "2. 使用 pip 安装 EvalScope" +msgstr "2.使用 pip 安装 EvalScope" #: ../../source/developer_guide/evaluation/using_evalscope.md:58 msgid "You can install EvalScope as follows:" @@ -56,7 +56,7 @@ msgstr "你可以通过以下方式安装 EvalScope:" #: ../../source/developer_guide/evaluation/using_evalscope.md:66 msgid "3. Run GSM8K using EvalScope for accuracy testing" -msgstr "3. 
使用 EvalScope 运行 GSM8K 进行精度测试" +msgstr "3.使用 EvalScope 运行 GSM8K 进行精度测试" #: ../../source/developer_guide/evaluation/using_evalscope.md:68 msgid "" @@ -81,7 +81,7 @@ msgstr "" #: ../../source/developer_guide/evaluation/using_evalscope.md:92 msgid "4. Run model inference stress testing using EvalScope" -msgstr "4. 使用 EvalScope 运行模型推理压力测试" +msgstr "4.使用 EvalScope 运行模型推理压力测试" #: ../../source/developer_guide/evaluation/using_evalscope.md:94 msgid "Install EvalScope[perf] using pip" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_lm_eval.po b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_lm_eval.po index ee01492d..c6856465 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_lm_eval.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_lm_eval.po @@ -33,7 +33,7 @@ msgstr "在线服务器" #: ../../source/developer_guide/evaluation/using_lm_eval.md:7 msgid "1. Start the vLLM server" -msgstr "1. 启动 vLLM 服务器" +msgstr "1.启动 vLLM 服务器" #: ../../source/developer_guide/evaluation/using_lm_eval.md:9 msgid "You can run docker container to start the vLLM server on a single NPU:" @@ -48,7 +48,7 @@ msgid "" "2. Run GSM8K using the vLLM server (curl) and then run lm-eval for " "accuracy testing" msgstr "" -"2. 使用 vLLM 服务器(curl)运行 GSM8K,然后运行 lm-eval 进行准确率测试" +"2.使用 vLLM 服务器(curl)运行 GSM8K,然后运行 lm-eval 进行准确率测试" #: ../../source/developer_guide/evaluation/using_lm_eval.md:48 msgid "You can query the result with input prompts:" @@ -90,7 +90,7 @@ msgstr "离线服务器" #: ../../source/developer_guide/evaluation/using_lm_eval.md:145 msgid "1. Run docker container" -msgstr "1. 运行 docker 容器" +msgstr "1.运行 docker 容器" #: ../../source/developer_guide/evaluation/using_lm_eval.md:147 msgid "You can run docker container on a single NPU:" @@ -98,7 +98,7 @@ msgstr "您可以在单个 NPU 上运行 docker 容器:" #: ../../source/developer_guide/evaluation/using_lm_eval.md:175 msgid "2. 
Run GSM8K using lm-eval for accuracy testing" -msgstr "2. 使用 lm-eval 运行 GSM8K 进行准确率测试" +msgstr "2.使用 lm-eval 运行 GSM8K 进行准确率测试" #: ../../source/developer_guide/evaluation/using_lm_eval.md:203 msgid "After 1 to 2 minutes, the output is shown below:" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_opencompass.po b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_opencompass.po index 4c860d0e..be6d0247 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_opencompass.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_opencompass.po @@ -33,7 +33,7 @@ msgstr "" #: ../../source/developer_guide/evaluation/using_opencompass.md:5 msgid "1. Online Server" -msgstr "1. 在线服务" +msgstr "1.在线服务" #: ../../source/developer_guide/evaluation/using_opencompass.md:7 msgid "You can run a docker container to start the vLLM server on a single NPU:" @@ -53,7 +53,7 @@ msgstr "服务器启动后,你可以在新的终端中使用输入提示词来 msgid "" "2. Run C-Eval (a Chinese language model evaluation benchmark) using " "OpenCompass for accuracy testing" -msgstr "2. 使用 OpenCompass 运行 C-Eval 进行准确率测试" +msgstr "2.使用 OpenCompass 运行 C-Eval 进行准确率测试" #: ../../source/developer_guide/evaluation/using_opencompass.md:58 msgid "" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/faqs.po b/docs/source/locale/zh_CN/LC_MESSAGES/faqs.po index d0e2f65f..9901e8c0 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/faqs.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/faqs.po @@ -49,7 +49,7 @@ msgstr "通用常见问题" #: ../../source/faqs.md:10 msgid "1. What devices are currently supported?" -msgstr "1. 目前支持哪些设备?" +msgstr "1.目前支持哪些设备?" #: ../../source/faqs.md:12 msgid "" @@ -115,7 +115,7 @@ msgstr "" #: ../../source/faqs.md:28 msgid "2. How to get our docker containers?" -msgstr "2. 如何获取我们的 Docker 容器?" +msgstr "2.如何获取我们的 Docker 容器?" 
#: ../../source/faqs.md:30 msgid "" @@ -154,7 +154,7 @@ msgstr "**在无互联网访问权限的环境中导入 Docker 镜像:**" #: ../../source/faqs.md:70 msgid "3. What models does vllm-ascend supports?" -msgstr "3. vllm-ascend 支持哪些模型?" +msgstr "3.vllm-ascend 支持哪些模型?" #: ../../source/faqs.md:72 msgid "" @@ -164,7 +164,7 @@ msgstr "更多详细信息请参见[此处](https://docs.vllm.ai/projects #: ../../source/faqs.md:74 msgid "4. How to get in touch with our community?" -msgstr "4. 如何与我们的社区取得联系?" +msgstr "4.如何与我们的社区取得联系?" #: ../../source/faqs.md:76 msgid "" @@ -205,7 +205,7 @@ msgstr "" #: ../../source/faqs.md:83 msgid "5. What features does vllm-ascend V1 supports?" -msgstr "5. vllm-ascend V1 支持哪些功能?" +msgstr "5.vllm-ascend V1 支持哪些功能?" #: ../../source/faqs.md:85 msgid "" @@ -217,7 +217,7 @@ msgstr "更多详细信息请参见[此处](https://docs.vllm.ai/projects msgid "" "6. How to solve the problem of \"Failed to infer device type\" or " "\"libatb.so: cannot open shared object file\"?" -msgstr "6. 如何解决“无法推断设备类型”或“libatb.so:无法打开共享对象文件”的问题?" +msgstr "6.如何解决“无法推断设备类型”或“libatb.so:无法打开共享对象文件”的问题?" #: ../../source/faqs.md:89 msgid "" @@ -251,7 +251,7 @@ msgstr "如果以上所有步骤都无法解决问题,请随时提交一个 Gi #: ../../source/faqs.md:105 msgid "7. How vllm-ascend work with vLLM?" -msgstr "7. vllm-ascend 如何与 vLLM 协同工作?" +msgstr "7.vllm-ascend 如何与 vLLM 协同工作?" #: ../../source/faqs.md:107 msgid "" @@ -266,7 +266,7 @@ msgstr "" #: ../../source/faqs.md:109 msgid "8. Does vllm-ascend support Prefill Disaggregation feature?" -msgstr "8. vllm-ascend 是否支持 Prefill Disaggregation 功能?" +msgstr "8.vllm-ascend 是否支持 Prefill Disaggregation 功能?" #: ../../source/faqs.md:111 msgid "" @@ -280,7 +280,7 @@ msgstr "" #: ../../source/faqs.md:113 msgid "9. Does vllm-ascend support quantization method?" -msgstr "9. vllm-ascend 是否支持量化方法?" +msgstr "9.vllm-ascend 是否支持量化方法?" #: ../../source/faqs.md:115 msgid "" @@ -290,7 +290,7 @@ msgstr "目前,vllm-ascend 已支持 w8a8、w4a8 和 w4a4 量化方法。" #: ../../source/faqs.md:117 msgid "10. How is vllm-ascend tested?" -msgstr "10. vllm-ascend 是如何测试的?" 
+msgstr "10.vllm-ascend 是如何测试的?" #: ../../source/faqs.md:119 msgid "" @@ -339,7 +339,7 @@ msgstr "对于每个版本,我们未来都将发布性能测试和准确性测 #: ../../source/faqs.md:131 msgid "11. How to fix the error \"InvalidVersion\" when using vllm-ascend?" -msgstr "11. 使用 vllm-ascend 时如何修复 \"InvalidVersion\" 错误?" +msgstr "11.使用 vllm-ascend 时如何修复 \"InvalidVersion\" 错误?" #: ../../source/faqs.md:133 msgid "" @@ -356,7 +356,7 @@ msgstr "" #: ../../source/faqs.md:135 msgid "12. How to handle the out-of-memory issue?" -msgstr "12. 如何处理内存不足问题?" +msgstr "12.如何处理内存不足问题?" #: ../../source/faqs.md:137 msgid "" @@ -410,7 +410,7 @@ msgstr "" #: ../../source/faqs.md:147 msgid "13. Failed to enable NPU graph mode when running DeepSeek" -msgstr "13. 运行 DeepSeek 时无法启用 NPU 图模式" +msgstr "13.运行 DeepSeek 时无法启用 NPU 图模式" #: ../../source/faqs.md:149 msgid "" @@ -438,7 +438,7 @@ msgstr "" msgid "" "14. Failed to reinstall vllm-ascend from source after uninstalling vllm-" "ascend" -msgstr "14. 卸载 vllm-ascend 后无法从源码重新安装 vllm-ascend" +msgstr "14.卸载 vllm-ascend 后无法从源码重新安装 vllm-ascend" #: ../../source/faqs.md:160 msgid "" @@ -452,7 +452,7 @@ msgstr "" #: ../../source/faqs.md:162 msgid "15. How to generate deterministic results when using vllm-ascend?" -msgstr "15. 使用 vllm-ascend 时如何生成确定性结果?" +msgstr "15.使用 vllm-ascend 时如何生成确定性结果?" #: ../../source/faqs.md:164 msgid "There are several factors that affect output determinism:" @@ -473,7 +473,7 @@ msgid "" "16. How to fix the error \"ImportError: Please install vllm[audio] for " "audio support\" for the Qwen2.5-Omni model?" msgstr "" -"16. 对于 Qwen2.5-Omni 模型,如何修复 \"ImportError: Please install vllm[audio] for" +"16.对于 Qwen2.5-Omni 模型,如何修复 \"ImportError: Please install vllm[audio] for" " audio support\" 错误?" #: ../../source/faqs.md:202 @@ -493,7 +493,7 @@ msgstr "" msgid "" "17. How to troubleshoot and resolve size capture failures resulting from " "stream resource exhaustion, and what are the underlying causes?" -msgstr "17. 如何排查和解决因流资源耗尽导致的尺寸捕获失败,其根本原因是什么?" 
+msgstr "17.如何排查和解决因流资源耗尽导致的尺寸捕获失败,其根本原因是什么?" #: ../../source/faqs.md:213 msgid "Recommended mitigation strategies:" @@ -531,7 +531,7 @@ msgstr "" #: ../../source/faqs.md:221 msgid "18. How to install custom version of torch_npu?" -msgstr "18. 如何安装自定义版本的 torch_npu?" +msgstr "18.如何安装自定义版本的 torch_npu?" #: ../../source/faqs.md:223 msgid "" @@ -546,7 +546,7 @@ msgstr "" msgid "" "19. On certain systems (e.g., Kylin OS), `docker pull` may fail with an " "`invalid tar header` error" -msgstr "19. 在某些系统上(例如 Kylin OS),`docker pull` 可能因 `invalid tar header` 错误而失败" +msgstr "19.在某些系统上(例如 Kylin OS),`docker pull` 可能因 `invalid tar header` 错误而失败" #: ../../source/faqs.md:227 msgid "" @@ -581,7 +581,7 @@ msgstr "将 `vllm_ascend_.tar` 文件(其中 `` 是你使用的镜 msgid "" "20. Why am I getting an error when executing the script to start a Docker" " container? The error message is: \"operation not permitted\"" -msgstr "20. 为什么执行启动 Docker 容器的脚本时会出错?错误信息是:\"operation not permitted\"" +msgstr "20.为什么执行启动 Docker 容器的脚本时会出错?错误信息是:\"operation not permitted\"" #: ../../source/faqs.md:254 msgid "" @@ -598,7 +598,7 @@ msgstr "" #: ../../source/faqs.md:256 msgid "21. How to achieve low latency in a small batch scenario?" -msgstr "21. 如何在小批量场景下实现低延迟?" +msgstr "21.如何在小批量场景下实现低延迟?" #: ../../source/faqs.md:258 msgid "" @@ -636,7 +636,7 @@ msgstr "" msgid "" "22. How to set `SOC_VERSION` when building from source on a CPU-only " "machine?" -msgstr "22. 在仅含 CPU 的机器上从源码构建时,如何设置 `SOC_VERSION`?" +msgstr "22.在仅含 CPU 的机器上从源码构建时,如何设置 `SOC_VERSION`?" #: ../../source/faqs.md:271 msgid "" @@ -654,7 +654,7 @@ msgstr "你可以参考 `Dockerfile*` 中的默认值。例如:" #: ../../source/faqs.md:289 msgid "23. Compilation error occasionally encounters with triton-ascend" -msgstr "23. triton-ascend 偶尔遇到编译错误" +msgstr "23.triton-ascend 偶尔遇到编译错误" #: ../../source/faqs.md:291 msgid "" @@ -670,7 +670,7 @@ msgstr "" #: ../../source/faqs.md:300 msgid "24. Why TPOT increases drastically as concurrency grows?" -msgstr "24. 为什么 TPOT 随着并发增长而急剧增加?" 
+msgstr "24.为什么 TPOT 随着并发增长而急剧增加?" #: ../../source/faqs.md:302 msgid "" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/installation.po b/docs/source/locale/zh_CN/LC_MESSAGES/installation.po index bdc91b07..258ceae6 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/installation.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/installation.po @@ -470,11 +470,11 @@ msgstr "互连验证" #: ../../source/installation.md:376 msgid "1. Get NPU IP Addresses" -msgstr "1. 获取 NPU IP 地址" +msgstr "1.获取 NPU IP 地址" #: ../../source/installation.md:399 msgid "2. Cross-Node PING Test" -msgstr "2. 跨节点 PING 测试" +msgstr "2.跨节点 PING 测试" #: ../../source/installation.md:406 msgid "Run Container In Each Node" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/kv_pool.po b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/kv_pool.po index 89399848..ab67ccc5 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/kv_pool.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/kv_pool.po @@ -285,7 +285,7 @@ msgstr "运行 Mooncake Master" #: ../../source/user_guide/feature_guide/kv_pool.md:109 msgid "1.Configure mooncake.json" -msgstr "1. 配置 mooncake.json" +msgstr "1.配置 mooncake.json" #: ../../source/user_guide/feature_guide/kv_pool.md:111 msgid "" @@ -307,7 +307,7 @@ msgstr "" #: ../../source/user_guide/feature_guide/kv_pool.md:129 msgid "2.Start mooncake_master" -msgstr "2. 启动 mooncake_master" +msgstr "2.启动 mooncake_master" #: ../../source/user_guide/feature_guide/kv_pool.md:131 msgid "Under the mooncake folder:" @@ -335,7 +335,7 @@ msgstr "PD 解耦场景" #: ../../source/user_guide/feature_guide/kv_pool.md:142 #: ../../source/user_guide/feature_guide/kv_pool.md:605 msgid "1.Run `prefill` Node and `decode` Node" -msgstr "1. 
运行 `prefill` 节点和 `decode` 节点" +msgstr "1.运行 `prefill` 节点和 `decode` 节点" #: ../../source/user_guide/feature_guide/kv_pool.md:144 msgid "" @@ -392,7 +392,7 @@ msgstr "将 localhost 更改为您的实际 IP 地址。" #: ../../source/user_guide/feature_guide/kv_pool.md:321 msgid "3.Run Inference" -msgstr "3. 运行推理" +msgstr "3.运行推理" #: ../../source/user_guide/feature_guide/kv_pool.md:323 msgid "" @@ -417,7 +417,7 @@ msgstr "PD混合推理" #: ../../source/user_guide/feature_guide/kv_pool.md:339 #: ../../source/user_guide/feature_guide/kv_pool.md:916 msgid "1.Run Mixed Department Script" -msgstr "1. 运行混合部署脚本" +msgstr "1.运行混合部署脚本" #: ../../source/user_guide/feature_guide/kv_pool.md:345 #: ../../source/user_guide/feature_guide/kv_pool.md:1056 @@ -426,7 +426,7 @@ msgstr "pd_mix.sh 内容:" #: ../../source/user_guide/feature_guide/kv_pool.md:384 msgid "2.Run Inference" -msgstr "2. 运行推理" +msgstr "2.运行推理" #: ../../source/user_guide/feature_guide/kv_pool.md:386 msgid "" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/large_scale_ep.po b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/large_scale_ep.po index 99ab6f33..70d304d6 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/large_scale_ep.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/large_scale_ep.po @@ -484,7 +484,7 @@ msgstr "常见问题" #: ../../source/user_guide/feature_guide/large_scale_ep.md:498 msgid "1. Prefiller nodes need to warm up" -msgstr "1. 
预填充节点需要预热" +msgstr "1.预填充节点需要预热" #: ../../source/user_guide/feature_guide/large_scale_ep.md:500 msgid "" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/rfork.po b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/rfork.po index 0b128768..8975f1dc 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/rfork.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/rfork.po @@ -286,11 +286,11 @@ msgstr "运行前替换 `` `<...>` `` 中的部分。" #: ../../source/user_guide/feature_guide/rfork.md:70 msgid "1. Install YuanRong TransferEngine" -msgstr "1. 安装 YuanRong TransferEngine" +msgstr "1.安装 YuanRong TransferEngine" #: ../../source/user_guide/feature_guide/rfork.md:76 msgid "2. Start the Planner" -msgstr "2. 启动规划器" +msgstr "2.启动规划器" #: ../../source/user_guide/feature_guide/rfork.md:78 msgid "" @@ -300,7 +300,7 @@ msgstr "在 [`rfork_planner.py`](../../../../examples/rfork/rfork_planner.py) #: ../../source/user_guide/feature_guide/rfork.md:86 msgid "3. Start vLLM Instances" -msgstr "3. 启动 vLLM 实例" +msgstr "3.启动 vLLM 实例" #: ../../source/user_guide/feature_guide/rfork.md:88 msgid "" diff --git a/docs/source/tutorials/models/DeepSeek-V3.2.md b/docs/source/tutorials/models/DeepSeek-V3.2.md index 65782c2f..e6c62fb0 100644 --- a/docs/source/tutorials/models/DeepSeek-V3.2.md +++ b/docs/source/tutorials/models/DeepSeek-V3.2.md @@ -526,7 +526,8 @@ Before you start, please export ASCEND_TRANSPORT_PRINT=1 export ACL_OP_INIT_MODE=1 export ASCEND_A3_ENABLE=1 - export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=300000 + # Timeout (in seconds) for automatically releasing the prefiller’s KV cache for a particular request. 
+ export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480 export ASCEND_RT_VISIBLE_DEVICES=$1 @@ -600,7 +601,8 @@ Before you start, please export ASCEND_TRANSPORT_PRINT=1 export ACL_OP_INIT_MODE=1 export ASCEND_A3_ENABLE=1 - export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=300000 + # Timeout (in seconds) for automatically releasing the prefiller’s KV cache for a particular request. + export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480 export ASCEND_RT_VISIBLE_DEVICES=$1 @@ -676,7 +678,8 @@ Before you start, please export ASCEND_TRANSPORT_PRINT=1 export ACL_OP_INIT_MODE=1 export ASCEND_A3_ENABLE=1 - export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=300000 + # Timeout (in seconds) for automatically releasing the prefiller’s KV cache for a particular request. + export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480 export TASK_QUEUE_ENABLE=1 @@ -752,7 +755,8 @@ Before you start, please export ASCEND_TRANSPORT_PRINT=1 export ACL_OP_INIT_MODE=1 export ASCEND_A3_ENABLE=1 - export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=300000 + # Timeout (in seconds) for automatically releasing the prefiller’s KV cache for a particular request. + export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480 export TASK_QUEUE_ENABLE=1 diff --git a/docs/source/tutorials/models/GLM4.x.md b/docs/source/tutorials/models/GLM4.x.md index a167ecec..60273801 100644 --- a/docs/source/tutorials/models/GLM4.x.md +++ b/docs/source/tutorials/models/GLM4.x.md @@ -530,6 +530,8 @@ Before you start, please export ASCEND_TRANSPORT_PRINT=1 export ACL_OP_INIT_MODE=1 export ASCEND_A3_ENABLE=1 + # Timeout (in seconds) for automatically releasing the prefiller’s KV cache for a particular request. 
+ export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480 export TASK_QUEUE_ENABLE=1 export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages/mooncake:$LD_LIBRARY_PATH export VLLM_ASCEND_ENABLE_TOPK_OPTIMIZE=1 @@ -598,6 +600,8 @@ Before you start, please export ASCEND_TRANSPORT_PRINT=1 export ACL_OP_INIT_MODE=1 export ASCEND_A3_ENABLE=1 + # Timeout (in seconds) for automatically releasing the prefiller’s KV cache for a particular request. + export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480 export TASK_QUEUE_ENABLE=1 export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages/mooncake:$LD_LIBRARY_PATH export VLLM_ASCEND_ENABLE_TOPK_OPTIMIZE=1 diff --git a/docs/source/tutorials/models/GLM5.md b/docs/source/tutorials/models/GLM5.md index 0bf9a5ea..86ec5de7 100644 --- a/docs/source/tutorials/models/GLM5.md +++ b/docs/source/tutorials/models/GLM5.md @@ -766,7 +766,8 @@ Before you start, please export ASCEND_TRANSPORT_PRINT=1 export ACL_OP_INIT_MODE=1 export ASCEND_A3_ENABLE=1 - export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=300000 + # Timeout (in seconds) for automatically releasing the prefiller’s KV cache for a particular request. + export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480 export ASCEND_RT_VISIBLE_DEVICES=$1 export VLLM_ASCEND_ENABLE_FLASHCOMM1=1 @@ -844,7 +845,8 @@ Before you start, please export ASCEND_TRANSPORT_PRINT=1 export ACL_OP_INIT_MODE=1 export ASCEND_A3_ENABLE=1 - export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=300000 + # Timeout (in seconds) for automatically releasing the prefiller’s KV cache for a particular request. 
+ export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480 export ASCEND_RT_VISIBLE_DEVICES=$1 export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True @@ -926,7 +928,8 @@ Before you start, please export ASCEND_TRANSPORT_PRINT=1 export ACL_OP_INIT_MODE=1 export ASCEND_A3_ENABLE=1 - export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=300000 + # Timeout (in seconds) for automatically releasing the prefiller’s KV cache for a particular request. + export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480 export TASK_QUEUE_ENABLE=1 @@ -1007,7 +1010,8 @@ Before you start, please export ASCEND_TRANSPORT_PRINT=1 export ACL_OP_INIT_MODE=1 export ASCEND_A3_ENABLE=1 - export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=300000 + # Timeout (in seconds) for automatically releasing the prefiller’s KV cache for a particular request. + export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480 export TASK_QUEUE_ENABLE=1 @@ -1088,7 +1092,8 @@ Before you start, please export ASCEND_TRANSPORT_PRINT=1 export ACL_OP_INIT_MODE=1 export ASCEND_A3_ENABLE=1 - export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=300000 + # Timeout (in seconds) for automatically releasing the prefiller’s KV cache for a particular request. + export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480 export TASK_QUEUE_ENABLE=1 @@ -1169,7 +1174,8 @@ Before you start, please export ASCEND_TRANSPORT_PRINT=1 export ACL_OP_INIT_MODE=1 export ASCEND_A3_ENABLE=1 - export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=300000 + # Timeout (in seconds) for automatically releasing the prefiller’s KV cache for a particular request. 
+ export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480 export TASK_QUEUE_ENABLE=1 diff --git a/docs/source/tutorials/models/Qwen3.5-397B-A17B.md b/docs/source/tutorials/models/Qwen3.5-397B-A17B.md index fa7fadb4..53d524e5 100644 --- a/docs/source/tutorials/models/Qwen3.5-397B-A17B.md +++ b/docs/source/tutorials/models/Qwen3.5-397B-A17B.md @@ -288,7 +288,8 @@ To run the vllm-ascend `Prefill-Decode Disaggregation` service, you need to depl # jemalloc is for better performance, if `libjemalloc.so` is installed on your machine, you can turn it on. # export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libjemalloc.so.2:$LD_PRELOAD export VLLM_ENGINE_READY_TIMEOUT_S=30000 - export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=30000 + # Timeout (in seconds) for automatically releasing the prefiller’s KV cache for a particular request. + export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480 export IP_ADDRESS=$local_ip export NETWORK_CARD_NAME=$nic_name export HCCL_IF_IP=$IP_ADDRESS @@ -362,7 +363,8 @@ To run the vllm-ascend `Prefill-Decode Disaggregation` service, you need to depl node0_ip="xxxx" export VLLM_ENGINE_READY_TIMEOUT_S=30000 - export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=30000 + # Timeout (in seconds) for automatically releasing the prefiller’s KV cache for a particular request. + export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480 export MASTER_IP_ADDRESS=$node0_ip export IP_ADDRESS=$local_ip @@ -442,7 +444,8 @@ To run the vllm-ascend `Prefill-Decode Disaggregation` service, you need to depl node0_ip="xxxx" export VLLM_ENGINE_READY_TIMEOUT_S=30000 - export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=30000 + # Timeout (in seconds) for automatically releasing the prefiller’s KV cache for a particular request. 
+ export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480 export MASTER_IP_ADDRESS=$node0_ip export IP_ADDRESS=$local_ip diff --git a/tests/e2e/nightly/multi_node/config/DeepSeek-R1-W8A8-longseq.yaml b/tests/e2e/nightly/multi_node/config/DeepSeek-R1-W8A8-longseq.yaml index 1021a8db..0878245e 100644 --- a/tests/e2e/nightly/multi_node/config/DeepSeek-R1-W8A8-longseq.yaml +++ b/tests/e2e/nightly/multi_node/config/DeepSeek-R1-W8A8-longseq.yaml @@ -13,7 +13,7 @@ env_common: HCCL_DETERMINISTIC: True TASK_QUEUE_ENABLE: 1 HCCL_OP_RETRY_ENABLE: "L0:0, L1:0" - VLLM_NIXL_ABORT_REQUEST_TIMEOUT: 300000 + VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT: 480 disaggregated_prefill: enabled: true diff --git a/tests/e2e/nightly/multi_node/config/DeepSeek-V3_2-W8A8-EP-aime2025.yaml b/tests/e2e/nightly/multi_node/config/DeepSeek-V3_2-W8A8-EP-aime2025.yaml index e1b0a12c..b3d47ca0 100644 --- a/tests/e2e/nightly/multi_node/config/DeepSeek-V3_2-W8A8-EP-aime2025.yaml +++ b/tests/e2e/nightly/multi_node/config/DeepSeek-V3_2-W8A8-EP-aime2025.yaml @@ -15,7 +15,7 @@ env_common: ASCEND_TRANSPORT_PRINT: 1 ACL_OP_INIT_MODE: 1 ASCEND_A3_ENABLE: 1 - VLLM_NIXL_ABORT_REQUEST_TIMEOUT: 300000 + VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT: 480 VLLM_ENGINE_READY_TIMEOUT_S: 1800 HCCL_CONNECT_TIMEOUT: 1200 HCCL_INTRA_PCIE_ENABLE: 1 diff --git a/tests/e2e/nightly/multi_node/config/DeepSeek-V3_2-W8A8-EP.yaml b/tests/e2e/nightly/multi_node/config/DeepSeek-V3_2-W8A8-EP.yaml index 3c33d40b..9c46bb56 100644 --- a/tests/e2e/nightly/multi_node/config/DeepSeek-V3_2-W8A8-EP.yaml +++ b/tests/e2e/nightly/multi_node/config/DeepSeek-V3_2-W8A8-EP.yaml @@ -15,7 +15,7 @@ env_common: ASCEND_TRANSPORT_PRINT: 1 ACL_OP_INIT_MODE: 1 ASCEND_A3_ENABLE: 1 - VLLM_NIXL_ABORT_REQUEST_TIMEOUT: 300000 + VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT: 480 VLLM_ENGINE_READY_TIMEOUT_S: 1800 HCCL_CONNECT_TIMEOUT: 1200 HCCL_INTRA_PCIE_ENABLE: 1 diff --git a/vllm_ascend/distributed/kv_transfer/kv_p2p/mooncake_connector.py 
b/vllm_ascend/distributed/kv_transfer/kv_p2p/mooncake_connector.py index 1720c1b3..59a6fa7e 100644 --- a/vllm_ascend/distributed/kv_transfer/kv_p2p/mooncake_connector.py +++ b/vllm_ascend/distributed/kv_transfer/kv_p2p/mooncake_connector.py @@ -173,7 +173,7 @@ class KVCacheTaskTracker: while self.delayed_free_requests: request_id = next(iter(self.delayed_free_requests)) delay_start_time = self.delayed_free_requests[request_id] - if current_time - delay_start_time > envs.VLLM_NIXL_ABORT_REQUEST_TIMEOUT: + if current_time - delay_start_time > envs.VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT: self.delayed_free_requests.popitem(last=False) self.reqs_to_process.discard(request_id) expired_requests.add(request_id)