diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/KV_Cache_Pool_Guide.po b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/KV_Cache_Pool_Guide.po index 78d0fde1..9c2fa065 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/KV_Cache_Pool_Guide.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/KV_Cache_Pool_Guide.po @@ -134,7 +134,7 @@ msgstr "" #: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:31 msgid "1. Combining KV Cache Pool with on-chip memory Prefix Caching" -msgstr "1. 将 KV 缓存池与片上内存前缀缓存结合" +msgstr "1.将 KV 缓存池与片上内存前缀缓存结合" #: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:33 msgid "" @@ -182,7 +182,7 @@ msgstr "将 KV 池中的 KV 缓存加载到片上内存后,剩余过程与片 #: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:46 msgid "2. Combining KV Cache Pool with Mooncake PD Disaggregation" -msgstr "2. 将 KV 缓存池与 Mooncake PD 解耦结合" +msgstr "2.将 KV 缓存池与 Mooncake PD 解耦结合" #: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:48 msgid "" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/disaggregated_prefill.po b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/disaggregated_prefill.po index b31960da..4f91e790 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/disaggregated_prefill.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/disaggregated_prefill.po @@ -96,7 +96,7 @@ msgstr "工作原理" #: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:29 msgid "1. Design Approach" -msgstr "1. 设计思路" +msgstr "1.设计思路" #: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:31 msgid "" @@ -110,7 +110,7 @@ msgstr "" #: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:33 msgid "2. Implementation Design" -msgstr "2. 实现设计" +msgstr "2.实现设计" #: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:35 msgid "" @@ -246,7 +246,7 @@ msgstr "**MooncakeConnectorWorker**:用于在工作进程中管理 KV 缓存 #: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:64 msgid "4. Specifications Design" -msgstr "4. 规格设计" +msgstr "4.规格设计" #: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:66 msgid "" @@ -322,7 +322,7 @@ msgstr "DFX 分析" #: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:87 msgid "1. Config Parameter Validation" -msgstr "1. 配置参数验证" +msgstr "1.配置参数验证" #: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:89 msgid "" @@ -335,7 +335,7 @@ msgstr "" #: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:91 msgid "2. Port Conflict Detection" -msgstr "2. 端口冲突检测" +msgstr "2.端口冲突检测" #: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:93 msgid "" @@ -348,7 +348,7 @@ msgstr "" #: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:95 msgid "3. PD Ratio Validation" -msgstr "3. PD 比例验证" +msgstr "3.PD 比例验证" #: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:97 msgid "" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/eplb_swift_balancer.po b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/eplb_swift_balancer.po index 034ab42e..87e9d84f 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/eplb_swift_balancer.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/eplb_swift_balancer.po @@ -434,6 +434,10 @@ msgstr "" msgid "Consistency" msgstr "一致性" +#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:236 +msgid "Expert Map" +msgstr "专家映射" + #: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:237 msgid "" "The expert map must be globally unique during initialization and update. " diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_ais_bench.po b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_ais_bench.po index e58e6652..e421ef02 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_ais_bench.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_ais_bench.po @@ -38,7 +38,7 @@ msgstr "在线服务器" #: ../../source/developer_guide/evaluation/using_ais_bench.md:7 msgid "1. Start the vLLM server" -msgstr "1. 启动 vLLM 服务器" +msgstr "1.启动 vLLM 服务器" #: ../../source/developer_guide/evaluation/using_ais_bench.md:9 msgid "You can run docker container to start the vLLM server on a single NPU:" @@ -60,7 +60,7 @@ msgstr "如果看到如下日志,则 vLLM 服务器启动成功:" #: ../../source/developer_guide/evaluation/using_ais_bench.md:56 msgid "2. Run different datasets using AISBench" -msgstr "2. 使用 AISBench 运行不同数据集" +msgstr "2.使用 AISBench 运行不同数据集" #: ../../source/developer_guide/evaluation/using_ais_bench.md:58 msgid "Install AISBench" @@ -227,7 +227,7 @@ msgstr "执行后,您可以从保存的文件中获取结果,示例如下: #: ../../source/developer_guide/evaluation/using_ais_bench.md:300 msgid "3. Troubleshooting" -msgstr "3. 故障排除" +msgstr "3.故障排除" #: ../../source/developer_guide/evaluation/using_ais_bench.md:302 msgid "Invalid Image Path Error" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_evalscope.po b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_evalscope.po index cc4b49d7..ccbdf895 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_evalscope.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_evalscope.po @@ -28,7 +28,7 @@ msgstr "" #: ../../source/developer_guide/evaluation/using_evalscope.md:5 msgid "1. Online server" -msgstr "1. 在线服务器" +msgstr "1.在线服务器" #: ../../source/developer_guide/evaluation/using_evalscope.md:7 msgid "You can run docker container to start the vLLM server on a single NPU:" @@ -48,7 +48,7 @@ msgstr "服务器启动后,你可以在新的终端中使用输入提示词查 #: ../../source/developer_guide/evaluation/using_evalscope.md:56 msgid "2. Install EvalScope using pip" -msgstr "2. 使用 pip 安装 EvalScope" +msgstr "2.使用 pip 安装 EvalScope" #: ../../source/developer_guide/evaluation/using_evalscope.md:58 msgid "You can install EvalScope as follows:" @@ -56,7 +56,7 @@ msgstr "你可以通过以下方式安装 EvalScope:" #: ../../source/developer_guide/evaluation/using_evalscope.md:66 msgid "3. Run GSM8K using EvalScope for accuracy testing" -msgstr "3. 使用 EvalScope 运行 GSM8K 进行精度测试" +msgstr "3.使用 EvalScope 运行 GSM8K 进行精度测试" #: ../../source/developer_guide/evaluation/using_evalscope.md:68 msgid "" @@ -81,7 +81,7 @@ msgstr "" #: ../../source/developer_guide/evaluation/using_evalscope.md:92 msgid "4. Run model inference stress testing using EvalScope" -msgstr "4. 使用 EvalScope 运行模型推理压力测试" +msgstr "4.使用 EvalScope 运行模型推理压力测试" #: ../../source/developer_guide/evaluation/using_evalscope.md:94 msgid "Install EvalScope[perf] using pip" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_lm_eval.po b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_lm_eval.po index ee01492d..c6856465 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_lm_eval.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_lm_eval.po @@ -33,7 +33,7 @@ msgstr "在线服务器" #: ../../source/developer_guide/evaluation/using_lm_eval.md:7 msgid "1. Start the vLLM server" -msgstr "1. 启动 vLLM 服务器" +msgstr "1.启动 vLLM 服务器" #: ../../source/developer_guide/evaluation/using_lm_eval.md:9 msgid "You can run docker container to start the vLLM server on a single NPU:" @@ -48,7 +48,7 @@ msgid "" "2. Run GSM8K using the vLLM server (curl) and then run lm-eval for " "accuracy testing" msgstr "" -"2. 使用 vLLM 服务器(curl)运行 GSM8K,然后运行 lm-eval 进行准确率测试" +"2.使用 vLLM 服务器(curl)运行 GSM8K,然后运行 lm-eval 进行准确率测试" #: ../../source/developer_guide/evaluation/using_lm_eval.md:48 msgid "You can query the result with input prompts:" @@ -90,7 +90,7 @@ msgstr "离线服务器" #: ../../source/developer_guide/evaluation/using_lm_eval.md:145 msgid "1. Run docker container" -msgstr "1. 运行 docker 容器" +msgstr "1.运行 docker 容器" #: ../../source/developer_guide/evaluation/using_lm_eval.md:147 msgid "You can run docker container on a single NPU:" @@ -98,7 +98,7 @@ msgstr "您可以在单个 NPU 上运行 docker 容器:" #: ../../source/developer_guide/evaluation/using_lm_eval.md:175 msgid "2. Run GSM8K using lm-eval for accuracy testing" -msgstr "2. 使用 lm-eval 运行 GSM8K 进行准确率测试" +msgstr "2.使用 lm-eval 运行 GSM8K 进行准确率测试" #: ../../source/developer_guide/evaluation/using_lm_eval.md:203 msgid "After 1 to 2 minutes, the output is shown below:" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_opencompass.po b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_opencompass.po index 4c860d0e..be6d0247 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_opencompass.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_opencompass.po @@ -33,7 +33,7 @@ msgstr "" #: ../../source/developer_guide/evaluation/using_opencompass.md:5 msgid "1. Online Server" -msgstr "1. 在线服务" +msgstr "1.在线服务" #: ../../source/developer_guide/evaluation/using_opencompass.md:7 msgid "You can run a docker container to start the vLLM server on a single NPU:" @@ -53,7 +53,7 @@ msgstr "服务器启动后,你可以在新的终端中使用输入提示词来 msgid "" "2. Run C-Eval (a Chinese language model evaluation benchmark) using " "OpenCompass for accuracy testing" -msgstr "2. 使用 OpenCompass 运行 C-Eval 进行准确率测试" +msgstr "2.使用 OpenCompass 运行 C-Eval 进行准确率测试" #: ../../source/developer_guide/evaluation/using_opencompass.md:58 msgid "" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/faqs.po b/docs/source/locale/zh_CN/LC_MESSAGES/faqs.po index d0e2f65f..9901e8c0 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/faqs.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/faqs.po @@ -49,7 +49,7 @@ msgstr "通用常见问题" #: ../../source/faqs.md:10 msgid "1. What devices are currently supported?" -msgstr "1. 目前支持哪些设备?" +msgstr "1.目前支持哪些设备?" #: ../../source/faqs.md:12 msgid "" @@ -115,7 +115,7 @@ msgstr "" #: ../../source/faqs.md:28 msgid "2. How to get our docker containers?" -msgstr "2. 如何获取我们的 Docker 容器?" +msgstr "2.如何获取我们的 Docker 容器?" #: ../../source/faqs.md:30 msgid "" @@ -154,7 +154,7 @@ msgstr "**在无互联网访问权限的环境中导入 Docker 镜像:**" #: ../../source/faqs.md:70 msgid "3. What models does vllm-ascend supports?" -msgstr "3. vllm-ascend 支持哪些模型?" +msgstr "3.vllm-ascend 支持哪些模型?" #: ../../source/faqs.md:72 msgid "" @@ -164,7 +164,7 @@ msgstr "更多详细信息请参见[此处](https://docs.vllm.ai/projects #: ../../source/faqs.md:74 msgid "4. How to get in touch with our community?" -msgstr "4. 如何与我们的社区取得联系?" +msgstr "4.如何与我们的社区取得联系?" #: ../../source/faqs.md:76 msgid "" @@ -205,7 +205,7 @@ msgstr "" #: ../../source/faqs.md:83 msgid "5. What features does vllm-ascend V1 supports?" -msgstr "5. vllm-ascend V1 支持哪些功能?" +msgstr "5.vllm-ascend V1 支持哪些功能?" #: ../../source/faqs.md:85 msgid "" @@ -217,7 +217,7 @@ msgstr "更多详细信息请参见[此处](https://docs.vllm.ai/projects msgid "" "6. How to solve the problem of \"Failed to infer device type\" or " "\"libatb.so: cannot open shared object file\"?" -msgstr "6. 如何解决“无法推断设备类型”或“libatb.so:无法打开共享对象文件”的问题?" +msgstr "6.如何解决“无法推断设备类型”或“libatb.so:无法打开共享对象文件”的问题?" #: ../../source/faqs.md:89 msgid "" @@ -251,7 +251,7 @@ msgstr "如果以上所有步骤都无法解决问题,请随时提交一个 Gi #: ../../source/faqs.md:105 msgid "7. How vllm-ascend work with vLLM?" -msgstr "7. vllm-ascend 如何与 vLLM 协同工作?" +msgstr "7.vllm-ascend 如何与 vLLM 协同工作?" #: ../../source/faqs.md:107 msgid "" @@ -266,7 +266,7 @@ msgstr "" #: ../../source/faqs.md:109 msgid "8. Does vllm-ascend support Prefill Disaggregation feature?" -msgstr "8. vllm-ascend 是否支持 Prefill Disaggregation 功能?" +msgstr "8.vllm-ascend 是否支持 Prefill Disaggregation 功能?" #: ../../source/faqs.md:111 msgid "" @@ -280,7 +280,7 @@ msgstr "" #: ../../source/faqs.md:113 msgid "9. Does vllm-ascend support quantization method?" -msgstr "9. vllm-ascend 是否支持量化方法?" +msgstr "9.vllm-ascend 是否支持量化方法?" #: ../../source/faqs.md:115 msgid "" @@ -290,7 +290,7 @@ msgstr "目前,vllm-ascend 已支持 w8a8、w4a8 和 w4a4 量化方法。" #: ../../source/faqs.md:117 msgid "10. How is vllm-ascend tested?" -msgstr "10. vllm-ascend 是如何测试的?" +msgstr "10.vllm-ascend 是如何测试的?" #: ../../source/faqs.md:119 msgid "" @@ -339,7 +339,7 @@ msgstr "对于每个版本,我们未来都将发布性能测试和准确性测 #: ../../source/faqs.md:131 msgid "11. How to fix the error \"InvalidVersion\" when using vllm-ascend?" -msgstr "11. 使用 vllm-ascend 时如何修复 \"InvalidVersion\" 错误?" +msgstr "11.使用 vllm-ascend 时如何修复 \"InvalidVersion\" 错误?" #: ../../source/faqs.md:133 msgid "" @@ -356,7 +356,7 @@ msgstr "" #: ../../source/faqs.md:135 msgid "12. How to handle the out-of-memory issue?" -msgstr "12. 如何处理内存不足问题?" +msgstr "12.如何处理内存不足问题?" #: ../../source/faqs.md:137 msgid "" @@ -410,7 +410,7 @@ msgstr "" #: ../../source/faqs.md:147 msgid "13. Failed to enable NPU graph mode when running DeepSeek" -msgstr "13. 运行 DeepSeek 时无法启用 NPU 图模式" +msgstr "13.运行 DeepSeek 时无法启用 NPU 图模式" #: ../../source/faqs.md:149 msgid "" @@ -438,7 +438,7 @@ msgstr "" msgid "" "14. Failed to reinstall vllm-ascend from source after uninstalling vllm-" "ascend" -msgstr "14. 卸载 vllm-ascend 后无法从源码重新安装 vllm-ascend" +msgstr "14.卸载 vllm-ascend 后无法从源码重新安装 vllm-ascend" #: ../../source/faqs.md:160 msgid "" @@ -452,7 +452,7 @@ msgstr "" #: ../../source/faqs.md:162 msgid "15. How to generate deterministic results when using vllm-ascend?" -msgstr "15. 使用 vllm-ascend 时如何生成确定性结果?" +msgstr "15.使用 vllm-ascend 时如何生成确定性结果?" #: ../../source/faqs.md:164 msgid "There are several factors that affect output determinism:" @@ -473,7 +473,7 @@ msgid "" "16. How to fix the error \"ImportError: Please install vllm[audio] for " "audio support\" for the Qwen2.5-Omni model?" msgstr "" -"16. 对于 Qwen2.5-Omni 模型,如何修复 \"ImportError: Please install vllm[audio] for" +"16.对于 Qwen2.5-Omni 模型,如何修复 \"ImportError: Please install vllm[audio] for" " audio support\" 错误?" #: ../../source/faqs.md:202 @@ -493,7 +493,7 @@ msgstr "" msgid "" "17. How to troubleshoot and resolve size capture failures resulting from " "stream resource exhaustion, and what are the underlying causes?" -msgstr "17. 如何排查和解决因流资源耗尽导致的尺寸捕获失败,其根本原因是什么?" +msgstr "17.如何排查和解决因流资源耗尽导致的尺寸捕获失败,其根本原因是什么?" #: ../../source/faqs.md:213 msgid "Recommended mitigation strategies:" @@ -531,7 +531,7 @@ msgstr "" #: ../../source/faqs.md:221 msgid "18. How to install custom version of torch_npu?" -msgstr "18. 如何安装自定义版本的 torch_npu?" +msgstr "18.如何安装自定义版本的 torch_npu?" #: ../../source/faqs.md:223 msgid "" @@ -546,7 +546,7 @@ msgstr "" msgid "" "19. On certain systems (e.g., Kylin OS), `docker pull` may fail with an " "`invalid tar header` error" -msgstr "19. 在某些系统上(例如 Kylin OS),`docker pull` 可能因 `invalid tar header` 错误而失败" +msgstr "19.在某些系统上(例如 Kylin OS),`docker pull` 可能因 `invalid tar header` 错误而失败" #: ../../source/faqs.md:227 msgid "" @@ -581,7 +581,7 @@ msgstr "将 `vllm_ascend_.tar` 文件(其中 `` 是你使用的镜 msgid "" "20. Why am I getting an error when executing the script to start a Docker" " container? The error message is: \"operation not permitted\"" -msgstr "20. 为什么执行启动 Docker 容器的脚本时会出错?错误信息是:\"operation not permitted\"" +msgstr "20.为什么执行启动 Docker 容器的脚本时会出错?错误信息是:\"operation not permitted\"" #: ../../source/faqs.md:254 msgid "" @@ -598,7 +598,7 @@ msgstr "" #: ../../source/faqs.md:256 msgid "21. How to achieve low latency in a small batch scenario?" -msgstr "21. 如何在小批量场景下实现低延迟?" +msgstr "21.如何在小批量场景下实现低延迟?" #: ../../source/faqs.md:258 msgid "" @@ -636,7 +636,7 @@ msgstr "" msgid "" "22. How to set `SOC_VERSION` when building from source on a CPU-only " "machine?" -msgstr "22. 在仅含 CPU 的机器上从源码构建时,如何设置 `SOC_VERSION`?" +msgstr "22.在仅含 CPU 的机器上从源码构建时,如何设置 `SOC_VERSION`?" #: ../../source/faqs.md:271 msgid "" @@ -654,7 +654,7 @@ msgstr "你可以参考 `Dockerfile*` 中的默认值。例如:" #: ../../source/faqs.md:289 msgid "23. Compilation error occasionally encounters with triton-ascend" -msgstr "23. triton-ascend 偶尔遇到编译错误" +msgstr "23.triton-ascend 偶尔遇到编译错误" #: ../../source/faqs.md:291 msgid "" @@ -670,7 +670,7 @@ msgstr "" #: ../../source/faqs.md:300 msgid "24. Why TPOT increases drastically as concurrency grows?" -msgstr "24. 为什么 TPOT 随着并发增长而急剧增加?" +msgstr "24.为什么 TPOT 随着并发增长而急剧增加?" #: ../../source/faqs.md:302 msgid "" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/installation.po b/docs/source/locale/zh_CN/LC_MESSAGES/installation.po index bdc91b07..258ceae6 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/installation.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/installation.po @@ -470,11 +470,11 @@ msgstr "互连验证" #: ../../source/installation.md:376 msgid "1. Get NPU IP Addresses" -msgstr "1. 获取 NPU IP 地址" +msgstr "1.获取 NPU IP 地址" #: ../../source/installation.md:399 msgid "2. Cross-Node PING Test" -msgstr "2. 跨节点 PING 测试" +msgstr "2.跨节点 PING 测试" #: ../../source/installation.md:406 msgid "Run Container In Each Node" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/kv_pool.po b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/kv_pool.po index 89399848..ab67ccc5 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/kv_pool.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/kv_pool.po @@ -285,7 +285,7 @@ msgstr "运行 Mooncake Master" #: ../../source/user_guide/feature_guide/kv_pool.md:109 msgid "1.Configure mooncake.json" -msgstr "1. 配置 mooncake.json" +msgstr "1.配置 mooncake.json" #: ../../source/user_guide/feature_guide/kv_pool.md:111 msgid "" @@ -307,7 +307,7 @@ msgstr "" #: ../../source/user_guide/feature_guide/kv_pool.md:129 msgid "2.Start mooncake_master" -msgstr "2. 启动 mooncake_master" +msgstr "2.启动 mooncake_master" #: ../../source/user_guide/feature_guide/kv_pool.md:131 msgid "Under the mooncake folder:" @@ -335,7 +335,7 @@ msgstr "PD 解耦场景" #: ../../source/user_guide/feature_guide/kv_pool.md:142 #: ../../source/user_guide/feature_guide/kv_pool.md:605 msgid "1.Run `prefill` Node and `decode` Node" -msgstr "1. 运行 `prefill` 节点和 `decode` 节点" +msgstr "1.运行 `prefill` 节点和 `decode` 节点" #: ../../source/user_guide/feature_guide/kv_pool.md:144 msgid "" @@ -392,7 +392,7 @@ msgstr "将 localhost 更改为您的实际 IP 地址。" #: ../../source/user_guide/feature_guide/kv_pool.md:321 msgid "3.Run Inference" -msgstr "3. 运行推理" +msgstr "3.运行推理" #: ../../source/user_guide/feature_guide/kv_pool.md:323 msgid "" @@ -417,7 +417,7 @@ msgstr "PD混合推理" #: ../../source/user_guide/feature_guide/kv_pool.md:339 #: ../../source/user_guide/feature_guide/kv_pool.md:916 msgid "1.Run Mixed Department Script" -msgstr "1. 运行混合部署脚本" +msgstr "1.运行混合部署脚本" #: ../../source/user_guide/feature_guide/kv_pool.md:345 #: ../../source/user_guide/feature_guide/kv_pool.md:1056 @@ -426,7 +426,7 @@ msgstr "pd_mix.sh 内容:" #: ../../source/user_guide/feature_guide/kv_pool.md:384 msgid "2.Run Inference" -msgstr "2. 运行推理" +msgstr "2.运行推理" #: ../../source/user_guide/feature_guide/kv_pool.md:386 msgid "" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/large_scale_ep.po b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/large_scale_ep.po index 99ab6f33..70d304d6 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/large_scale_ep.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/large_scale_ep.po @@ -484,7 +484,7 @@ msgstr "常见问题" #: ../../source/user_guide/feature_guide/large_scale_ep.md:498 msgid "1. Prefiller nodes need to warm up" -msgstr "1. 预填充节点需要预热" +msgstr "1.预填充节点需要预热" #: ../../source/user_guide/feature_guide/large_scale_ep.md:500 msgid "" diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/rfork.po b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/rfork.po index 0b128768..8975f1dc 100644 --- a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/rfork.po +++ b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/rfork.po @@ -286,11 +286,11 @@ msgstr "运行前替换 `` `<...>` `` 中的部分。" #: ../../source/user_guide/feature_guide/rfork.md:70 msgid "1. Install YuanRong TransferEngine" -msgstr "1. 安装 YuanRong TransferEngine" +msgstr "1.安装 YuanRong TransferEngine" #: ../../source/user_guide/feature_guide/rfork.md:76 msgid "2. Start the Planner" -msgstr "2. 启动规划器" +msgstr "2.启动规划器" #: ../../source/user_guide/feature_guide/rfork.md:78 msgid "" @@ -300,7 +300,7 @@ msgstr "在 [`rfork_planner.py`](../../../../examples/rfork/rfork_planner.py) #: ../../source/user_guide/feature_guide/rfork.md:86 msgid "3. Start vLLM Instances" -msgstr "3. 启动 vLLM 实例" +msgstr "3.启动 vLLM 实例" #: ../../source/user_guide/feature_guide/rfork.md:88 msgid "" diff --git a/docs/source/tutorials/models/DeepSeek-V3.2.md b/docs/source/tutorials/models/DeepSeek-V3.2.md index 65782c2f..e6c62fb0 100644 --- a/docs/source/tutorials/models/DeepSeek-V3.2.md +++ b/docs/source/tutorials/models/DeepSeek-V3.2.md @@ -526,7 +526,8 @@ Before you start, please export ASCEND_TRANSPORT_PRINT=1 export ACL_OP_INIT_MODE=1 export ASCEND_A3_ENABLE=1 - export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=300000 + # Timeout (in seconds) for automatically releasing the prefiller’s KV cache for a particular request. + export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480 export ASCEND_RT_VISIBLE_DEVICES=$1 @@ -600,7 +601,8 @@ Before you start, please export ASCEND_TRANSPORT_PRINT=1 export ACL_OP_INIT_MODE=1 export ASCEND_A3_ENABLE=1 - export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=300000 + # Timeout (in seconds) for automatically releasing the prefiller’s KV cache for a particular request. + export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480 export ASCEND_RT_VISIBLE_DEVICES=$1 @@ -676,7 +678,8 @@ Before you start, please export ASCEND_TRANSPORT_PRINT=1 export ACL_OP_INIT_MODE=1 export ASCEND_A3_ENABLE=1 - export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=300000 + # Timeout (in seconds) for automatically releasing the prefiller’s KV cache for a particular request. + export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480 export TASK_QUEUE_ENABLE=1 @@ -752,7 +755,8 @@ Before you start, please export ASCEND_TRANSPORT_PRINT=1 export ACL_OP_INIT_MODE=1 export ASCEND_A3_ENABLE=1 - export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=300000 + # Timeout (in seconds) for automatically releasing the prefiller’s KV cache for a particular request. + export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480 export TASK_QUEUE_ENABLE=1 diff --git a/docs/source/tutorials/models/GLM4.x.md b/docs/source/tutorials/models/GLM4.x.md index a167ecec..60273801 100644 --- a/docs/source/tutorials/models/GLM4.x.md +++ b/docs/source/tutorials/models/GLM4.x.md @@ -530,6 +530,8 @@ Before you start, please export ASCEND_TRANSPORT_PRINT=1 export ACL_OP_INIT_MODE=1 export ASCEND_A3_ENABLE=1 + # Timeout (in seconds) for automatically releasing the prefiller’s KV cache for a particular request. + export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480 export TASK_QUEUE_ENABLE=1 export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages/mooncake:$LD_LIBRARY_PATH export VLLM_ASCEND_ENABLE_TOPK_OPTIMIZE=1 @@ -598,6 +600,8 @@ Before you start, please export ASCEND_TRANSPORT_PRINT=1 export ACL_OP_INIT_MODE=1 export ASCEND_A3_ENABLE=1 + # Timeout (in seconds) for automatically releasing the prefiller’s KV cache for a particular request. + export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480 export TASK_QUEUE_ENABLE=1 export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages/mooncake:$LD_LIBRARY_PATH export VLLM_ASCEND_ENABLE_TOPK_OPTIMIZE=1 diff --git a/docs/source/tutorials/models/GLM5.md b/docs/source/tutorials/models/GLM5.md index 0bf9a5ea..86ec5de7 100644 --- a/docs/source/tutorials/models/GLM5.md +++ b/docs/source/tutorials/models/GLM5.md @@ -766,7 +766,8 @@ Before you start, please export ASCEND_TRANSPORT_PRINT=1 export ACL_OP_INIT_MODE=1 export ASCEND_A3_ENABLE=1 - export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=300000 + # Timeout (in seconds) for automatically releasing the prefiller’s KV cache for a particular request. + export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480 export ASCEND_RT_VISIBLE_DEVICES=$1 export VLLM_ASCEND_ENABLE_FLASHCOMM1=1 @@ -844,7 +845,8 @@ Before you start, please export ASCEND_TRANSPORT_PRINT=1 export ACL_OP_INIT_MODE=1 export ASCEND_A3_ENABLE=1 - export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=300000 + # Timeout (in seconds) for automatically releasing the prefiller’s KV cache for a particular request. + export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480 export ASCEND_RT_VISIBLE_DEVICES=$1 export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True @@ -926,7 +928,8 @@ Before you start, please export ASCEND_TRANSPORT_PRINT=1 export ACL_OP_INIT_MODE=1 export ASCEND_A3_ENABLE=1 - export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=300000 + # Timeout (in seconds) for automatically releasing the prefiller’s KV cache for a particular request. + export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480 export TASK_QUEUE_ENABLE=1 @@ -1007,7 +1010,8 @@ Before you start, please export ASCEND_TRANSPORT_PRINT=1 export ACL_OP_INIT_MODE=1 export ASCEND_A3_ENABLE=1 - export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=300000 + # Timeout (in seconds) for automatically releasing the prefiller’s KV cache for a particular request. + export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480 export TASK_QUEUE_ENABLE=1 @@ -1088,7 +1092,8 @@ Before you start, please export ASCEND_TRANSPORT_PRINT=1 export ACL_OP_INIT_MODE=1 export ASCEND_A3_ENABLE=1 - export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=300000 + # Timeout (in seconds) for automatically releasing the prefiller’s KV cache for a particular request. + export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480 export TASK_QUEUE_ENABLE=1 @@ -1169,7 +1174,8 @@ Before you start, please export ASCEND_TRANSPORT_PRINT=1 export ACL_OP_INIT_MODE=1 export ASCEND_A3_ENABLE=1 - export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=300000 + # Timeout (in seconds) for automatically releasing the prefiller’s KV cache for a particular request. + export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480 export TASK_QUEUE_ENABLE=1 diff --git a/docs/source/tutorials/models/Qwen3.5-397B-A17B.md b/docs/source/tutorials/models/Qwen3.5-397B-A17B.md index fa7fadb4..53d524e5 100644 --- a/docs/source/tutorials/models/Qwen3.5-397B-A17B.md +++ b/docs/source/tutorials/models/Qwen3.5-397B-A17B.md @@ -288,7 +288,8 @@ To run the vllm-ascend `Prefill-Decode Disaggregation` service, you need to depl # jemalloc is for better performance, if `libjemalloc.so` is installed on your machine, you can turn it on. # export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libjemalloc.so.2:$LD_PRELOAD export VLLM_ENGINE_READY_TIMEOUT_S=30000 - export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=30000 + # Timeout (in seconds) for automatically releasing the prefiller’s KV cache for a particular request. + export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480 export IP_ADDRESS=$local_ip export NETWORK_CARD_NAME=$nic_name export HCCL_IF_IP=$IP_ADDRESS @@ -362,7 +363,8 @@ To run the vllm-ascend `Prefill-Decode Disaggregation` service, you need to depl node0_ip="xxxx" export VLLM_ENGINE_READY_TIMEOUT_S=30000 - export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=30000 + # Timeout (in seconds) for automatically releasing the prefiller’s KV cache for a particular request. + export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480 export MASTER_IP_ADDRESS=$node0_ip export IP_ADDRESS=$local_ip @@ -442,7 +444,8 @@ To run the vllm-ascend `Prefill-Decode Disaggregation` service, you need to depl node0_ip="xxxx" export VLLM_ENGINE_READY_TIMEOUT_S=30000 - export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=30000 + # Timeout (in seconds) for automatically releasing the prefiller’s KV cache for a particular request. + export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480 export MASTER_IP_ADDRESS=$node0_ip export IP_ADDRESS=$local_ip diff --git a/tests/e2e/nightly/multi_node/config/DeepSeek-R1-W8A8-longseq.yaml b/tests/e2e/nightly/multi_node/config/DeepSeek-R1-W8A8-longseq.yaml index 1021a8db..0878245e 100644 --- a/tests/e2e/nightly/multi_node/config/DeepSeek-R1-W8A8-longseq.yaml +++ b/tests/e2e/nightly/multi_node/config/DeepSeek-R1-W8A8-longseq.yaml @@ -13,7 +13,7 @@ env_common: HCCL_DETERMINISTIC: True TASK_QUEUE_ENABLE: 1 HCCL_OP_RETRY_ENABLE: "L0:0, L1:0" - VLLM_NIXL_ABORT_REQUEST_TIMEOUT: 300000 + VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT: 480 disaggregated_prefill: enabled: true diff --git a/tests/e2e/nightly/multi_node/config/DeepSeek-V3_2-W8A8-EP-aime2025.yaml b/tests/e2e/nightly/multi_node/config/DeepSeek-V3_2-W8A8-EP-aime2025.yaml index e1b0a12c..b3d47ca0 100644 --- a/tests/e2e/nightly/multi_node/config/DeepSeek-V3_2-W8A8-EP-aime2025.yaml +++ b/tests/e2e/nightly/multi_node/config/DeepSeek-V3_2-W8A8-EP-aime2025.yaml @@ -15,7 +15,7 @@ env_common: ASCEND_TRANSPORT_PRINT: 1 ACL_OP_INIT_MODE: 1 ASCEND_A3_ENABLE: 1 - VLLM_NIXL_ABORT_REQUEST_TIMEOUT: 300000 + VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT: 480 VLLM_ENGINE_READY_TIMEOUT_S: 1800 HCCL_CONNECT_TIMEOUT: 1200 HCCL_INTRA_PCIE_ENABLE: 1 diff --git a/tests/e2e/nightly/multi_node/config/DeepSeek-V3_2-W8A8-EP.yaml b/tests/e2e/nightly/multi_node/config/DeepSeek-V3_2-W8A8-EP.yaml index 3c33d40b..9c46bb56 100644 --- a/tests/e2e/nightly/multi_node/config/DeepSeek-V3_2-W8A8-EP.yaml +++ b/tests/e2e/nightly/multi_node/config/DeepSeek-V3_2-W8A8-EP.yaml @@ -15,7 +15,7 @@ env_common: ASCEND_TRANSPORT_PRINT: 1 ACL_OP_INIT_MODE: 1 ASCEND_A3_ENABLE: 1 - VLLM_NIXL_ABORT_REQUEST_TIMEOUT: 300000 + VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT: 480 VLLM_ENGINE_READY_TIMEOUT_S: 1800 HCCL_CONNECT_TIMEOUT: 1200 HCCL_INTRA_PCIE_ENABLE: 1 diff --git a/vllm_ascend/distributed/kv_transfer/kv_p2p/mooncake_connector.py b/vllm_ascend/distributed/kv_transfer/kv_p2p/mooncake_connector.py index 1720c1b3..59a6fa7e 100644 --- a/vllm_ascend/distributed/kv_transfer/kv_p2p/mooncake_connector.py +++ b/vllm_ascend/distributed/kv_transfer/kv_p2p/mooncake_connector.py @@ -173,7 +173,7 @@ class KVCacheTaskTracker: while self.delayed_free_requests: request_id = next(iter(self.delayed_free_requests)) delay_start_time = self.delayed_free_requests[request_id] - if current_time - delay_start_time > envs.VLLM_NIXL_ABORT_REQUEST_TIMEOUT: + if current_time - delay_start_time > envs.VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT: self.delayed_free_requests.popitem(last=False) self.reqs_to_process.discard(request_id) expired_requests.add(request_id)