diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/KV_Cache_Pool_Guide.po b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/KV_Cache_Pool_Guide.po
index 78d0fde1..9c2fa065 100644
--- a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/KV_Cache_Pool_Guide.po
+++ b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/KV_Cache_Pool_Guide.po
@@ -134,7 +134,7 @@ msgstr ""
#: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:31
msgid "1. Combining KV Cache Pool with on-chip memory Prefix Caching"
-msgstr "1. 将 KV 缓存池与片上内存前缀缓存结合"
+msgstr "1.将 KV 缓存池与片上内存前缀缓存结合"
#: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:33
msgid ""
@@ -182,7 +182,7 @@ msgstr "将 KV 池中的 KV 缓存加载到片上内存后,剩余过程与片
#: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:46
msgid "2. Combining KV Cache Pool with Mooncake PD Disaggregation"
-msgstr "2. 将 KV 缓存池与 Mooncake PD 解耦结合"
+msgstr "2.将 KV 缓存池与 Mooncake PD 解耦结合"
#: ../../source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md:48
msgid ""
diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/disaggregated_prefill.po b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/disaggregated_prefill.po
index b31960da..4f91e790 100644
--- a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/disaggregated_prefill.po
+++ b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/disaggregated_prefill.po
@@ -96,7 +96,7 @@ msgstr "工作原理"
#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:29
msgid "1. Design Approach"
-msgstr "1. 设计思路"
+msgstr "1.设计思路"
#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:31
msgid ""
@@ -110,7 +110,7 @@ msgstr ""
#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:33
msgid "2. Implementation Design"
-msgstr "2. 实现设计"
+msgstr "2.实现设计"
#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:35
msgid ""
@@ -246,7 +246,7 @@ msgstr "**MooncakeConnectorWorker**:用于在工作进程中管理 KV 缓存
#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:64
msgid "4. Specifications Design"
-msgstr "4. 规格设计"
+msgstr "4.规格设计"
#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:66
msgid ""
@@ -322,7 +322,7 @@ msgstr "DFX 分析"
#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:87
msgid "1. Config Parameter Validation"
-msgstr "1. 配置参数验证"
+msgstr "1.配置参数验证"
#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:89
msgid ""
@@ -335,7 +335,7 @@ msgstr ""
#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:91
msgid "2. Port Conflict Detection"
-msgstr "2. 端口冲突检测"
+msgstr "2.端口冲突检测"
#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:93
msgid ""
@@ -348,7 +348,7 @@ msgstr ""
#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:95
msgid "3. PD Ratio Validation"
-msgstr "3. PD 比例验证"
+msgstr "3.PD 比例验证"
#: ../../source/developer_guide/Design_Documents/disaggregated_prefill.md:97
msgid ""
diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/eplb_swift_balancer.po b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/eplb_swift_balancer.po
index 034ab42e..87e9d84f 100644
--- a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/eplb_swift_balancer.po
+++ b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/eplb_swift_balancer.po
@@ -434,6 +434,10 @@ msgstr ""
msgid "Consistency"
msgstr "一致性"
+#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:236
+msgid "Expert Map"
+msgstr "专家映射"
+
#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:237
msgid ""
"The expert map must be globally unique during initialization and update. "
diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_ais_bench.po b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_ais_bench.po
index e58e6652..e421ef02 100644
--- a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_ais_bench.po
+++ b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_ais_bench.po
@@ -38,7 +38,7 @@ msgstr "在线服务器"
#: ../../source/developer_guide/evaluation/using_ais_bench.md:7
msgid "1. Start the vLLM server"
-msgstr "1. 启动 vLLM 服务器"
+msgstr "1.启动 vLLM 服务器"
#: ../../source/developer_guide/evaluation/using_ais_bench.md:9
msgid "You can run docker container to start the vLLM server on a single NPU:"
@@ -60,7 +60,7 @@ msgstr "如果看到如下日志,则 vLLM 服务器启动成功:"
#: ../../source/developer_guide/evaluation/using_ais_bench.md:56
msgid "2. Run different datasets using AISBench"
-msgstr "2. 使用 AISBench 运行不同数据集"
+msgstr "2.使用 AISBench 运行不同数据集"
#: ../../source/developer_guide/evaluation/using_ais_bench.md:58
msgid "Install AISBench"
@@ -227,7 +227,7 @@ msgstr "执行后,您可以从保存的文件中获取结果,示例如下:
#: ../../source/developer_guide/evaluation/using_ais_bench.md:300
msgid "3. Troubleshooting"
-msgstr "3. 故障排除"
+msgstr "3.故障排除"
#: ../../source/developer_guide/evaluation/using_ais_bench.md:302
msgid "Invalid Image Path Error"
diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_evalscope.po b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_evalscope.po
index cc4b49d7..ccbdf895 100644
--- a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_evalscope.po
+++ b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_evalscope.po
@@ -28,7 +28,7 @@ msgstr ""
#: ../../source/developer_guide/evaluation/using_evalscope.md:5
msgid "1. Online server"
-msgstr "1. 在线服务器"
+msgstr "1.在线服务器"
#: ../../source/developer_guide/evaluation/using_evalscope.md:7
msgid "You can run docker container to start the vLLM server on a single NPU:"
@@ -48,7 +48,7 @@ msgstr "服务器启动后,你可以在新的终端中使用输入提示词查
#: ../../source/developer_guide/evaluation/using_evalscope.md:56
msgid "2. Install EvalScope using pip"
-msgstr "2. 使用 pip 安装 EvalScope"
+msgstr "2.使用 pip 安装 EvalScope"
#: ../../source/developer_guide/evaluation/using_evalscope.md:58
msgid "You can install EvalScope as follows:"
@@ -56,7 +56,7 @@ msgstr "你可以通过以下方式安装 EvalScope:"
#: ../../source/developer_guide/evaluation/using_evalscope.md:66
msgid "3. Run GSM8K using EvalScope for accuracy testing"
-msgstr "3. 使用 EvalScope 运行 GSM8K 进行精度测试"
+msgstr "3.使用 EvalScope 运行 GSM8K 进行精度测试"
#: ../../source/developer_guide/evaluation/using_evalscope.md:68
msgid ""
@@ -81,7 +81,7 @@ msgstr ""
#: ../../source/developer_guide/evaluation/using_evalscope.md:92
msgid "4. Run model inference stress testing using EvalScope"
-msgstr "4. 使用 EvalScope 运行模型推理压力测试"
+msgstr "4.使用 EvalScope 运行模型推理压力测试"
#: ../../source/developer_guide/evaluation/using_evalscope.md:94
msgid "Install EvalScope[perf] using pip"
diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_lm_eval.po b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_lm_eval.po
index ee01492d..c6856465 100644
--- a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_lm_eval.po
+++ b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_lm_eval.po
@@ -33,7 +33,7 @@ msgstr "在线服务器"
#: ../../source/developer_guide/evaluation/using_lm_eval.md:7
msgid "1. Start the vLLM server"
-msgstr "1. 启动 vLLM 服务器"
+msgstr "1.启动 vLLM 服务器"
#: ../../source/developer_guide/evaluation/using_lm_eval.md:9
msgid "You can run docker container to start the vLLM server on a single NPU:"
@@ -48,7 +48,7 @@ msgid ""
"2. Run GSM8K using the vLLM server (curl) and then run lm-eval for "
"accuracy testing"
msgstr ""
-"2. 使用 vLLM 服务器(curl)运行 GSM8K,然后运行 lm-eval 进行准确率测试"
+"2.使用 vLLM 服务器(curl)运行 GSM8K,然后运行 lm-eval 进行准确率测试"
#: ../../source/developer_guide/evaluation/using_lm_eval.md:48
msgid "You can query the result with input prompts:"
@@ -90,7 +90,7 @@ msgstr "离线服务器"
#: ../../source/developer_guide/evaluation/using_lm_eval.md:145
msgid "1. Run docker container"
-msgstr "1. 运行 docker 容器"
+msgstr "1.运行 docker 容器"
#: ../../source/developer_guide/evaluation/using_lm_eval.md:147
msgid "You can run docker container on a single NPU:"
@@ -98,7 +98,7 @@ msgstr "您可以在单个 NPU 上运行 docker 容器:"
#: ../../source/developer_guide/evaluation/using_lm_eval.md:175
msgid "2. Run GSM8K using lm-eval for accuracy testing"
-msgstr "2. 使用 lm-eval 运行 GSM8K 进行准确率测试"
+msgstr "2.使用 lm-eval 运行 GSM8K 进行准确率测试"
#: ../../source/developer_guide/evaluation/using_lm_eval.md:203
msgid "After 1 to 2 minutes, the output is shown below:"
diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_opencompass.po b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_opencompass.po
index 4c860d0e..be6d0247 100644
--- a/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_opencompass.po
+++ b/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_opencompass.po
@@ -33,7 +33,7 @@ msgstr ""
#: ../../source/developer_guide/evaluation/using_opencompass.md:5
msgid "1. Online Server"
-msgstr "1. 在线服务"
+msgstr "1.在线服务"
#: ../../source/developer_guide/evaluation/using_opencompass.md:7
msgid "You can run a docker container to start the vLLM server on a single NPU:"
@@ -53,7 +53,7 @@ msgstr "服务器启动后,你可以在新的终端中使用输入提示词来
msgid ""
"2. Run C-Eval (a Chinese language model evaluation benchmark) using "
"OpenCompass for accuracy testing"
-msgstr "2. 使用 OpenCompass 运行 C-Eval 进行准确率测试"
+msgstr "2.使用 OpenCompass 运行 C-Eval 进行准确率测试"
#: ../../source/developer_guide/evaluation/using_opencompass.md:58
msgid ""
diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/faqs.po b/docs/source/locale/zh_CN/LC_MESSAGES/faqs.po
index d0e2f65f..9901e8c0 100644
--- a/docs/source/locale/zh_CN/LC_MESSAGES/faqs.po
+++ b/docs/source/locale/zh_CN/LC_MESSAGES/faqs.po
@@ -49,7 +49,7 @@ msgstr "通用常见问题"
#: ../../source/faqs.md:10
msgid "1. What devices are currently supported?"
-msgstr "1. 目前支持哪些设备?"
+msgstr "1.目前支持哪些设备?"
#: ../../source/faqs.md:12
msgid ""
@@ -115,7 +115,7 @@ msgstr ""
#: ../../source/faqs.md:28
msgid "2. How to get our docker containers?"
-msgstr "2. 如何获取我们的 Docker 容器?"
+msgstr "2.如何获取我们的 Docker 容器?"
#: ../../source/faqs.md:30
msgid ""
@@ -154,7 +154,7 @@ msgstr "**在无互联网访问权限的环境中导入 Docker 镜像:**"
#: ../../source/faqs.md:70
msgid "3. What models does vllm-ascend supports?"
-msgstr "3. vllm-ascend 支持哪些模型?"
+msgstr "3.vllm-ascend 支持哪些模型?"
#: ../../source/faqs.md:72
msgid ""
@@ -164,7 +164,7 @@ msgstr "更多详细信息请参见[此处](https://docs.vllm.ai/projects
#: ../../source/faqs.md:74
msgid "4. How to get in touch with our community?"
-msgstr "4. 如何与我们的社区取得联系?"
+msgstr "4.如何与我们的社区取得联系?"
#: ../../source/faqs.md:76
msgid ""
@@ -205,7 +205,7 @@ msgstr ""
#: ../../source/faqs.md:83
msgid "5. What features does vllm-ascend V1 supports?"
-msgstr "5. vllm-ascend V1 支持哪些功能?"
+msgstr "5.vllm-ascend V1 支持哪些功能?"
#: ../../source/faqs.md:85
msgid ""
@@ -217,7 +217,7 @@ msgstr "更多详细信息请参见[此处](https://docs.vllm.ai/projects
msgid ""
"6. How to solve the problem of \"Failed to infer device type\" or "
"\"libatb.so: cannot open shared object file\"?"
-msgstr "6. 如何解决“无法推断设备类型”或“libatb.so:无法打开共享对象文件”的问题?"
+msgstr "6.如何解决“无法推断设备类型”或“libatb.so:无法打开共享对象文件”的问题?"
#: ../../source/faqs.md:89
msgid ""
@@ -251,7 +251,7 @@ msgstr "如果以上所有步骤都无法解决问题,请随时提交一个 Gi
#: ../../source/faqs.md:105
msgid "7. How vllm-ascend work with vLLM?"
-msgstr "7. vllm-ascend 如何与 vLLM 协同工作?"
+msgstr "7.vllm-ascend 如何与 vLLM 协同工作?"
#: ../../source/faqs.md:107
msgid ""
@@ -266,7 +266,7 @@ msgstr ""
#: ../../source/faqs.md:109
msgid "8. Does vllm-ascend support Prefill Disaggregation feature?"
-msgstr "8. vllm-ascend 是否支持 Prefill Disaggregation 功能?"
+msgstr "8.vllm-ascend 是否支持 Prefill Disaggregation 功能?"
#: ../../source/faqs.md:111
msgid ""
@@ -280,7 +280,7 @@ msgstr ""
#: ../../source/faqs.md:113
msgid "9. Does vllm-ascend support quantization method?"
-msgstr "9. vllm-ascend 是否支持量化方法?"
+msgstr "9.vllm-ascend 是否支持量化方法?"
#: ../../source/faqs.md:115
msgid ""
@@ -290,7 +290,7 @@ msgstr "目前,vllm-ascend 已支持 w8a8、w4a8 和 w4a4 量化方法。"
#: ../../source/faqs.md:117
msgid "10. How is vllm-ascend tested?"
-msgstr "10. vllm-ascend 是如何测试的?"
+msgstr "10.vllm-ascend 是如何测试的?"
#: ../../source/faqs.md:119
msgid ""
@@ -339,7 +339,7 @@ msgstr "对于每个版本,我们未来都将发布性能测试和准确性测
#: ../../source/faqs.md:131
msgid "11. How to fix the error \"InvalidVersion\" when using vllm-ascend?"
-msgstr "11. 使用 vllm-ascend 时如何修复 \"InvalidVersion\" 错误?"
+msgstr "11.使用 vllm-ascend 时如何修复 \"InvalidVersion\" 错误?"
#: ../../source/faqs.md:133
msgid ""
@@ -356,7 +356,7 @@ msgstr ""
#: ../../source/faqs.md:135
msgid "12. How to handle the out-of-memory issue?"
-msgstr "12. 如何处理内存不足问题?"
+msgstr "12.如何处理内存不足问题?"
#: ../../source/faqs.md:137
msgid ""
@@ -410,7 +410,7 @@ msgstr ""
#: ../../source/faqs.md:147
msgid "13. Failed to enable NPU graph mode when running DeepSeek"
-msgstr "13. 运行 DeepSeek 时无法启用 NPU 图模式"
+msgstr "13.运行 DeepSeek 时无法启用 NPU 图模式"
#: ../../source/faqs.md:149
msgid ""
@@ -438,7 +438,7 @@ msgstr ""
msgid ""
"14. Failed to reinstall vllm-ascend from source after uninstalling vllm-"
"ascend"
-msgstr "14. 卸载 vllm-ascend 后无法从源码重新安装 vllm-ascend"
+msgstr "14.卸载 vllm-ascend 后无法从源码重新安装 vllm-ascend"
#: ../../source/faqs.md:160
msgid ""
@@ -452,7 +452,7 @@ msgstr ""
#: ../../source/faqs.md:162
msgid "15. How to generate deterministic results when using vllm-ascend?"
-msgstr "15. 使用 vllm-ascend 时如何生成确定性结果?"
+msgstr "15.使用 vllm-ascend 时如何生成确定性结果?"
#: ../../source/faqs.md:164
msgid "There are several factors that affect output determinism:"
@@ -473,7 +473,7 @@ msgid ""
"16. How to fix the error \"ImportError: Please install vllm[audio] for "
"audio support\" for the Qwen2.5-Omni model?"
msgstr ""
-"16. 对于 Qwen2.5-Omni 模型,如何修复 \"ImportError: Please install vllm[audio] for"
+"16.对于 Qwen2.5-Omni 模型,如何修复 \"ImportError: Please install vllm[audio] for"
" audio support\" 错误?"
#: ../../source/faqs.md:202
@@ -493,7 +493,7 @@ msgstr ""
msgid ""
"17. How to troubleshoot and resolve size capture failures resulting from "
"stream resource exhaustion, and what are the underlying causes?"
-msgstr "17. 如何排查和解决因流资源耗尽导致的尺寸捕获失败,其根本原因是什么?"
+msgstr "17.如何排查和解决因流资源耗尽导致的尺寸捕获失败,其根本原因是什么?"
#: ../../source/faqs.md:213
msgid "Recommended mitigation strategies:"
@@ -531,7 +531,7 @@ msgstr ""
#: ../../source/faqs.md:221
msgid "18. How to install custom version of torch_npu?"
-msgstr "18. 如何安装自定义版本的 torch_npu?"
+msgstr "18.如何安装自定义版本的 torch_npu?"
#: ../../source/faqs.md:223
msgid ""
@@ -546,7 +546,7 @@ msgstr ""
msgid ""
"19. On certain systems (e.g., Kylin OS), `docker pull` may fail with an "
"`invalid tar header` error"
-msgstr "19. 在某些系统上(例如 Kylin OS),`docker pull` 可能因 `invalid tar header` 错误而失败"
+msgstr "19.在某些系统上(例如 Kylin OS),`docker pull` 可能因 `invalid tar header` 错误而失败"
#: ../../source/faqs.md:227
msgid ""
@@ -581,7 +581,7 @@ msgstr "将 `vllm_ascend_.tar` 文件(其中 `` 是你使用的镜
msgid ""
"20. Why am I getting an error when executing the script to start a Docker"
" container? The error message is: \"operation not permitted\""
-msgstr "20. 为什么执行启动 Docker 容器的脚本时会出错?错误信息是:\"operation not permitted\""
+msgstr "20.为什么执行启动 Docker 容器的脚本时会出错?错误信息是:\"operation not permitted\""
#: ../../source/faqs.md:254
msgid ""
@@ -598,7 +598,7 @@ msgstr ""
#: ../../source/faqs.md:256
msgid "21. How to achieve low latency in a small batch scenario?"
-msgstr "21. 如何在小批量场景下实现低延迟?"
+msgstr "21.如何在小批量场景下实现低延迟?"
#: ../../source/faqs.md:258
msgid ""
@@ -636,7 +636,7 @@ msgstr ""
msgid ""
"22. How to set `SOC_VERSION` when building from source on a CPU-only "
"machine?"
-msgstr "22. 在仅含 CPU 的机器上从源码构建时,如何设置 `SOC_VERSION`?"
+msgstr "22.在仅含 CPU 的机器上从源码构建时,如何设置 `SOC_VERSION`?"
#: ../../source/faqs.md:271
msgid ""
@@ -654,7 +654,7 @@ msgstr "你可以参考 `Dockerfile*` 中的默认值。例如:"
#: ../../source/faqs.md:289
msgid "23. Compilation error occasionally encounters with triton-ascend"
-msgstr "23. triton-ascend 偶尔遇到编译错误"
+msgstr "23.triton-ascend 偶尔遇到编译错误"
#: ../../source/faqs.md:291
msgid ""
@@ -670,7 +670,7 @@ msgstr ""
#: ../../source/faqs.md:300
msgid "24. Why TPOT increases drastically as concurrency grows?"
-msgstr "24. 为什么 TPOT 随着并发增长而急剧增加?"
+msgstr "24.为什么 TPOT 随着并发增长而急剧增加?"
#: ../../source/faqs.md:302
msgid ""
diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/installation.po b/docs/source/locale/zh_CN/LC_MESSAGES/installation.po
index bdc91b07..258ceae6 100644
--- a/docs/source/locale/zh_CN/LC_MESSAGES/installation.po
+++ b/docs/source/locale/zh_CN/LC_MESSAGES/installation.po
@@ -470,11 +470,11 @@ msgstr "互连验证"
#: ../../source/installation.md:376
msgid "1. Get NPU IP Addresses"
-msgstr "1. 获取 NPU IP 地址"
+msgstr "1.获取 NPU IP 地址"
#: ../../source/installation.md:399
msgid "2. Cross-Node PING Test"
-msgstr "2. 跨节点 PING 测试"
+msgstr "2.跨节点 PING 测试"
#: ../../source/installation.md:406
msgid "Run Container In Each Node"
diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/kv_pool.po b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/kv_pool.po
index 89399848..ab67ccc5 100644
--- a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/kv_pool.po
+++ b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/kv_pool.po
@@ -285,7 +285,7 @@ msgstr "运行 Mooncake Master"
#: ../../source/user_guide/feature_guide/kv_pool.md:109
msgid "1.Configure mooncake.json"
-msgstr "1. 配置 mooncake.json"
+msgstr "1.配置 mooncake.json"
#: ../../source/user_guide/feature_guide/kv_pool.md:111
msgid ""
@@ -307,7 +307,7 @@ msgstr ""
#: ../../source/user_guide/feature_guide/kv_pool.md:129
msgid "2.Start mooncake_master"
-msgstr "2. 启动 mooncake_master"
+msgstr "2.启动 mooncake_master"
#: ../../source/user_guide/feature_guide/kv_pool.md:131
msgid "Under the mooncake folder:"
@@ -335,7 +335,7 @@ msgstr "PD 解耦场景"
#: ../../source/user_guide/feature_guide/kv_pool.md:142
#: ../../source/user_guide/feature_guide/kv_pool.md:605
msgid "1.Run `prefill` Node and `decode` Node"
-msgstr "1. 运行 `prefill` 节点和 `decode` 节点"
+msgstr "1.运行 `prefill` 节点和 `decode` 节点"
#: ../../source/user_guide/feature_guide/kv_pool.md:144
msgid ""
@@ -392,7 +392,7 @@ msgstr "将 localhost 更改为您的实际 IP 地址。"
#: ../../source/user_guide/feature_guide/kv_pool.md:321
msgid "3.Run Inference"
-msgstr "3. 运行推理"
+msgstr "3.运行推理"
#: ../../source/user_guide/feature_guide/kv_pool.md:323
msgid ""
@@ -417,7 +417,7 @@ msgstr "PD混合推理"
#: ../../source/user_guide/feature_guide/kv_pool.md:339
#: ../../source/user_guide/feature_guide/kv_pool.md:916
msgid "1.Run Mixed Department Script"
-msgstr "1. 运行混合部署脚本"
+msgstr "1.运行混合部署脚本"
#: ../../source/user_guide/feature_guide/kv_pool.md:345
#: ../../source/user_guide/feature_guide/kv_pool.md:1056
@@ -426,7 +426,7 @@ msgstr "pd_mix.sh 内容:"
#: ../../source/user_guide/feature_guide/kv_pool.md:384
msgid "2.Run Inference"
-msgstr "2. 运行推理"
+msgstr "2.运行推理"
#: ../../source/user_guide/feature_guide/kv_pool.md:386
msgid ""
diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/large_scale_ep.po b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/large_scale_ep.po
index 99ab6f33..70d304d6 100644
--- a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/large_scale_ep.po
+++ b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/large_scale_ep.po
@@ -484,7 +484,7 @@ msgstr "常见问题"
#: ../../source/user_guide/feature_guide/large_scale_ep.md:498
msgid "1. Prefiller nodes need to warm up"
-msgstr "1. 预填充节点需要预热"
+msgstr "1.预填充节点需要预热"
#: ../../source/user_guide/feature_guide/large_scale_ep.md:500
msgid ""
diff --git a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/rfork.po b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/rfork.po
index 0b128768..8975f1dc 100644
--- a/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/rfork.po
+++ b/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/rfork.po
@@ -286,11 +286,11 @@ msgstr "运行前替换 `` `<...>` `` 中的部分。"
#: ../../source/user_guide/feature_guide/rfork.md:70
msgid "1. Install YuanRong TransferEngine"
-msgstr "1. 安装 YuanRong TransferEngine"
+msgstr "1.安装 YuanRong TransferEngine"
#: ../../source/user_guide/feature_guide/rfork.md:76
msgid "2. Start the Planner"
-msgstr "2. 启动规划器"
+msgstr "2.启动规划器"
#: ../../source/user_guide/feature_guide/rfork.md:78
msgid ""
@@ -300,7 +300,7 @@ msgstr "在 [`rfork_planner.py`](../../../../examples/rfork/rfork_planner.py)
#: ../../source/user_guide/feature_guide/rfork.md:86
msgid "3. Start vLLM Instances"
-msgstr "3. 启动 vLLM 实例"
+msgstr "3.启动 vLLM 实例"
#: ../../source/user_guide/feature_guide/rfork.md:88
msgid ""
diff --git a/docs/source/tutorials/models/DeepSeek-V3.2.md b/docs/source/tutorials/models/DeepSeek-V3.2.md
index 65782c2f..e6c62fb0 100644
--- a/docs/source/tutorials/models/DeepSeek-V3.2.md
+++ b/docs/source/tutorials/models/DeepSeek-V3.2.md
@@ -526,7 +526,8 @@ Before you start, please
export ASCEND_TRANSPORT_PRINT=1
export ACL_OP_INIT_MODE=1
export ASCEND_A3_ENABLE=1
- export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=300000
+ # Timeout (in seconds) for automatically releasing the prefiller's KV cache for a particular request.
+ export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480
export ASCEND_RT_VISIBLE_DEVICES=$1
@@ -600,7 +601,8 @@ Before you start, please
export ASCEND_TRANSPORT_PRINT=1
export ACL_OP_INIT_MODE=1
export ASCEND_A3_ENABLE=1
- export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=300000
+ # Timeout (in seconds) for automatically releasing the prefiller's KV cache for a particular request.
+ export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480
export ASCEND_RT_VISIBLE_DEVICES=$1
@@ -676,7 +678,8 @@ Before you start, please
export ASCEND_TRANSPORT_PRINT=1
export ACL_OP_INIT_MODE=1
export ASCEND_A3_ENABLE=1
- export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=300000
+ # Timeout (in seconds) for automatically releasing the prefiller's KV cache for a particular request.
+ export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480
export TASK_QUEUE_ENABLE=1
@@ -752,7 +755,8 @@ Before you start, please
export ASCEND_TRANSPORT_PRINT=1
export ACL_OP_INIT_MODE=1
export ASCEND_A3_ENABLE=1
- export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=300000
+ # Timeout (in seconds) for automatically releasing the prefiller's KV cache for a particular request.
+ export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480
export TASK_QUEUE_ENABLE=1
diff --git a/docs/source/tutorials/models/GLM4.x.md b/docs/source/tutorials/models/GLM4.x.md
index a167ecec..60273801 100644
--- a/docs/source/tutorials/models/GLM4.x.md
+++ b/docs/source/tutorials/models/GLM4.x.md
@@ -530,6 +530,8 @@ Before you start, please
export ASCEND_TRANSPORT_PRINT=1
export ACL_OP_INIT_MODE=1
export ASCEND_A3_ENABLE=1
+ # Timeout (in seconds) for automatically releasing the prefiller's KV cache for a particular request.
+ export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480
export TASK_QUEUE_ENABLE=1
export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages/mooncake:$LD_LIBRARY_PATH
export VLLM_ASCEND_ENABLE_TOPK_OPTIMIZE=1
@@ -598,6 +600,8 @@ Before you start, please
export ASCEND_TRANSPORT_PRINT=1
export ACL_OP_INIT_MODE=1
export ASCEND_A3_ENABLE=1
+ # Timeout (in seconds) for automatically releasing the prefiller's KV cache for a particular request.
+ export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480
export TASK_QUEUE_ENABLE=1
export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages/mooncake:$LD_LIBRARY_PATH
export VLLM_ASCEND_ENABLE_TOPK_OPTIMIZE=1
diff --git a/docs/source/tutorials/models/GLM5.md b/docs/source/tutorials/models/GLM5.md
index 0bf9a5ea..86ec5de7 100644
--- a/docs/source/tutorials/models/GLM5.md
+++ b/docs/source/tutorials/models/GLM5.md
@@ -766,7 +766,8 @@ Before you start, please
export ASCEND_TRANSPORT_PRINT=1
export ACL_OP_INIT_MODE=1
export ASCEND_A3_ENABLE=1
- export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=300000
+ # Timeout (in seconds) for automatically releasing the prefiller's KV cache for a particular request.
+ export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480
export ASCEND_RT_VISIBLE_DEVICES=$1
export VLLM_ASCEND_ENABLE_FLASHCOMM1=1
@@ -844,7 +845,8 @@ Before you start, please
export ASCEND_TRANSPORT_PRINT=1
export ACL_OP_INIT_MODE=1
export ASCEND_A3_ENABLE=1
- export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=300000
+ # Timeout (in seconds) for automatically releasing the prefiller's KV cache for a particular request.
+ export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480
export ASCEND_RT_VISIBLE_DEVICES=$1
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
@@ -926,7 +928,8 @@ Before you start, please
export ASCEND_TRANSPORT_PRINT=1
export ACL_OP_INIT_MODE=1
export ASCEND_A3_ENABLE=1
- export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=300000
+ # Timeout (in seconds) for automatically releasing the prefiller's KV cache for a particular request.
+ export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480
export TASK_QUEUE_ENABLE=1
@@ -1007,7 +1010,8 @@ Before you start, please
export ASCEND_TRANSPORT_PRINT=1
export ACL_OP_INIT_MODE=1
export ASCEND_A3_ENABLE=1
- export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=300000
+ # Timeout (in seconds) for automatically releasing the prefiller's KV cache for a particular request.
+ export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480
export TASK_QUEUE_ENABLE=1
@@ -1088,7 +1092,8 @@ Before you start, please
export ASCEND_TRANSPORT_PRINT=1
export ACL_OP_INIT_MODE=1
export ASCEND_A3_ENABLE=1
- export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=300000
+ # Timeout (in seconds) for automatically releasing the prefiller's KV cache for a particular request.
+ export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480
export TASK_QUEUE_ENABLE=1
@@ -1169,7 +1174,8 @@ Before you start, please
export ASCEND_TRANSPORT_PRINT=1
export ACL_OP_INIT_MODE=1
export ASCEND_A3_ENABLE=1
- export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=300000
+ # Timeout (in seconds) for automatically releasing the prefiller's KV cache for a particular request.
+ export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480
export TASK_QUEUE_ENABLE=1
diff --git a/docs/source/tutorials/models/Qwen3.5-397B-A17B.md b/docs/source/tutorials/models/Qwen3.5-397B-A17B.md
index fa7fadb4..53d524e5 100644
--- a/docs/source/tutorials/models/Qwen3.5-397B-A17B.md
+++ b/docs/source/tutorials/models/Qwen3.5-397B-A17B.md
@@ -288,7 +288,8 @@ To run the vllm-ascend `Prefill-Decode Disaggregation` service, you need to depl
# jemalloc is for better performance, if `libjemalloc.so` is installed on your machine, you can turn it on.
# export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libjemalloc.so.2:$LD_PRELOAD
export VLLM_ENGINE_READY_TIMEOUT_S=30000
- export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=30000
+ # Timeout (in seconds) for automatically releasing the prefiller's KV cache for a particular request.
+ export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480
export IP_ADDRESS=$local_ip
export NETWORK_CARD_NAME=$nic_name
export HCCL_IF_IP=$IP_ADDRESS
@@ -362,7 +363,8 @@ To run the vllm-ascend `Prefill-Decode Disaggregation` service, you need to depl
node0_ip="xxxx"
export VLLM_ENGINE_READY_TIMEOUT_S=30000
- export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=30000
+ # Timeout (in seconds) for automatically releasing the prefiller's KV cache for a particular request.
+ export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480
export MASTER_IP_ADDRESS=$node0_ip
export IP_ADDRESS=$local_ip
@@ -442,7 +444,8 @@ To run the vllm-ascend `Prefill-Decode Disaggregation` service, you need to depl
node0_ip="xxxx"
export VLLM_ENGINE_READY_TIMEOUT_S=30000
- export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=30000
+ # Timeout (in seconds) for automatically releasing the prefiller's KV cache for a particular request.
+ export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480
export MASTER_IP_ADDRESS=$node0_ip
export IP_ADDRESS=$local_ip
diff --git a/tests/e2e/nightly/multi_node/config/DeepSeek-R1-W8A8-longseq.yaml b/tests/e2e/nightly/multi_node/config/DeepSeek-R1-W8A8-longseq.yaml
index 1021a8db..0878245e 100644
--- a/tests/e2e/nightly/multi_node/config/DeepSeek-R1-W8A8-longseq.yaml
+++ b/tests/e2e/nightly/multi_node/config/DeepSeek-R1-W8A8-longseq.yaml
@@ -13,7 +13,7 @@ env_common:
HCCL_DETERMINISTIC: True
TASK_QUEUE_ENABLE: 1
HCCL_OP_RETRY_ENABLE: "L0:0, L1:0"
- VLLM_NIXL_ABORT_REQUEST_TIMEOUT: 300000
+ VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT: 480
disaggregated_prefill:
enabled: true
diff --git a/tests/e2e/nightly/multi_node/config/DeepSeek-V3_2-W8A8-EP-aime2025.yaml b/tests/e2e/nightly/multi_node/config/DeepSeek-V3_2-W8A8-EP-aime2025.yaml
index e1b0a12c..b3d47ca0 100644
--- a/tests/e2e/nightly/multi_node/config/DeepSeek-V3_2-W8A8-EP-aime2025.yaml
+++ b/tests/e2e/nightly/multi_node/config/DeepSeek-V3_2-W8A8-EP-aime2025.yaml
@@ -15,7 +15,7 @@ env_common:
ASCEND_TRANSPORT_PRINT: 1
ACL_OP_INIT_MODE: 1
ASCEND_A3_ENABLE: 1
- VLLM_NIXL_ABORT_REQUEST_TIMEOUT: 300000
+ VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT: 480
VLLM_ENGINE_READY_TIMEOUT_S: 1800
HCCL_CONNECT_TIMEOUT: 1200
HCCL_INTRA_PCIE_ENABLE: 1
diff --git a/tests/e2e/nightly/multi_node/config/DeepSeek-V3_2-W8A8-EP.yaml b/tests/e2e/nightly/multi_node/config/DeepSeek-V3_2-W8A8-EP.yaml
index 3c33d40b..9c46bb56 100644
--- a/tests/e2e/nightly/multi_node/config/DeepSeek-V3_2-W8A8-EP.yaml
+++ b/tests/e2e/nightly/multi_node/config/DeepSeek-V3_2-W8A8-EP.yaml
@@ -15,7 +15,7 @@ env_common:
ASCEND_TRANSPORT_PRINT: 1
ACL_OP_INIT_MODE: 1
ASCEND_A3_ENABLE: 1
- VLLM_NIXL_ABORT_REQUEST_TIMEOUT: 300000
+ VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT: 480
VLLM_ENGINE_READY_TIMEOUT_S: 1800
HCCL_CONNECT_TIMEOUT: 1200
HCCL_INTRA_PCIE_ENABLE: 1
diff --git a/vllm_ascend/distributed/kv_transfer/kv_p2p/mooncake_connector.py b/vllm_ascend/distributed/kv_transfer/kv_p2p/mooncake_connector.py
index 1720c1b3..59a6fa7e 100644
--- a/vllm_ascend/distributed/kv_transfer/kv_p2p/mooncake_connector.py
+++ b/vllm_ascend/distributed/kv_transfer/kv_p2p/mooncake_connector.py
@@ -173,7 +173,7 @@ class KVCacheTaskTracker:
while self.delayed_free_requests:
request_id = next(iter(self.delayed_free_requests))
delay_start_time = self.delayed_free_requests[request_id]
- if current_time - delay_start_time > envs.VLLM_NIXL_ABORT_REQUEST_TIMEOUT:
+ if current_time - delay_start_time > envs.VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT:
self.delayed_free_requests.popitem(last=False)
self.reqs_to_process.discard(request_id)
expired_requests.add(request_id)