提交vllm0.11.0开发分支

This commit is contained in:
chenyili
2025-12-10 17:51:24 +08:00
parent deab7dd0b6
commit 7c22d621fb
175 changed files with 31856 additions and 8683 deletions

View File

@@ -0,0 +1,30 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2025, vllm-kunlun team
# This file is distributed under the same license as the vllm-kunlun
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2025.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: vllm-kunlun\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-11-10 16:59+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../source/tutorials/DeepSeek-V3.2-Exp.md:1
msgid "DeepSeek-V3.2-Exp"
msgstr ""
#: ../../source/tutorials/DeepSeek-V3.2-Exp.md:3
msgid "Introduction"
msgstr ""

View File

@@ -0,0 +1,29 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2025, vllm-kunlun team
# This file is distributed under the same license as the vllm-kunlun
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2025.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: vllm-kunlun\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-07-18 09:01+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Language: zh_CN\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"Generated-By: Babel 2.17.0\n"
#: ../../tutorials/index.md:3
msgid "Deployment"
msgstr "部署"
#: ../../tutorials/index.md:1
msgid "Tutorials"
msgstr "教程"

View File

@@ -0,0 +1,213 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2025, vllm-kunlun team
# This file is distributed under the same license as the vllm-kunlun
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2025.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: vllm-kunlun\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-11-10 16:59+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../source/tutorials/multi_node.md:1
msgid "Multi-Node-DP (DeepSeek)"
msgstr "多节点分布式处理DeepSeek"
#: ../../source/tutorials/multi_node.md:3
msgid "Getting Start"
msgstr "快速开始"
#~ msgid ""
#~ "vLLM-Kunlun now supports Data Parallel"
#~ " (DP) deployment, enabling model weights"
#~ " to be replicated across multiple "
#~ "XPUs or instances, each processing "
#~ "independent batches of requests. This is"
#~ " particularly useful for scaling throughput"
#~ " across devices while maintaining high "
#~ "resource utilization."
#~ msgstr ""
#~ "vLLM-Kunlun 现在支持数据并行DP部署可以在多个 XPU "
#~ "或实例之间复制模型权重,每个实例处理独立的请求批次。这对于在保证高资源利用率的同时,实现跨设备的吞吐量扩展特别有用。"
#~ msgid ""
#~ "Each DP rank is deployed as a "
#~ "separate “core engine” process which "
#~ "communicates with front-end process(es) "
#~ "via ZMQ sockets. Data Parallel can "
#~ "be combined with Tensor Parallel, in "
#~ "which case each DP engine owns a"
#~ " number of per-XPU worker processes"
#~ " equal to the TP size."
#~ msgstr ""
#~ "每个 DP 进程作为一个单独的“核心引擎”进程部署,并通过 ZMQ "
#~ "套接字与前端进程通信。数据并行可以与张量并行结合使用,此时每个 DP 引擎拥有数量等于 TP "
#~ "大小的每 XPU 工作进程。"
#~ msgid ""
#~ "For Mixture-of-Experts (MoE) models "
#~ "— especially advanced architectures like "
#~ "DeepSeek that utilize Multi-head Latent"
#~ " Attention (MLA) — a hybrid "
#~ "parallelism approach is recommended: - "
#~ "Use **Data Parallelism (DP)** for "
#~ "attention layers, which are replicated "
#~ "across devices and handle separate "
#~ "batches. - Use **Expert or Tensor"
#~ " Parallelism (EP/TP)** for expert layers,"
#~ " which are sharded across devices to"
#~ " distribute the computation."
#~ msgstr ""
#~ "对于混合专家Mixture-of-Experts, MoE模型——尤其是像 "
#~ "DeepSeek 这样采用多头潜在注意力Multi-head Latent "
#~ "Attention, MLA的高级架构——推荐使用混合并行策略\n"
#~ " - 对于注意力层,使用 **数据并行Data Parallelism, DP**,这些层会在各设备间复刻,并处理不同的批次。\n"
#~ " - 对于专家层,使用 **专家并行或张量并行Expert or "
#~ "Tensor Parallelism, EP/TP**,这些层会在设备间分片,从而分担计算。"
#~ msgid ""
#~ "This division enables attention layers "
#~ "to be replicated across Data Parallel"
#~ " (DP) ranks, enabling them to process"
#~ " different batches independently. Meanwhile, "
#~ "expert layers are partitioned (sharded) "
#~ "across devices using Expert or Tensor"
#~ " Parallelism(DP*TP), maximizing hardware "
#~ "utilization and efficiency."
#~ msgstr "这种划分使得注意力层能够在数据并行DP组内复制从而能够独立处理不同的批次。同时专家层通过专家或张量并行DP*TP在设备间进行分区切片最大化硬件利用率和效率。"
#~ msgid ""
#~ "In these cases the data parallel "
#~ "ranks are not completely independent, "
#~ "forward passes must be aligned and "
#~ "expert layers across all ranks are "
#~ "required to synchronize during every "
#~ "forward pass, even if there are "
#~ "fewer requests to be processed than "
#~ "DP ranks."
#~ msgstr ""
#~ "在这些情况下,数据并行的各个 rank 不是完全独立的,前向传播必须对齐,并且所有 rank "
#~ "上的专家层在每次前向传播时都需要同步,即使待处理的请求数量少于 DP rank 的数量。"
#~ msgid ""
#~ "For MoE models, when any requests "
#~ "are in progress in any rank, we"
#~ " must ensure that empty “dummy” "
#~ "forward passes are performed in all "
#~ "ranks which dont currently have any "
#~ "requests scheduled. This is handled via"
#~ " a separate DP `Coordinator` process "
#~ "which communicates with all of the "
#~ "ranks, and a collective operation "
#~ "performed every N steps to determine "
#~ "when all ranks become idle and can"
#~ " be paused. When TP is used in"
#~ " conjunction with DP, expert layers "
#~ "form an EP or TP group of "
#~ "size (DP x TP)."
#~ msgstr ""
#~ "对于 MoE 模型,当任何一个 rank 有请求正在进行时,必须确保所有当前没有请求的"
#~ " rank 都执行空的“虚拟”前向传播。这是通过一个单独的 DP `Coordinator`"
#~ " 协调器进程来实现的,该进程与所有 rank 通信,并且每隔 N "
#~ "步执行一次集体操作,以判断所有 rank 是否都处于空闲状态并可以暂停。当 TP 与 "
#~ "DP 结合使用时专家层会组成一个规模为DP x TP的 EP 或 "
#~ "TP 组。"
#~ msgid "Verify Multi-Node Communication Environment"
#~ msgstr "验证多节点通信环境"
#~ msgid "Physical Layer Requirements:"
#~ msgstr "物理层要求:"
#~ msgid ""
#~ "The physical machines must be located"
#~ " on the same WLAN, with network "
#~ "connectivity."
#~ msgstr "物理机器必须位于同一个 WLAN 中,并且具有网络连接。"
#~ msgid ""
#~ "All XPUs are connected with optical "
#~ "modules, and the connection status must"
#~ " be normal."
#~ msgstr "所有 XPU 都通过光模块连接,且连接状态必须正常。"
#~ msgid "Verification Process:"
#~ msgstr "验证流程:"
#~ msgid ""
#~ "Execute the following commands on each"
#~ " node in sequence. The results must"
#~ " all be `success` and the status "
#~ "must be `UP`:"
#~ msgstr "在每个节点上依次执行以下命令。所有结果必须为 `success` 且状态必须为 `UP`"
#~ msgid "XPU Interconnect Verification:"
#~ msgstr "XPU 互连验证:"
#~ msgid "1. Get XPU IP Addresses"
#~ msgstr "1. 获取 XPU IP 地址"
#~ msgid "2. Cross-Node PING Test"
#~ msgstr "2. 跨节点PING测试"
#~ msgid "Run with docker"
#~ msgstr "用 docker 运行"
#~ msgid ""
#~ "Assume you have two Atlas 800 "
#~ "A2(64G*8) nodes, and want to deploy "
#~ "the `deepseek-v3-w8a8` quantitative model "
#~ "across multi-node."
#~ msgstr "假设你有两台 Atlas 800 A264G*8节点并且想要在多节点上部署 `deepseek-v3-w8a8` 量化模型。"
#~ msgid ""
#~ "Before launch the inference server, "
#~ "ensure some environment variables are "
#~ "set for multi node communication"
#~ msgstr "在启动推理服务器之前,确保已经为多节点通信设置了一些环境变量。"
#~ msgid "Run the following scripts on two nodes respectively"
#~ msgstr "分别在两台节点上运行以下脚本"
#~ msgid "**node0**"
#~ msgstr "**节点0**"
#~ msgid "**node1**"
#~ msgstr "**节点1**"
#~ msgid ""
#~ "The Deployment view looks like: ![alt"
#~ " text](../assets/multi_node_dp.png)"
#~ msgstr "部署视图如下所示:![替代文本](../assets/multi_node_dp.png)"
#~ msgid "alt text"
#~ msgstr "替代文本"
#~ msgid ""
#~ "Once your server is started, you "
#~ "can query the model with input "
#~ "prompts:"
#~ msgstr "一旦你的服务器启动,你可以通过输入提示词来查询模型:"
#~ msgid "Run benchmarks"
#~ msgstr "运行基准测试"
#~ msgid ""
#~ "For details please refer to "
#~ "[benchmark](https://github.com/vllm-project/vllm-"
#~ "kunlun/tree/main/benchmarks)"
#~ msgstr ""
#~ "详细信息请参阅 [benchmark](https://github.com/vllm-project"
#~ "/vllm-kunlun/tree/main/benchmarks)"

View File

@@ -0,0 +1,30 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2025, vllm-kunlun team
# This file is distributed under the same license as the vllm-kunlun
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2025.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: vllm-kunlun\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-11-10 16:59+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../source/tutorials/multi_node_kimi.md:1
msgid "Multi-Node-DP (Kimi-K2)"
msgstr ""
#: ../../source/tutorials/multi_node_kimi.md:3
msgid "Verify Multi-Node Communication Environment"
msgstr ""

View File

@@ -0,0 +1,30 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2025, vllm-kunlun team
# This file is distributed under the same license as the vllm-kunlun
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2025.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: vllm-kunlun\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-11-10 16:59+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../source/tutorials/multi_node_pd_disaggregation_llmdatadist.md:1
msgid "Prefill-Decode Disaggregation Llmdatadist Verification (Qwen)"
msgstr ""
#: ../../source/tutorials/multi_node_pd_disaggregation_llmdatadist.md:3
msgid "Getting Start"
msgstr ""

View File

@@ -0,0 +1,30 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2025, vllm-kunlun team
# This file is distributed under the same license as the vllm-kunlun
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2025.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: vllm-kunlun\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-11-10 16:59+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../source/tutorials/multi_node_pd_disaggregation_mooncake.md:1
msgid "Prefill-Decode Disaggregation Mooncake Verification (Qwen)"
msgstr ""
#: ../../source/tutorials/multi_node_pd_disaggregation_mooncake.md:3
msgid "Getting Start"
msgstr ""

View File

@@ -0,0 +1,26 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2025, vllm-kunlun team
# This file is distributed under the same license as the vllm-kunlun
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2025.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: vllm-kunlun\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-11-10 16:59+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../source/tutorials/multi_node_qwen3vl.md:1
msgid "Multi-Node-DP (Qwen3-VL-235B-A22B)"
msgstr ""

View File

@@ -0,0 +1,26 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2025, vllm-kunlun team
# This file is distributed under the same license as the vllm-kunlun
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2025.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: vllm-kunlun\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-11-10 16:59+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../source/tutorials/multi_node_ray.md:1
msgid "Multi-Node-Ray (Qwen/Qwen3-235B-A22B)"
msgstr ""

View File

@@ -0,0 +1,53 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2025, vllm-kunlun team
# This file is distributed under the same license as the vllm-kunlun
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2025.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: vllm-kunlun\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-11-10 16:59+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../source/tutorials/multi_npu.md:1
msgid "Multi-XPU (QwQ 32B)"
msgstr "多XPUQwQ 32B"
#~ msgid "Run vllm-kunlun on Multi-XPU"
#~ msgstr "在多XPU上运行 vllm-kunlun"
#~ msgid "Run docker container:"
#~ msgstr "运行 docker 容器:"
#~ msgid "Setup environment variables:"
#~ msgstr "设置环境变量:"
#~ msgid "Online Inference on Multi-XPU"
#~ msgstr "多XPU的在线推理"
#~ msgid "Run the following script to start the vLLM server on Multi-XPU:"
#~ msgstr "运行以下脚本在多XPU上启动 vLLM 服务器:"
#~ msgid "Once your server is started, you can query the model with input prompts"
#~ msgstr "一旦服务器启动,就可以通过输入提示词来查询模型。"
#~ msgid "Offline Inference on Multi-XPU"
#~ msgstr "多XPU离线推理"
#~ msgid "Run the following script to execute offline inference on multi-XPU:"
#~ msgstr "运行以下脚本以在多XPU上执行离线推理"
#~ msgid "If you run this script successfully, you can see the info shown below:"
#~ msgstr "如果你成功运行此脚本,你可以看到如下所示的信息:"

View File

@@ -0,0 +1,74 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2025, vllm-kunlun team
# This file is distributed under the same license as the vllm-kunlun
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2025.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: vllm-kunlun\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-11-10 16:59+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../source/tutorials/multi_npu_moge.md:1
msgid "Multi-XPU (Pangu Pro MoE)"
msgstr "多XPUPangu Pro MoE"
#~ msgid "Run vllm-kunlun on Multi-XPU"
#~ msgstr "在多XPU上运行 vllm-kunlun"
#~ msgid "Run container:"
#~ msgstr "运行容器:"
#~ msgid "Setup environment variables:"
#~ msgstr "设置环境变量:"
#~ msgid "Download the model:"
#~ msgstr "下载该模型:"
#~ msgid "Online Inference on Multi-XPU"
#~ msgstr "多XPU上的在线推理"
#~ msgid "Run the following script to start the vLLM server on Multi-XPU:"
#~ msgstr "运行以下脚本在多XPU上启动 vLLM 服务器:"
#~ msgid ""
#~ "Once your server is started, you "
#~ "can query the model with input "
#~ "prompts:"
#~ msgstr "一旦你的服务器启动,你可以通过输入提示词来查询模型:"
#~ msgid "v1/completions"
#~ msgstr "v1/completions"
#~ msgid "v1/chat/completions"
#~ msgstr "v1/chat/completions"
#~ msgid "If you run this successfully, you can see the info shown below:"
#~ msgstr "如果你成功运行这个,你可以看到如下所示的信息:"
#~ msgid "Offline Inference on Multi-XPU"
#~ msgstr "多XPU离线推理"
#~ msgid "Run the following script to execute offline inference on multi-XPU:"
#~ msgstr "运行以下脚本以在多XPU上执行离线推理"
#~ msgid "Graph Mode"
#~ msgstr "图模式"
#~ msgid "Eager Mode"
#~ msgstr "即时模式"
#~ msgid "If you run this script successfully, you can see the info shown below:"
#~ msgstr "如果你成功运行此脚本,你可以看到如下所示的信息:"

View File

@@ -0,0 +1,82 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2025, vllm-kunlun team
# This file is distributed under the same license as the vllm-kunlun
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2025.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: vllm-kunlun\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-11-10 16:59+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../source/tutorials/multi_npu_quantization.md:1
msgid "Multi-XPU (QwQ 32B W8A8)"
msgstr "多XPUQwQ 32B W8A8"
#: ../../source/tutorials/multi_npu_quantization.md:3
#, fuzzy
msgid "Run Docker Container"
msgstr "运行 docker 容器"
#~ msgid "w8a8 quantization feature is supported by v0.8.4rc2 or higher"
#~ msgstr "w8a8 量化功能由 v0.8.4rc2 或更高版本支持"
#~ msgid "Install modelslim and convert model"
#~ msgstr "安装 modelslim 并转换模型"
#~ msgid ""
#~ "You can choose to convert the "
#~ "model yourself or use the quantized "
#~ "model we uploaded, see "
#~ "https://www.modelscope.cn/models/vllm-kunlun/QwQ-32B-"
#~ "W8A8"
#~ msgstr ""
#~ "你可以选择自己转换模型,或者使用我们上传的量化模型,详见 https://www.modelscope.cn/models"
#~ "/vllm-kunlun/QwQ-32B-W8A8"
#~ msgid "Verify the quantized model"
#~ msgstr "验证量化模型"
#~ msgid "The converted model files looks like:"
#~ msgstr "转换后的模型文件如下所示:"
#~ msgid "Run the following script to start the vLLM server with quantized model:"
#~ msgstr "运行以下脚本以启动带有量化模型的 vLLM 服务器:"
#~ msgid ""
#~ "The value \"kunlun\" for \"--"
#~ "quantization\" argument will be supported "
#~ "after [a specific PR](https://github.com/vllm-"
#~ "project/vllm-kunlun/pull/877) is merged and"
#~ " released, you can cherry-pick this"
#~ " commit for now."
#~ msgstr ""
#~ "在 [特定的PR](https://github.com/vllm-project/vllm-"
#~ "kunlun/pull/877) 合并并发布后, \"--quantization\" "
#~ "参数将支持值 \"kunlun\",你也可以现在手动挑选该提交。"
#~ msgid "Once your server is started, you can query the model with input prompts"
#~ msgstr "一旦服务器启动,就可以通过输入提示词来查询模型。"
#~ msgid ""
#~ "Run the following script to execute "
#~ "offline inference on multi-XPU with "
#~ "quantized model:"
#~ msgstr "运行以下脚本在多XPU上使用量化模型执行离线推理"
#~ msgid ""
#~ "To enable quantization for kunlun, "
#~ "quantization method must be \"kunlun\""
#~ msgstr "要在kunlun上启用量化量化方法必须为“kunlun”。"

View File

@@ -0,0 +1,63 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2025, vllm-kunlun team
# This file is distributed under the same license as the vllm-kunlun
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2025.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: vllm-kunlun\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-11-10 16:59+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../source/tutorials/multi_npu_qwen3_moe.md:1
msgid "Multi-XPU (Qwen3-30B-A3B)"
msgstr "多XPUQwen3-30B-A3B"
#~ msgid "Run vllm-kunlun on Multi-XPU with Qwen3 MoE"
#~ msgstr "在多XPU上运行带有Qwen3 MoE的vllm-kunlun"
#~ msgid "Run docker container:"
#~ msgstr "运行 docker 容器:"
#~ msgid "Setup environment variables:"
#~ msgstr "设置环境变量:"
#~ msgid "Online Inference on Multi-XPU"
#~ msgstr "多XPU的在线推理"
#~ msgid "Run the following script to start the vLLM server on Multi-XPU:"
#~ msgstr "运行以下脚本以在多XPU上启动 vLLM 服务器:"
#~ msgid ""
#~ "For an Atlas A2 with 64GB of "
#~ "XPU card memory, tensor-parallel-size"
#~ " should be at least 2, and for"
#~ " 32GB of memory, tensor-parallel-size"
#~ " should be at least 4."
#~ msgstr ""
#~ "对于拥有64GB XPU卡内存的Atlas A2tensor-parallel-size"
#~ " 至少应为2对于32GB内存的XPU卡tensor-parallel-size 至少应为4。"
#~ msgid "Once your server is started, you can query the model with input prompts"
#~ msgstr "一旦服务器启动,就可以通过输入提示词来查询模型。"
#~ msgid "Offline Inference on Multi-XPU"
#~ msgstr "多XPU离线推理"
#~ msgid "Run the following script to execute offline inference on multi-XPU:"
#~ msgstr "运行以下脚本以在多XPU上执行离线推理"
#~ msgid "If you run this script successfully, you can see the info shown below:"
#~ msgstr "如果你成功运行此脚本,你可以看到如下所示的信息:"

View File

@@ -0,0 +1,26 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2025, vllm-kunlun team
# This file is distributed under the same license as the vllm-kunlun
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2025.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: vllm-kunlun\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-11-10 16:59+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../source/tutorials/multi_npu_qwen3_next.md:1
msgid "Multi-XPU (Qwen3-Next)"
msgstr ""

View File

@@ -0,0 +1,94 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2025, vllm-kunlun team
# This file is distributed under the same license as the vllm-kunlun
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2025.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: vllm-kunlun\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-11-10 16:59+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../source/tutorials/single_node_300i.md:1
#, fuzzy
msgid "Single Node (Atlas 300I Series)"
msgstr "单节点Atlas 300I 系列"
#~ msgid ""
#~ "This Atlas 300I series is currently "
#~ "experimental. In future versions, there "
#~ "may be behavioral changes around model"
#~ " coverage, performance improvement."
#~ msgstr "Atlas 300I 系列目前处于实验阶段。在未来的版本中,模型覆盖范围和性能提升方面可能会有行为上的变化。"
#~ msgid "Run vLLM on Altlas 300I series"
#~ msgstr "在 Atlas 300I 系列上运行 vLLM"
#~ msgid "Run docker container:"
#~ msgstr "运行 docker 容器:"
#~ msgid "Setup environment variables:"
#~ msgstr "设置环境变量:"
#~ msgid "Online Inference on XPU"
#~ msgstr "在XPU上进行在线推理"
#~ msgid ""
#~ "Run the following script to start "
#~ "the vLLM server on XPU(Qwen3-0.6B:1 "
#~ "card, Qwen2.5-7B-Instruct:2 cards, Pangu-"
#~ "Pro-MoE-72B: 8 cards):"
#~ msgstr ""
#~ "运行以下脚本,在 XPU 上启动 vLLM 服务器Qwen3-0.6B1 "
#~ "张卡Qwen2.5-7B-Instruct2 张卡Pangu-Pro-MoE-"
#~ "72B8 张卡):"
#~ msgid "Qwen3-0.6B"
#~ msgstr "Qwen3-0.6B"
#~ msgid "Run the following command to start the vLLM server:"
#~ msgstr "运行以下命令以启动 vLLM 服务器:"
#~ msgid "Once your server is started, you can query the model with input prompts"
#~ msgstr "一旦服务器启动,就可以通过输入提示词来查询模型。"
#~ msgid "Qwen/Qwen2.5-7B-Instruct"
#~ msgstr "Qwen/Qwen2.5-7B-Instruct"
#~ msgid "Pangu-Pro-MoE-72B"
#~ msgstr "Pangu-Pro-MoE-72B"
#~ msgid "Download the model:"
#~ msgstr "下载该模型:"
#~ msgid "If you run this script successfully, you can see the results."
#~ msgstr "如果你成功运行此脚本,你就可以看到结果。"
#~ msgid "Offline Inference"
#~ msgstr "离线推理"
#~ msgid ""
#~ "Run the following script (`example.py`) "
#~ "to execute offline inference on XPU:"
#~ msgstr "运行以下脚本(`example.py`)以在 XPU 上执行离线推理:"
#~ msgid "Qwen2.5-7B-Instruct"
#~ msgstr "Qwen2.5-7B-Instruct"
#~ msgid "Run script:"
#~ msgstr "运行脚本:"
#~ msgid "If you run this script successfully, you can see the info shown below:"
#~ msgstr "如果你成功运行此脚本,你可以看到如下所示的信息:"

View File

@@ -0,0 +1,106 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2025, vllm-kunlun team
# This file is distributed under the same license as the vllm-kunlun
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2025.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: vllm-kunlun\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-11-10 16:59+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../source/tutorials/single_npu.md:1
msgid "Single XPU (Qwen3 8B)"
msgstr "单个XPUQwen3 8B"
#: ../../source/tutorials/single_npu.md:3
msgid "Run vllm-kunlun on Single XPU"
msgstr "在单个 XPU 上运行 vllm-kunlun"
#: ../../source/tutorials/single_npu.md:5
msgid "Offline Inference on Single XPU"
msgstr "在单个XPU上进行离线推理"
#~ msgid "Run docker container:"
#~ msgstr "运行 docker 容器:"
#~ msgid "Setup environment variables:"
#~ msgstr "设置环境变量:"
#~ msgid ""
#~ "`max_split_size_mb` prevents the native "
#~ "allocator from splitting blocks larger "
#~ "than this size (in MB). This can"
#~ " reduce fragmentation and may allow "
#~ "some borderline workloads to complete "
#~ "without running out of memory. You "
#~ "can find more details "
#~ "[<u>here</u>](https://www.hikunlun.com/document/detail/zh/CANNCommunityEdition/800alpha003/apiref/envref/envref_07_0061.html)."
#~ msgstr ""
#~ "`max_split_size_mb` 防止本地分配器拆分超过此大小(以 MB "
#~ "为单位)的内存块。这可以减少内存碎片,并且可能让一些边缘情况下的工作负载顺利完成而不会耗尽内存。你可以在[<u>这里</u>](https://www.hikunlun.com/document/detail/zh/CANNCommunityEdition/800alpha003/apiref/envref/envref_07_0061.html)找到更多详细信息。"
#~ msgid "Run the following script to execute offline inference on a single XPU:"
#~ msgstr "运行以下脚本以在单个 XPU 上执行离线推理:"
#~ msgid "Graph Mode"
#~ msgstr "图模式"
#~ msgid "Eager Mode"
#~ msgstr "即时模式"
#~ msgid "If you run this script successfully, you can see the info shown below:"
#~ msgstr "如果你成功运行此脚本,你可以看到如下所示的信息:"
#~ msgid "Online Serving on Single XPU"
#~ msgstr "单个 XPU 上的在线服务"
#~ msgid "Run docker container to start the vLLM server on a single XPU:"
#~ msgstr "运行 docker 容器,在单个 XPU 上启动 vLLM 服务器:"
#~ msgid ""
#~ "Add `--max_model_len` option to avoid "
#~ "ValueError that the Qwen2.5-7B model's "
#~ "max seq len (32768) is larger than"
#~ " the maximum number of tokens that"
#~ " can be stored in KV cache "
#~ "(26240). This will differ with different"
#~ " XPU series base on the HBM "
#~ "size. Please modify the value according"
#~ " to a suitable value for your "
#~ "XPU series."
#~ msgstr ""
#~ "添加 `--max_model_len` 选项,以避免出现 Qwen2.5-7B "
#~ "模型的最大序列长度32768大于 KV 缓存能存储的最大 token "
#~ "数26240时的 ValueError。不同 XPU 系列由于 HBM "
#~ "容量不同,该值也会有所不同。请根据您的 XPU 系列,修改为合适的数值。"
#~ msgid "If your service start successfully, you can see the info shown below:"
#~ msgstr "如果你的服务启动成功,你会看到如下所示的信息:"
#~ msgid ""
#~ "Once your server is started, you "
#~ "can query the model with input "
#~ "prompts:"
#~ msgstr "一旦你的服务器启动,你可以通过输入提示词来查询模型:"
#~ msgid ""
#~ "If you query the server successfully,"
#~ " you can see the info shown "
#~ "below (client):"
#~ msgstr "如果你成功查询了服务器,你可以看到如下所示的信息(客户端):"
#~ msgid "Logs of the vllm server:"
#~ msgstr "vllm 服务器的日志:"

View File

@@ -0,0 +1,77 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2025, vllm-kunlun team
# This file is distributed under the same license as the vllm-kunlun
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2025.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: vllm-kunlun\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-07-18 09:01+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Language: zh_CN\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"Generated-By: Babel 2.17.0\n"
#: ../../tutorials/single_npu_audio.md:1
msgid "Single XPU (Qwen2-Audio 7B)"
msgstr "单个 XPUQwen2-Audio 7B"
#: ../../tutorials/single_npu_audio.md:3
msgid "Run vllm-kunlun on Single XPU"
msgstr "在单个 XPU 上运行 vllm-kunlun"
#: ../../tutorials/single_npu_audio.md:5
msgid "Offline Inference on Single XPU"
msgstr "在单个XPU上进行离线推理"
#: ../../tutorials/single_npu_audio.md:7
msgid "Run docker container:"
msgstr "运行 docker 容器:"
#: ../../tutorials/single_npu_audio.md:29
msgid "Setup environment variables:"
msgstr "设置环境变量:"
#: ../../tutorials/single_npu_audio.md:40
msgid ""
"`max_split_size_mb` prevents the native allocator from splitting blocks "
"larger than this size (in MB). This can reduce fragmentation and may allow "
"some borderline workloads to complete without running out of memory. You can"
" find more details "
"[<u>here</u>](https://www.hikunlun.com/document/detail/zh/CANNCommunityEdition/800alpha003/apiref/envref/envref_07_0061.html)."
msgstr ""
"`max_split_size_mb` 防止本地分配器拆分超过此大小(以 MB "
"为单位)的内存块。这可以减少内存碎片,并且可能让一些边缘情况下的工作负载顺利完成而不会耗尽内存。你可以在[<u>这里</u>](https://www.hikunlun.com/document/detail/zh/CANNCommunityEdition/800alpha003/apiref/envref/envref_07_0061.html)找到更多详细信息。"
#: ../../tutorials/single_npu_audio.md:43
msgid "Install packages required for audio processing:"
msgstr "安装音频处理所需的软件包:"
#: ../../tutorials/single_npu_audio.md:50
msgid "Run the following script to execute offline inference on a single XPU:"
msgstr "运行以下脚本以在单个 XPU 上执行离线推理:"
#: ../../tutorials/single_npu_audio.md:114
msgid "If you run this script successfully, you can see the info shown below:"
msgstr "如果你成功运行此脚本,你可以看到如下所示的信息:"
#: ../../tutorials/single_npu_audio.md:120
msgid "Online Serving on Single XPU"
msgstr "单个 XPU 上的在线服务"
#: ../../tutorials/single_npu_audio.md:122
msgid ""
"Currently, vllm's OpenAI-compatible server doesn't support audio inputs, "
"find more details [<u>here</u>](https://github.com/vllm-"
"project/vllm/issues/19977)."
msgstr ""
"目前vllm 的兼容 OpenAI 的服务器不支持音频输入,更多详情请查看[<u>这里</u>](https://github.com/vllm-"
"project/vllm/issues/19977)。"

View File

@@ -0,0 +1,99 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2025, vllm-kunlun team
# This file is distributed under the same license as the vllm-kunlun
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2025.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: vllm-kunlun\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-07-18 09:01+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Language: zh_CN\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"Generated-By: Babel 2.17.0\n"
#: ../../tutorials/single_npu_multimodal.md:1
msgid "Single XPU (Qwen2.5-VL 7B)"
msgstr "单个XPUQwen2.5-VL 7B"
#: ../../tutorials/single_npu_multimodal.md:3
msgid "Run vllm-kunlun on Single XPU"
msgstr "在单个 XPU 上运行 vllm-kunlun"
#: ../../tutorials/single_npu_multimodal.md:5
msgid "Offline Inference on Single XPU"
msgstr "在单个XPU上进行离线推理"
#: ../../tutorials/single_npu_multimodal.md:7
msgid "Run docker container:"
msgstr "运行 docker 容器:"
#: ../../tutorials/single_npu_multimodal.md:29
msgid "Setup environment variables:"
msgstr "设置环境变量:"
#: ../../tutorials/single_npu_multimodal.md:40
msgid ""
"`max_split_size_mb` prevents the native allocator from splitting blocks "
"larger than this size (in MB). This can reduce fragmentation and may allow "
"some borderline workloads to complete without running out of memory. You can"
" find more details "
"[<u>here</u>](https://www.hikunlun.com/document/detail/zh/CANNCommunityEdition/800alpha003/apiref/envref/envref_07_0061.html)."
msgstr ""
"`max_split_size_mb` 防止本地分配器拆分超过此大小(以 MB "
"为单位)的内存块。这可以减少内存碎片,并且可能让一些边缘情况下的工作负载顺利完成而不会耗尽内存。你可以在[<u>这里</u>](https://www.hikunlun.com/document/detail/zh/CANNCommunityEdition/800alpha003/apiref/envref/envref_07_0061.html)找到更多详细信息。"
#: ../../tutorials/single_npu_multimodal.md:43
msgid "Run the following script to execute offline inference on a single XPU:"
msgstr "运行以下脚本以在单个 XPU 上执行离线推理:"
#: ../../tutorials/single_npu_multimodal.md:109
msgid "If you run this script successfully, you can see the info shown below:"
msgstr "如果你成功运行此脚本,你可以看到如下所示的信息:"
#: ../../tutorials/single_npu_multimodal.md:121
msgid "Online Serving on Single XPU"
msgstr "单个 XPU 上的在线服务"
#: ../../tutorials/single_npu_multimodal.md:123
msgid "Run docker container to start the vLLM server on a single XPU:"
msgstr "运行 docker 容器,在单个 XPU 上启动 vLLM 服务器:"
#: ../../tutorials/single_npu_multimodal.md:154
msgid ""
"Add `--max_model_len` option to avoid ValueError that the "
"Qwen2.5-VL-7B-Instruct model's max seq len (128000) is larger than the "
"maximum number of tokens that can be stored in KV cache. This will differ "
"with different XPU series base on the HBM size. Please modify the value "
"according to a suitable value for your XPU series."
msgstr ""
"新增 `--max_model_len` 选项,以避免出现 ValueError即 Qwen2.5-VL-7B-Instruct "
"模型的最大序列长度128000大于 KV 缓存可存储的最大 token 数。该数值会根据不同 XPU 系列的 HBM 大小而不同。请根据你的 XPU"
" 系列,将该值设置为合适的数值。"
#: ../../tutorials/single_npu_multimodal.md:157
msgid "If your service start successfully, you can see the info shown below:"
msgstr "如果你的服务启动成功,你会看到如下所示的信息:"
#: ../../tutorials/single_npu_multimodal.md:165
msgid ""
"Once your server is started, you can query the model with input prompts:"
msgstr "一旦你的服务器启动,你可以通过输入提示词来查询模型:"
#: ../../tutorials/single_npu_multimodal.md:182
msgid ""
"If you query the server successfully, you can see the info shown below "
"(client):"
msgstr "如果你成功查询了服务器,你可以看到如下所示的信息(客户端):"
#: ../../tutorials/single_npu_multimodal.md:188
msgid "Logs of the vllm server:"
msgstr "vllm 服务器的日志:"

View File

@@ -0,0 +1,38 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2025, vllm-kunlun team
# This file is distributed under the same license as the vllm-kunlun
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2025.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: vllm-kunlun\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-11-10 16:59+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../source/tutorials/single_npu_qwen2.5_vl.md:1
msgid "Single XPU (Qwen2.5-VL 7B)"
msgstr ""
#: ../../source/tutorials/single_npu_qwen2.5_vl.md:3
msgid "Run vllm-kunlun on Single XPU"
msgstr ""
#: ../../source/tutorials/single_npu_qwen2.5_vl.md:5
msgid "Offline Inference on Single XPU"
msgstr ""
#: ../../source/tutorials/single_npu_qwen2.5_vl.md:7
msgid "Run docker container:"
msgstr ""

View File

@@ -0,0 +1,38 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2025, vllm-kunlun team
# This file is distributed under the same license as the vllm-kunlun
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2025.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: vllm-kunlun\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-11-10 16:59+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../source/tutorials/single_npu_qwen2_audio.md:1
msgid "Single XPU (Qwen2-Audio 7B)"
msgstr ""
#: ../../source/tutorials/single_npu_qwen2_audio.md:3
msgid "Run vllm-kunlun on Single XPU"
msgstr ""
#: ../../source/tutorials/single_npu_qwen2_audio.md:5
msgid "Offline Inference on Single XPU"
msgstr ""
#: ../../source/tutorials/single_npu_qwen2_audio.md:7
msgid "Run docker container:"
msgstr ""

View File

@@ -0,0 +1,77 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2025, vllm-kunlun team
# This file is distributed under the same license as the vllm-kunlun
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2025.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: vllm-kunlun\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-11-10 16:59+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../source/tutorials/single_npu_qwen3_embedding.md:1
msgid "Single XPU (Qwen3-Embedding-8B)"
msgstr "单个 XPUQwen3-Embedding-8B"
#: ../../source/tutorials/single_npu_qwen3_embedding.md:3
msgid ""
"The Qwen3 Embedding model series is the latest proprietary model of the "
"Qwen family,"
msgstr "Qwen3 Embedding 模型系列是 Qwen 家族最新的专有模型,"
#~ msgid ""
#~ "The Qwen3 Embedding model series is "
#~ "the latest proprietary model of the "
#~ "Qwen family, specifically designed for "
#~ "text embedding and ranking tasks. "
#~ "Building upon the dense foundational "
#~ "models of the Qwen3 series, it "
#~ "provides a comprehensive range of text"
#~ " embeddings and reranking models in "
#~ "various sizes (0.6B, 4B, and 8B). "
#~ "This guide describes how to run "
#~ "the model with vLLM Kunlun. Note "
#~ "that only 0.9.2rc1 and higher versions"
#~ " of vLLM Kunlun support the model."
#~ msgstr ""
#~ "Qwen3 Embedding 模型系列是 Qwen "
#~ "家族最新的专有模型,专为文本嵌入和排序任务设计。在 Qwen3 "
#~ "系列的密集基础模型之上它提供了多种尺寸0.6B、4B 和 8B的文本嵌入与重排序模型。本指南介绍如何使用"
#~ " vLLM Kunlun 运行该模型。请注意,只有 vLLM Kunlun "
#~ "0.9.2rc1 及更高版本才支持该模型。"
#~ msgid "Run docker container"
#~ msgstr "运行 docker 容器"
#~ msgid ""
#~ "Take Qwen3-Embedding-8B model as an "
#~ "example, first run the docker container"
#~ " with the following command:"
#~ msgstr "以 Qwen3-Embedding-8B 模型为例,首先使用以下命令运行 docker 容器:"
#~ msgid "Setup environment variables:"
#~ msgstr "设置环境变量:"
#~ msgid "Online Inference"
#~ msgstr "在线推理"
#~ msgid "Once your server is started, you can query the model with input prompts"
#~ msgstr "一旦服务器启动,就可以通过输入提示词来查询模型。"
#~ msgid "Offline Inference"
#~ msgstr "离线推理"
#~ msgid "If you run this script successfully, you can see the info shown below:"
#~ msgstr "如果你成功运行此脚本,你可以看到如下所示的信息:"

View File

@@ -0,0 +1,30 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2025, vllm-kunlun team
# This file is distributed under the same license as the vllm-kunlun
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2025.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: vllm-kunlun\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-11-10 16:59+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../source/tutorials/single_npu_qwen3_quantization.md:1
msgid "Single-XPU (Qwen3 8B W4A8)"
msgstr ""
#: ../../source/tutorials/single_npu_qwen3_quantization.md:3
msgid "Run Docker Container"
msgstr ""