提交vllm0.11.0开发分支

This commit is contained in:
chenyili
2025-12-10 17:51:24 +08:00
parent deab7dd0b6
commit 7c22d621fb
175 changed files with 31856 additions and 8683 deletions

View File

@@ -0,0 +1,30 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2025, vllm-kunlun team
# This file is distributed under the same license as the vllm-kunlun
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2025.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: vllm-kunlun\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-11-10 16:59+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../source/tutorials/DeepSeek-V3.2-Exp.md:1
msgid "DeepSeek-V3.2-Exp"
msgstr ""
#: ../../source/tutorials/DeepSeek-V3.2-Exp.md:3
msgid "Introduction"
msgstr ""

View File

@@ -0,0 +1,29 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2025, vllm-kunlun team
# This file is distributed under the same license as the vllm-kunlun
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2025.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: vllm-kunlun\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-07-18 09:01+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Language: zh_CN\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"Generated-By: Babel 2.17.0\n"
#: ../../tutorials/index.md:3
msgid "Deployment"
msgstr "部署"
#: ../../tutorials/index.md:1
msgid "Tutorials"
msgstr "教程"

View File

@@ -0,0 +1,213 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2025, vllm-kunlun team
# This file is distributed under the same license as the vllm-kunlun
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2025.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: vllm-kunlun\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-11-10 16:59+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../source/tutorials/multi_node.md:1
msgid "Multi-Node-DP (DeepSeek)"
msgstr "多节点分布式处理DeepSeek"
#: ../../source/tutorials/multi_node.md:3
msgid "Getting Start"
msgstr "快速开始"
#~ msgid ""
#~ "vLLM-Kunlun now supports Data Parallel"
#~ " (DP) deployment, enabling model weights"
#~ " to be replicated across multiple "
#~ "XPUs or instances, each processing "
#~ "independent batches of requests. This is"
#~ " particularly useful for scaling throughput"
#~ " across devices while maintaining high "
#~ "resource utilization."
#~ msgstr ""
#~ "vLLM-Kunlun 现在支持数据并行DP部署可以在多个 XPU "
#~ "或实例之间复制模型权重,每个实例处理独立的请求批次。这对于在保证高资源利用率的同时,实现跨设备的吞吐量扩展特别有用。"
#~ msgid ""
#~ "Each DP rank is deployed as a "
#~ "separate “core engine” process which "
#~ "communicates with front-end process(es) "
#~ "via ZMQ sockets. Data Parallel can "
#~ "be combined with Tensor Parallel, in "
#~ "which case each DP engine owns a"
#~ " number of per-XPU worker processes"
#~ " equal to the TP size."
#~ msgstr ""
#~ "每个 DP 进程作为一个单独的“核心引擎”进程部署,并通过 ZMQ "
#~ "套接字与前端进程通信。数据并行可以与张量并行结合使用,此时每个 DP 引擎拥有数量等于 TP "
#~ "大小的每 XPU 工作进程。"
#~ msgid ""
#~ "For Mixture-of-Experts (MoE) models "
#~ "— especially advanced architectures like "
#~ "DeepSeek that utilize Multi-head Latent"
#~ " Attention (MLA) — a hybrid "
#~ "parallelism approach is recommended: - "
#~ "Use **Data Parallelism (DP)** for "
#~ "attention layers, which are replicated "
#~ "across devices and handle separate "
#~ "batches. - Use **Expert or Tensor"
#~ " Parallelism (EP/TP)** for expert layers,"
#~ " which are sharded across devices to"
#~ " distribute the computation."
#~ msgstr ""
#~ "对于混合专家Mixture-of-Experts, MoE模型——尤其是像 "
#~ "DeepSeek 这样采用多头潜在注意力Multi-head Latent "
#~ "Attention, MLA的高级架构——推荐使用混合并行策略\n"
#~ " - 对于注意力层,使用 **数据并行Data Parallelism, DP**,这些层会在各设备间复刻,并处理不同的批次。\n"
#~ " - 对于专家层,使用 **专家并行或张量并行Expert or "
#~ "Tensor Parallelism, EP/TP**,这些层会在设备间分片,从而分担计算。"
#~ msgid ""
#~ "This division enables attention layers "
#~ "to be replicated across Data Parallel"
#~ " (DP) ranks, enabling them to process"
#~ " different batches independently. Meanwhile, "
#~ "expert layers are partitioned (sharded) "
#~ "across devices using Expert or Tensor"
#~ " Parallelism(DP*TP), maximizing hardware "
#~ "utilization and efficiency."
#~ msgstr "这种划分使得注意力层能够在数据并行DP组内复制从而能够独立处理不同的批次。同时专家层通过专家或张量并行DP*TP在设备间进行分区切片最大化硬件利用率和效率。"
#~ msgid ""
#~ "In these cases the data parallel "
#~ "ranks are not completely independent, "
#~ "forward passes must be aligned and "
#~ "expert layers across all ranks are "
#~ "required to synchronize during every "
#~ "forward pass, even if there are "
#~ "fewer requests to be processed than "
#~ "DP ranks."
#~ msgstr ""
#~ "在这些情况下,数据并行的各个 rank 不是完全独立的,前向传播必须对齐,并且所有 rank "
#~ "上的专家层在每次前向传播时都需要同步,即使待处理的请求数量少于 DP rank 的数量。"
#~ msgid ""
#~ "For MoE models, when any requests "
#~ "are in progress in any rank, we"
#~ " must ensure that empty “dummy” "
#~ "forward passes are performed in all "
#~ "ranks which dont currently have any "
#~ "requests scheduled. This is handled via"
#~ " a separate DP `Coordinator` process "
#~ "which communicates with all of the "
#~ "ranks, and a collective operation "
#~ "performed every N steps to determine "
#~ "when all ranks become idle and can"
#~ " be paused. When TP is used in"
#~ " conjunction with DP, expert layers "
#~ "form an EP or TP group of "
#~ "size (DP x TP)."
#~ msgstr ""
#~ "对于 MoE 模型,当任何一个 rank 有请求正在进行时,必须确保所有当前没有请求的"
#~ " rank 都执行空的“虚拟”前向传播。这是通过一个单独的 DP `Coordinator`"
#~ " 协调器进程来实现的,该进程与所有 rank 通信,并且每隔 N "
#~ "步执行一次集体操作,以判断所有 rank 是否都处于空闲状态并可以暂停。当 TP 与 "
#~ "DP 结合使用时专家层会组成一个规模为DP x TP的 EP 或 "
#~ "TP 组。"
#~ msgid "Verify Multi-Node Communication Environment"
#~ msgstr "验证多节点通信环境"
#~ msgid "Physical Layer Requirements:"
#~ msgstr "物理层要求:"
#~ msgid ""
#~ "The physical machines must be located"
#~ " on the same WLAN, with network "
#~ "connectivity."
#~ msgstr "物理机器必须位于同一个 WLAN 中,并且具有网络连接。"
#~ msgid ""
#~ "All XPUs are connected with optical "
#~ "modules, and the connection status must"
#~ " be normal."
#~ msgstr "所有 XPU 都通过光模块连接,且连接状态必须正常。"
#~ msgid "Verification Process:"
#~ msgstr "验证流程:"
#~ msgid ""
#~ "Execute the following commands on each"
#~ " node in sequence. The results must"
#~ " all be `success` and the status "
#~ "must be `UP`:"
#~ msgstr "在每个节点上依次执行以下命令。所有结果必须为 `success` 且状态必须为 `UP`"
#~ msgid "XPU Interconnect Verification:"
#~ msgstr "XPU 互连验证:"
#~ msgid "1. Get XPU IP Addresses"
#~ msgstr "1. 获取 XPU IP 地址"
#~ msgid "2. Cross-Node PING Test"
#~ msgstr "2. 跨节点PING测试"
#~ msgid "Run with docker"
#~ msgstr "用 docker 运行"
#~ msgid ""
#~ "Assume you have two Atlas 800 "
#~ "A2(64G*8) nodes, and want to deploy "
#~ "the `deepseek-v3-w8a8` quantitative model "
#~ "across multi-node."
#~ msgstr "假设你有两台 Atlas 800 A264G*8节点并且想要在多节点上部署 `deepseek-v3-w8a8` 量化模型。"
#~ msgid ""
#~ "Before launch the inference server, "
#~ "ensure some environment variables are "
#~ "set for multi node communication"
#~ msgstr "在启动推理服务器之前,确保已经为多节点通信设置了一些环境变量。"
#~ msgid "Run the following scripts on two nodes respectively"
#~ msgstr "分别在两台节点上运行以下脚本"
#~ msgid "**node0**"
#~ msgstr "**节点0**"
#~ msgid "**node1**"
#~ msgstr "**节点1**"
#~ msgid ""
#~ "The Deployment view looks like: ![alt"
#~ " text](../assets/multi_node_dp.png)"
#~ msgstr "部署视图如下所示:![替代文本](../assets/multi_node_dp.png)"
#~ msgid "alt text"
#~ msgstr "替代文本"
#~ msgid ""
#~ "Once your server is started, you "
#~ "can query the model with input "
#~ "prompts:"
#~ msgstr "一旦你的服务器启动,你可以通过输入提示词来查询模型:"
#~ msgid "Run benchmarks"
#~ msgstr "运行基准测试"
#~ msgid ""
#~ "For details please refer to "
#~ "[benchmark](https://github.com/vllm-project/vllm-"
#~ "kunlun/tree/main/benchmarks)"
#~ msgstr ""
#~ "详细信息请参阅 [benchmark](https://github.com/vllm-project"
#~ "/vllm-kunlun/tree/main/benchmarks)"

View File

@@ -0,0 +1,30 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2025, vllm-kunlun team
# This file is distributed under the same license as the vllm-kunlun
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2025.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: vllm-kunlun\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-11-10 16:59+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../source/tutorials/multi_node_kimi.md:1
msgid "Multi-Node-DP (Kimi-K2)"
msgstr ""
#: ../../source/tutorials/multi_node_kimi.md:3
msgid "Verify Multi-Node Communication Environment"
msgstr ""

View File

@@ -0,0 +1,30 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2025, vllm-kunlun team
# This file is distributed under the same license as the vllm-kunlun
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2025.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: vllm-kunlun\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-11-10 16:59+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../source/tutorials/multi_node_pd_disaggregation_llmdatadist.md:1
msgid "Prefill-Decode Disaggregation Llmdatadist Verification (Qwen)"
msgstr ""
#: ../../source/tutorials/multi_node_pd_disaggregation_llmdatadist.md:3
msgid "Getting Start"
msgstr ""

View File

@@ -0,0 +1,30 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2025, vllm-kunlun team
# This file is distributed under the same license as the vllm-kunlun
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2025.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: vllm-kunlun\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-11-10 16:59+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../source/tutorials/multi_node_pd_disaggregation_mooncake.md:1
msgid "Prefill-Decode Disaggregation Mooncake Verification (Qwen)"
msgstr ""
#: ../../source/tutorials/multi_node_pd_disaggregation_mooncake.md:3
msgid "Getting Start"
msgstr ""

View File

@@ -0,0 +1,26 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2025, vllm-kunlun team
# This file is distributed under the same license as the vllm-kunlun
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2025.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: vllm-kunlun\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-11-10 16:59+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../source/tutorials/multi_node_qwen3vl.md:1
msgid "Multi-Node-DP (Qwen3-VL-235B-A22B)"
msgstr ""

View File

@@ -0,0 +1,26 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2025, vllm-kunlun team
# This file is distributed under the same license as the vllm-kunlun
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2025.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: vllm-kunlun\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-11-10 16:59+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../source/tutorials/multi_node_ray.md:1
msgid "Multi-Node-Ray (Qwen/Qwen3-235B-A22B)"
msgstr ""

View File

@@ -0,0 +1,53 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2025, vllm-kunlun team
# This file is distributed under the same license as the vllm-kunlun
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2025.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: vllm-kunlun\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-11-10 16:59+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../source/tutorials/multi_npu.md:1
msgid "Multi-XPU (QwQ 32B)"
msgstr "多XPUQwQ 32B"
#~ msgid "Run vllm-kunlun on Multi-XPU"
#~ msgstr "在多XPU上运行 vllm-kunlun"
#~ msgid "Run docker container:"
#~ msgstr "运行 docker 容器:"
#~ msgid "Setup environment variables:"
#~ msgstr "设置环境变量:"
#~ msgid "Online Inference on Multi-XPU"
#~ msgstr "多XPU的在线推理"
#~ msgid "Run the following script to start the vLLM server on Multi-XPU:"
#~ msgstr "运行以下脚本在多XPU上启动 vLLM 服务器:"
#~ msgid "Once your server is started, you can query the model with input prompts"
#~ msgstr "一旦服务器启动,就可以通过输入提示词来查询模型。"
#~ msgid "Offline Inference on Multi-XPU"
#~ msgstr "多XPU离线推理"
#~ msgid "Run the following script to execute offline inference on multi-XPU:"
#~ msgstr "运行以下脚本以在多XPU上执行离线推理"
#~ msgid "If you run this script successfully, you can see the info shown below:"
#~ msgstr "如果你成功运行此脚本,你可以看到如下所示的信息:"

View File

@@ -0,0 +1,74 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2025, vllm-kunlun team
# This file is distributed under the same license as the vllm-kunlun
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2025.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: vllm-kunlun\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-11-10 16:59+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../source/tutorials/multi_npu_moge.md:1
msgid "Multi-XPU (Pangu Pro MoE)"
msgstr "多XPUPangu Pro MoE"
#~ msgid "Run vllm-kunlun on Multi-XPU"
#~ msgstr "在多XPU上运行 vllm-kunlun"
#~ msgid "Run container:"
#~ msgstr "运行容器:"
#~ msgid "Setup environment variables:"
#~ msgstr "设置环境变量:"
#~ msgid "Download the model:"
#~ msgstr "下载该模型:"
#~ msgid "Online Inference on Multi-XPU"
#~ msgstr "多XPU上的在线推理"
#~ msgid "Run the following script to start the vLLM server on Multi-XPU:"
#~ msgstr "运行以下脚本在多XPU上启动 vLLM 服务器:"
#~ msgid ""
#~ "Once your server is started, you "
#~ "can query the model with input "
#~ "prompts:"
#~ msgstr "一旦你的服务器启动,你可以通过输入提示词来查询模型:"
#~ msgid "v1/completions"
#~ msgstr "v1/completions"
#~ msgid "v1/chat/completions"
#~ msgstr "v1/chat/completions"
#~ msgid "If you run this successfully, you can see the info shown below:"
#~ msgstr "如果你成功运行这个,你可以看到如下所示的信息:"
#~ msgid "Offline Inference on Multi-XPU"
#~ msgstr "多XPU离线推理"
#~ msgid "Run the following script to execute offline inference on multi-XPU:"
#~ msgstr "运行以下脚本以在多XPU上执行离线推理"
#~ msgid "Graph Mode"
#~ msgstr "图模式"
#~ msgid "Eager Mode"
#~ msgstr "即时模式"
#~ msgid "If you run this script successfully, you can see the info shown below:"
#~ msgstr "如果你成功运行此脚本,你可以看到如下所示的信息:"

View File

@@ -0,0 +1,82 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2025, vllm-kunlun team
# This file is distributed under the same license as the vllm-kunlun
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2025.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: vllm-kunlun\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-11-10 16:59+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../source/tutorials/multi_npu_quantization.md:1
msgid "Multi-XPU (QwQ 32B W8A8)"
msgstr "多XPUQwQ 32B W8A8"
#: ../../source/tutorials/multi_npu_quantization.md:3
#, fuzzy
msgid "Run Docker Container"
msgstr "运行 docker 容器"
#~ msgid "w8a8 quantization feature is supported by v0.8.4rc2 or higher"
#~ msgstr "w8a8 量化功能由 v0.8.4rc2 或更高版本支持"
#~ msgid "Install modelslim and convert model"
#~ msgstr "安装 modelslim 并转换模型"
#~ msgid ""
#~ "You can choose to convert the "
#~ "model yourself or use the quantized "
#~ "model we uploaded, see "
#~ "https://www.modelscope.cn/models/vllm-kunlun/QwQ-32B-"
#~ "W8A8"
#~ msgstr ""
#~ "你可以选择自己转换模型,或者使用我们上传的量化模型,详见 https://www.modelscope.cn/models"
#~ "/vllm-kunlun/QwQ-32B-W8A8"
#~ msgid "Verify the quantized model"
#~ msgstr "验证量化模型"
#~ msgid "The converted model files looks like:"
#~ msgstr "转换后的模型文件如下所示:"
#~ msgid "Run the following script to start the vLLM server with quantized model:"
#~ msgstr "运行以下脚本以启动带有量化模型的 vLLM 服务器:"
#~ msgid ""
#~ "The value \"kunlun\" for \"--"
#~ "quantization\" argument will be supported "
#~ "after [a specific PR](https://github.com/vllm-"
#~ "project/vllm-kunlun/pull/877) is merged and"
#~ " released, you can cherry-pick this"
#~ " commit for now."
#~ msgstr ""
#~ "在 [特定的PR](https://github.com/vllm-project/vllm-"
#~ "kunlun/pull/877) 合并并发布后, \"--quantization\" "
#~ "参数将支持值 \"kunlun\",你也可以现在手动挑选该提交。"
#~ msgid "Once your server is started, you can query the model with input prompts"
#~ msgstr "一旦服务器启动,就可以通过输入提示词来查询模型。"
#~ msgid ""
#~ "Run the following script to execute "
#~ "offline inference on multi-XPU with "
#~ "quantized model:"
#~ msgstr "运行以下脚本在多XPU上使用量化模型执行离线推理"
#~ msgid ""
#~ "To enable quantization for kunlun, "
#~ "quantization method must be \"kunlun\""
#~ msgstr "要在kunlun上启用量化量化方法必须为“kunlun”。"

View File

@@ -0,0 +1,63 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2025, vllm-kunlun team
# This file is distributed under the same license as the vllm-kunlun
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2025.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: vllm-kunlun\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-11-10 16:59+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../source/tutorials/multi_npu_qwen3_moe.md:1
msgid "Multi-XPU (Qwen3-30B-A3B)"
msgstr "多XPUQwen3-30B-A3B"
#~ msgid "Run vllm-kunlun on Multi-XPU with Qwen3 MoE"
#~ msgstr "在多XPU上运行带有Qwen3 MoE的vllm-kunlun"
#~ msgid "Run docker container:"
#~ msgstr "运行 docker 容器:"
#~ msgid "Setup environment variables:"
#~ msgstr "设置环境变量:"
#~ msgid "Online Inference on Multi-XPU"
#~ msgstr "多XPU的在线推理"
#~ msgid "Run the following script to start the vLLM server on Multi-XPU:"
#~ msgstr "运行以下脚本以在多XPU上启动 vLLM 服务器:"
#~ msgid ""
#~ "For an Atlas A2 with 64GB of "
#~ "XPU card memory, tensor-parallel-size"
#~ " should be at least 2, and for"
#~ " 32GB of memory, tensor-parallel-size"
#~ " should be at least 4."
#~ msgstr ""
#~ "对于拥有64GB XPU卡内存的Atlas A2tensor-parallel-size"
#~ " 至少应为2对于32GB内存的XPU卡tensor-parallel-size 至少应为4。"
#~ msgid "Once your server is started, you can query the model with input prompts"
#~ msgstr "一旦服务器启动,就可以通过输入提示词来查询模型。"
#~ msgid "Offline Inference on Multi-XPU"
#~ msgstr "多XPU离线推理"
#~ msgid "Run the following script to execute offline inference on multi-XPU:"
#~ msgstr "运行以下脚本以在多XPU上执行离线推理"
#~ msgid "If you run this script successfully, you can see the info shown below:"
#~ msgstr "如果你成功运行此脚本,你可以看到如下所示的信息:"

View File

@@ -0,0 +1,26 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2025, vllm-kunlun team
# This file is distributed under the same license as the vllm-kunlun
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2025.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: vllm-kunlun\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-11-10 16:59+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../source/tutorials/multi_npu_qwen3_next.md:1
msgid "Multi-XPU (Qwen3-Next)"
msgstr ""

View File

@@ -0,0 +1,94 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2025, vllm-kunlun team
# This file is distributed under the same license as the vllm-kunlun
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2025.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: vllm-kunlun\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-11-10 16:59+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../source/tutorials/single_node_300i.md:1
#, fuzzy
msgid "Single Node (Atlas 300I Series)"
msgstr "单节点Atlas 300I 系列"
#~ msgid ""
#~ "This Atlas 300I series is currently "
#~ "experimental. In future versions, there "
#~ "may be behavioral changes around model"
#~ " coverage, performance improvement."
#~ msgstr "Atlas 300I 系列目前处于实验阶段。在未来的版本中,模型覆盖范围和性能提升方面可能会有行为上的变化。"
#~ msgid "Run vLLM on Altlas 300I series"
#~ msgstr "在 Atlas 300I 系列上运行 vLLM"
#~ msgid "Run docker container:"
#~ msgstr "运行 docker 容器:"
#~ msgid "Setup environment variables:"
#~ msgstr "设置环境变量:"
#~ msgid "Online Inference on XPU"
#~ msgstr "在XPU上进行在线推理"
#~ msgid ""
#~ "Run the following script to start "
#~ "the vLLM server on XPU(Qwen3-0.6B:1 "
#~ "card, Qwen2.5-7B-Instruct:2 cards, Pangu-"
#~ "Pro-MoE-72B: 8 cards):"
#~ msgstr ""
#~ "运行以下脚本,在 XPU 上启动 vLLM 服务器Qwen3-0.6B1 "
#~ "张卡Qwen2.5-7B-Instruct2 张卡Pangu-Pro-MoE-"
#~ "72B8 张卡):"
#~ msgid "Qwen3-0.6B"
#~ msgstr "Qwen3-0.6B"
#~ msgid "Run the following command to start the vLLM server:"
#~ msgstr "运行以下命令以启动 vLLM 服务器:"
#~ msgid "Once your server is started, you can query the model with input prompts"
#~ msgstr "一旦服务器启动,就可以通过输入提示词来查询模型。"
#~ msgid "Qwen/Qwen2.5-7B-Instruct"
#~ msgstr "Qwen/Qwen2.5-7B-Instruct"
#~ msgid "Pangu-Pro-MoE-72B"
#~ msgstr "Pangu-Pro-MoE-72B"
#~ msgid "Download the model:"
#~ msgstr "下载该模型:"
#~ msgid "If you run this script successfully, you can see the results."
#~ msgstr "如果你成功运行此脚本,你就可以看到结果。"
#~ msgid "Offline Inference"
#~ msgstr "离线推理"
#~ msgid ""
#~ "Run the following script (`example.py`) "
#~ "to execute offline inference on XPU:"
#~ msgstr "运行以下脚本(`example.py`)以在 XPU 上执行离线推理:"
#~ msgid "Qwen2.5-7B-Instruct"
#~ msgstr "Qwen2.5-7B-Instruct"
#~ msgid "Run script:"
#~ msgstr "运行脚本:"
#~ msgid "If you run this script successfully, you can see the info shown below:"
#~ msgstr "如果你成功运行此脚本,你可以看到如下所示的信息:"

View File

@@ -0,0 +1,106 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2025, vllm-kunlun team
# This file is distributed under the same license as the vllm-kunlun
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2025.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: vllm-kunlun\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-11-10 16:59+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../source/tutorials/single_npu.md:1
msgid "Single XPU (Qwen3 8B)"
msgstr "单个XPUQwen3 8B"
#: ../../source/tutorials/single_npu.md:3
msgid "Run vllm-kunlun on Single XPU"
msgstr "在单个 XPU 上运行 vllm-kunlun"
#: ../../source/tutorials/single_npu.md:5
msgid "Offline Inference on Single XPU"
msgstr "在单个XPU上进行离线推理"
#~ msgid "Run docker container:"
#~ msgstr "运行 docker 容器:"
#~ msgid "Setup environment variables:"
#~ msgstr "设置环境变量:"
#~ msgid ""
#~ "`max_split_size_mb` prevents the native "
#~ "allocator from splitting blocks larger "
#~ "than this size (in MB). This can"
#~ " reduce fragmentation and may allow "
#~ "some borderline workloads to complete "
#~ "without running out of memory. You "
#~ "can find more details "
#~ "[<u>here</u>](https://www.hikunlun.com/document/detail/zh/CANNCommunityEdition/800alpha003/apiref/envref/envref_07_0061.html)."
#~ msgstr ""
#~ "`max_split_size_mb` 防止本地分配器拆分超过此大小(以 MB "
#~ "为单位)的内存块。这可以减少内存碎片,并且可能让一些边缘情况下的工作负载顺利完成而不会耗尽内存。你可以在[<u>这里</u>](https://www.hikunlun.com/document/detail/zh/CANNCommunityEdition/800alpha003/apiref/envref/envref_07_0061.html)找到更多详细信息。"
#~ msgid "Run the following script to execute offline inference on a single XPU:"
#~ msgstr "运行以下脚本以在单个 XPU 上执行离线推理:"
#~ msgid "Graph Mode"
#~ msgstr "图模式"
#~ msgid "Eager Mode"
#~ msgstr "即时模式"
#~ msgid "If you run this script successfully, you can see the info shown below:"
#~ msgstr "如果你成功运行此脚本,你可以看到如下所示的信息:"
#~ msgid "Online Serving on Single XPU"
#~ msgstr "单个 XPU 上的在线服务"
#~ msgid "Run docker container to start the vLLM server on a single XPU:"
#~ msgstr "运行 docker 容器,在单个 XPU 上启动 vLLM 服务器:"
#~ msgid ""
#~ "Add `--max_model_len` option to avoid "
#~ "ValueError that the Qwen2.5-7B model's "
#~ "max seq len (32768) is larger than"
#~ " the maximum number of tokens that"
#~ " can be stored in KV cache "
#~ "(26240). This will differ with different"
#~ " XPU series base on the HBM "
#~ "size. Please modify the value according"
#~ " to a suitable value for your "
#~ "XPU series."
#~ msgstr ""
#~ "添加 `--max_model_len` 选项,以避免出现 Qwen2.5-7B "
#~ "模型的最大序列长度32768大于 KV 缓存能存储的最大 token "
#~ "数26240时的 ValueError。不同 XPU 系列由于 HBM "
#~ "容量不同,该值也会有所不同。请根据您的 XPU 系列,修改为合适的数值。"
#~ msgid "If your service start successfully, you can see the info shown below:"
#~ msgstr "如果你的服务启动成功,你会看到如下所示的信息:"
#~ msgid ""
#~ "Once your server is started, you "
#~ "can query the model with input "
#~ "prompts:"
#~ msgstr "一旦你的服务器启动,你可以通过输入提示词来查询模型:"
#~ msgid ""
#~ "If you query the server successfully,"
#~ " you can see the info shown "
#~ "below (client):"
#~ msgstr "如果你成功查询了服务器,你可以看到如下所示的信息(客户端):"
#~ msgid "Logs of the vllm server:"
#~ msgstr "vllm 服务器的日志:"

View File

@@ -0,0 +1,77 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2025, vllm-kunlun team
# This file is distributed under the same license as the vllm-kunlun
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2025.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: vllm-kunlun\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-07-18 09:01+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Language: zh_CN\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"Generated-By: Babel 2.17.0\n"
#: ../../tutorials/single_npu_audio.md:1
msgid "Single XPU (Qwen2-Audio 7B)"
msgstr "单个 XPUQwen2-Audio 7B"
#: ../../tutorials/single_npu_audio.md:3
msgid "Run vllm-kunlun on Single XPU"
msgstr "在单个 XPU 上运行 vllm-kunlun"
#: ../../tutorials/single_npu_audio.md:5
msgid "Offline Inference on Single XPU"
msgstr "在单个XPU上进行离线推理"
#: ../../tutorials/single_npu_audio.md:7
msgid "Run docker container:"
msgstr "运行 docker 容器:"
#: ../../tutorials/single_npu_audio.md:29
msgid "Setup environment variables:"
msgstr "设置环境变量:"
#: ../../tutorials/single_npu_audio.md:40
msgid ""
"`max_split_size_mb` prevents the native allocator from splitting blocks "
"larger than this size (in MB). This can reduce fragmentation and may allow "
"some borderline workloads to complete without running out of memory. You can"
" find more details "
"[<u>here</u>](https://www.hikunlun.com/document/detail/zh/CANNCommunityEdition/800alpha003/apiref/envref/envref_07_0061.html)."
msgstr ""
"`max_split_size_mb` 防止本地分配器拆分超过此大小(以 MB "
"为单位)的内存块。这可以减少内存碎片,并且可能让一些边缘情况下的工作负载顺利完成而不会耗尽内存。你可以在[<u>这里</u>](https://www.hikunlun.com/document/detail/zh/CANNCommunityEdition/800alpha003/apiref/envref/envref_07_0061.html)找到更多详细信息。"
#: ../../tutorials/single_npu_audio.md:43
msgid "Install packages required for audio processing:"
msgstr "安装音频处理所需的软件包:"
#: ../../tutorials/single_npu_audio.md:50
msgid "Run the following script to execute offline inference on a single XPU:"
msgstr "运行以下脚本以在单个 XPU 上执行离线推理:"
#: ../../tutorials/single_npu_audio.md:114
msgid "If you run this script successfully, you can see the info shown below:"
msgstr "如果你成功运行此脚本,你可以看到如下所示的信息:"
#: ../../tutorials/single_npu_audio.md:120
msgid "Online Serving on Single XPU"
msgstr "单个 XPU 上的在线服务"
#: ../../tutorials/single_npu_audio.md:122
msgid ""
"Currently, vllm's OpenAI-compatible server doesn't support audio inputs, "
"find more details [<u>here</u>](https://github.com/vllm-"
"project/vllm/issues/19977)."
msgstr ""
"目前vllm 的兼容 OpenAI 的服务器不支持音频输入,更多详情请查看[<u>这里</u>](https://github.com/vllm-"
"project/vllm/issues/19977)。"

View File

@@ -0,0 +1,99 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2025, vllm-kunlun team
# This file is distributed under the same license as the vllm-kunlun
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2025.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: vllm-kunlun\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-07-18 09:01+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Language: zh_CN\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"Generated-By: Babel 2.17.0\n"
#: ../../tutorials/single_npu_multimodal.md:1
msgid "Single XPU (Qwen2.5-VL 7B)"
msgstr "单个XPUQwen2.5-VL 7B"
#: ../../tutorials/single_npu_multimodal.md:3
msgid "Run vllm-kunlun on Single XPU"
msgstr "在单个 XPU 上运行 vllm-kunlun"
#: ../../tutorials/single_npu_multimodal.md:5
msgid "Offline Inference on Single XPU"
msgstr "在单个XPU上进行离线推理"
#: ../../tutorials/single_npu_multimodal.md:7
msgid "Run docker container:"
msgstr "运行 docker 容器:"
#: ../../tutorials/single_npu_multimodal.md:29
msgid "Setup environment variables:"
msgstr "设置环境变量:"
#: ../../tutorials/single_npu_multimodal.md:40
msgid ""
"`max_split_size_mb` prevents the native allocator from splitting blocks "
"larger than this size (in MB). This can reduce fragmentation and may allow "
"some borderline workloads to complete without running out of memory. You can"
" find more details "
"[<u>here</u>](https://www.hikunlun.com/document/detail/zh/CANNCommunityEdition/800alpha003/apiref/envref/envref_07_0061.html)."
msgstr ""
"`max_split_size_mb` 防止本地分配器拆分超过此大小(以 MB "
"为单位)的内存块。这可以减少内存碎片,并且可能让一些边缘情况下的工作负载顺利完成而不会耗尽内存。你可以在[<u>这里</u>](https://www.hikunlun.com/document/detail/zh/CANNCommunityEdition/800alpha003/apiref/envref/envref_07_0061.html)找到更多详细信息。"
#: ../../tutorials/single_npu_multimodal.md:43
msgid "Run the following script to execute offline inference on a single XPU:"
msgstr "运行以下脚本以在单个 XPU 上执行离线推理:"
#: ../../tutorials/single_npu_multimodal.md:109
msgid "If you run this script successfully, you can see the info shown below:"
msgstr "如果你成功运行此脚本,你可以看到如下所示的信息:"
#: ../../tutorials/single_npu_multimodal.md:121
msgid "Online Serving on Single XPU"
msgstr "单个 XPU 上的在线服务"
#: ../../tutorials/single_npu_multimodal.md:123
msgid "Run docker container to start the vLLM server on a single XPU:"
msgstr "运行 docker 容器,在单个 XPU 上启动 vLLM 服务器:"
#: ../../tutorials/single_npu_multimodal.md:154
msgid ""
"Add `--max_model_len` option to avoid ValueError that the "
"Qwen2.5-VL-7B-Instruct model's max seq len (128000) is larger than the "
"maximum number of tokens that can be stored in KV cache. This will differ "
"with different XPU series base on the HBM size. Please modify the value "
"according to a suitable value for your XPU series."
msgstr ""
"新增 `--max_model_len` 选项,以避免出现 ValueError即 Qwen2.5-VL-7B-Instruct "
"模型的最大序列长度128000大于 KV 缓存可存储的最大 token 数。该数值会根据不同 XPU 系列的 HBM 大小而不同。请根据你的 XPU"
" 系列,将该值设置为合适的数值。"
#: ../../tutorials/single_npu_multimodal.md:157
msgid "If your service start successfully, you can see the info shown below:"
msgstr "如果你的服务启动成功,你会看到如下所示的信息:"
#: ../../tutorials/single_npu_multimodal.md:165
msgid ""
"Once your server is started, you can query the model with input prompts:"
msgstr "一旦你的服务器启动,你可以通过输入提示词来查询模型:"
#: ../../tutorials/single_npu_multimodal.md:182
msgid ""
"If you query the server successfully, you can see the info shown below "
"(client):"
msgstr "如果你成功查询了服务器,你可以看到如下所示的信息(客户端):"
#: ../../tutorials/single_npu_multimodal.md:188
msgid "Logs of the vllm server:"
msgstr "vllm 服务器的日志:"

View File

@@ -0,0 +1,38 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2025, vllm-kunlun team
# This file is distributed under the same license as the vllm-kunlun
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2025.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: vllm-kunlun\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-11-10 16:59+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../source/tutorials/single_npu_qwen2.5_vl.md:1
msgid "Single XPU (Qwen2.5-VL 7B)"
msgstr ""
#: ../../source/tutorials/single_npu_qwen2.5_vl.md:3
msgid "Run vllm-kunlun on Single XPU"
msgstr ""
#: ../../source/tutorials/single_npu_qwen2.5_vl.md:5
msgid "Offline Inference on Single XPU"
msgstr ""
#: ../../source/tutorials/single_npu_qwen2.5_vl.md:7
msgid "Run docker container:"
msgstr ""

View File

@@ -0,0 +1,38 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2025, vllm-kunlun team
# This file is distributed under the same license as the vllm-kunlun
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2025.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: vllm-kunlun\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-11-10 16:59+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../source/tutorials/single_npu_qwen2_audio.md:1
msgid "Single XPU (Qwen2-Audio 7B)"
msgstr ""
#: ../../source/tutorials/single_npu_qwen2_audio.md:3
msgid "Run vllm-kunlun on Single XPU"
msgstr ""
#: ../../source/tutorials/single_npu_qwen2_audio.md:5
msgid "Offline Inference on Single XPU"
msgstr ""
#: ../../source/tutorials/single_npu_qwen2_audio.md:7
msgid "Run docker container:"
msgstr ""

View File

@@ -0,0 +1,77 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2025, vllm-kunlun team
# This file is distributed under the same license as the vllm-kunlun
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2025.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: vllm-kunlun\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-11-10 16:59+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../source/tutorials/single_npu_qwen3_embedding.md:1
msgid "Single XPU (Qwen3-Embedding-8B)"
msgstr "单个 XPUQwen3-Embedding-8B"
#: ../../source/tutorials/single_npu_qwen3_embedding.md:3
msgid ""
"The Qwen3 Embedding model series is the latest proprietary model of the "
"Qwen family,"
msgstr "Qwen3 Embedding 模型系列是 Qwen 家族最新的专有模型,"
#~ msgid ""
#~ "The Qwen3 Embedding model series is "
#~ "the latest proprietary model of the "
#~ "Qwen family, specifically designed for "
#~ "text embedding and ranking tasks. "
#~ "Building upon the dense foundational "
#~ "models of the Qwen3 series, it "
#~ "provides a comprehensive range of text"
#~ " embeddings and reranking models in "
#~ "various sizes (0.6B, 4B, and 8B). "
#~ "This guide describes how to run "
#~ "the model with vLLM Kunlun. Note "
#~ "that only 0.9.2rc1 and higher versions"
#~ " of vLLM Kunlun support the model."
#~ msgstr ""
#~ "Qwen3 Embedding 模型系列是 Qwen "
#~ "家族最新的专有模型,专为文本嵌入和排序任务设计。在 Qwen3 "
#~ "系列的密集基础模型之上它提供了多种尺寸0.6B、4B 和 8B的文本嵌入与重排序模型。本指南介绍如何使用"
#~ " vLLM Kunlun 运行该模型。请注意,只有 vLLM Kunlun "
#~ "0.9.2rc1 及更高版本才支持该模型。"
#~ msgid "Run docker container"
#~ msgstr "运行 docker 容器"
#~ msgid ""
#~ "Take Qwen3-Embedding-8B model as an "
#~ "example, first run the docker container"
#~ " with the following command:"
#~ msgstr "以 Qwen3-Embedding-8B 模型为例,首先使用以下命令运行 docker 容器:"
#~ msgid "Setup environment variables:"
#~ msgstr "设置环境变量:"
#~ msgid "Online Inference"
#~ msgstr "在线推理"
#~ msgid "Once your server is started, you can query the model with input prompts"
#~ msgstr "一旦服务器启动,就可以通过输入提示词来查询模型。"
#~ msgid "Offline Inference"
#~ msgstr "离线推理"
#~ msgid "If you run this script successfully, you can see the info shown below:"
#~ msgstr "如果你成功运行此脚本,你可以看到如下所示的信息:"

View File

@@ -0,0 +1,30 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2025, vllm-kunlun team
# This file is distributed under the same license as the vllm-kunlun
# package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2025.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: vllm-kunlun\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-11-10 16:59+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.17.0\n"
#: ../../source/tutorials/single_npu_qwen3_quantization.md:1
msgid "Single-XPU (Qwen3 8B W4A8)"
msgstr ""
#: ../../source/tutorials/single_npu_qwen3_quantization.md:3
msgid "Run Docker Container"
msgstr ""